Merge branch 'master' into zvonand-minmax

This commit is contained in:
Andrey Zvonov 2022-08-23 17:18:57 +03:00 committed by GitHub
commit 52159b77d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
119 changed files with 1650 additions and 1758 deletions

View File

@ -54,8 +54,6 @@ Checks: '*,
-cppcoreguidelines-slicing,
-cppcoreguidelines-special-member-functions,
-concurrency-mt-unsafe,
-darwin-*,
-fuchsia-*,

View File

@ -19,7 +19,6 @@
* Make the remote filesystem cache composable, allow not to evict certain files (regarding idx, mrk, ..), delete old cache version. Now it is possible to configure cache over Azure blob storage disk, over Local disk, over StaticWeb disk, etc. This PR is marked backward incompatible because cache configuration changes and in order for cache to work need to update the config file. Old cache will still be used with new configuration. The server will startup fine with the old cache configuration. Closes https://github.com/ClickHouse/ClickHouse/issues/36140. Closes https://github.com/ClickHouse/ClickHouse/issues/37889. [#36171](https://github.com/ClickHouse/ClickHouse/pull/36171) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### New Feature
* Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS.
* Query parameters can be set in interactive mode as `SET param_abc = 'def'` and transferred via the native protocol as settings. [#39906](https://github.com/ClickHouse/ClickHouse/pull/39906) ([Nikita Taranov](https://github.com/nickitat)).
* Quota key can be set in the native protocol ([Yakov Olkhovsky](https://github.com/ClickHouse/ClickHouse/pull/39874)).
* Added a setting `exact_rows_before_limit` (0/1). When enabled, ClickHouse will provide exact value for `rows_before_limit_at_least` statistic, but with the cost that the data before limit will have to be read completely. This closes [#6613](https://github.com/ClickHouse/ClickHouse/issues/6613). [#25333](https://github.com/ClickHouse/ClickHouse/pull/25333) ([kevin wan](https://github.com/MaxWk)).
@ -33,6 +32,8 @@
* Add formats `PrettyMonoBlock`, `PrettyNoEscapesMonoBlock`, `PrettyCompactNoEscapes`, `PrettyCompactNoEscapesMonoBlock`, `PrettySpaceNoEscapes`, `PrettySpaceMonoBlock`, `PrettySpaceNoEscapesMonoBlock`. [#39646](https://github.com/ClickHouse/ClickHouse/pull/39646) ([Kruglov Pavel](https://github.com/Avogar)).
* Add new setting schema_inference_hints that allows to specify structure hints in schema inference for specific columns. Closes [#39569](https://github.com/ClickHouse/ClickHouse/issues/39569). [#40068](https://github.com/ClickHouse/ClickHouse/pull/40068) ([Kruglov Pavel](https://github.com/Avogar)).
#### Experimental Feature
* Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS.
#### Performance Improvement
* Improved memory usage during memory efficient merging of aggregation results. [#39429](https://github.com/ClickHouse/ClickHouse/pull/39429) ([Nikita Taranov](https://github.com/nickitat)).

View File

@ -27,7 +27,7 @@ void trim(String & s)
std::string getEditor()
{
const char * editor = std::getenv("EDITOR");
const char * editor = std::getenv("EDITOR"); // NOLINT(concurrency-mt-unsafe)
if (!editor || !*editor)
editor = "vim";
@ -76,7 +76,7 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
if (!in)
{
rx.print("Cannot open %s reading (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
path.c_str(), errnoToString().c_str());
return;
}
@ -84,7 +84,7 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
if (getline(in, line).bad())
{
rx.print("Cannot read from %s (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
path.c_str(), errnoToString().c_str());
return;
}
@ -113,7 +113,7 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
if (!out)
{
rx.print("Cannot open %s for writing (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
path.c_str(), errnoToString().c_str());
return;
}
@ -151,7 +151,7 @@ ReplxxLineReader::ReplxxLineReader(
history_file_fd = open(history_file_path.c_str(), O_RDWR);
if (history_file_fd < 0)
{
rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Open of history file failed: %s\n", errnoToString().c_str());
}
else
{
@ -159,18 +159,18 @@ ReplxxLineReader::ReplxxLineReader(
if (flock(history_file_fd, LOCK_SH))
{
rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Shared lock of history file failed: %s\n", errnoToString().c_str());
}
else
{
if (!rx.history_load(history_file_path))
{
rx.print("Loading history failed: %s\n", errnoToString(errno).c_str());
rx.print("Loading history failed: %s\n", errnoToString().c_str());
}
if (flock(history_file_fd, LOCK_UN))
{
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}
}
}
@ -225,7 +225,7 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (close(history_file_fd))
rx.print("Close of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Close of history file failed: %s\n", errnoToString().c_str());
}
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -250,7 +250,7 @@ void ReplxxLineReader::addToHistory(const String & line)
// and that is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Lock of history file failed: %s\n", errnoToString().c_str());
else
locked = true;
@ -258,10 +258,10 @@ void ReplxxLineReader::addToHistory(const String & line)
// flush changes to the disk
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", errnoToString(errno).c_str());
rx.print("Saving history failed: %s\n", errnoToString().c_str());
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}
/// See comments in ShellCommand::executeImpl()
@ -275,7 +275,7 @@ int ReplxxLineReader::executeEditor(const std::string & path)
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
if (!real_vfork)
{
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString(errno).c_str());
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str());
return -1;
}
@ -283,7 +283,7 @@ int ReplxxLineReader::executeEditor(const std::string & path)
if (-1 == pid)
{
rx.print("Cannot vfork: %s\n", errnoToString(errno).c_str());
rx.print("Cannot vfork: %s\n", errnoToString().c_str());
return -1;
}
@ -292,11 +292,11 @@ int ReplxxLineReader::executeEditor(const std::string & path)
{
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask);
sigprocmask(SIG_UNBLOCK, &mask, nullptr);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
execvp(editor.c_str(), argv);
rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString(errno).c_str());
rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str());
_exit(-1);
}
@ -309,7 +309,7 @@ int ReplxxLineReader::executeEditor(const std::string & path)
if (errno == EINTR)
continue;
rx.print("Cannot waitpid: %s\n", errnoToString(errno).c_str());
rx.print("Cannot waitpid: %s\n", errnoToString().c_str());
return -1;
}
else
@ -324,7 +324,7 @@ void ReplxxLineReader::openEditor()
int fd = ::mkstemps(filename, 4);
if (-1 == fd)
{
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString(errno).c_str());
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str());
return;
}
@ -338,7 +338,7 @@ void ReplxxLineReader::openEditor()
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
{
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str());
break;
}
bytes_written += res;
@ -346,7 +346,7 @@ void ReplxxLineReader::openEditor()
if (0 != ::close(fd))
{
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
@ -364,7 +364,7 @@ void ReplxxLineReader::openEditor()
}
catch (...)
{
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
}
@ -373,7 +373,7 @@ void ReplxxLineReader::openEditor()
enableBracketedPaste();
if (0 != ::unlink(filename))
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str());
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -3,10 +3,11 @@
#include <fmt/format.h>
std::string errnoToString(int code, int the_errno)
std::string errnoToString(int the_errno)
{
const size_t buf_size = 128;
char buf[buf_size];
#ifndef _GNU_SOURCE
int rc = strerror_r(the_errno, buf, buf_size);
#ifdef OS_DARWIN
@ -15,7 +16,7 @@ std::string errnoToString(int code, int the_errno)
if (rc != 0)
#endif
{
std::string tmp = std::to_string(code);
std::string tmp = std::to_string(the_errno);
const char * code_str = tmp.c_str();
const char * unknown_message = "Unknown error ";
strcpy(buf, unknown_message);
@ -23,7 +24,6 @@ std::string errnoToString(int code, int the_errno)
}
return fmt::format("errno: {}, strerror: {}", the_errno, buf);
#else
(void)code;
return fmt::format("errno: {}, strerror: {}", the_errno, strerror_r(the_errno, buf, sizeof(buf)));
#endif
}

View File

@ -3,4 +3,4 @@
#include <cerrno>
#include <string>
std::string errnoToString(int code, int the_errno = errno);
std::string errnoToString(int the_errno = errno);

View File

@ -16,7 +16,7 @@ void setTerminalEcho(bool enable)
struct termios tty{};
if (0 != tcgetattr(STDIN_FILENO, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString());
if (enable)
tty.c_lflag |= ECHO;
@ -24,5 +24,5 @@ void setTerminalEcho(bool enable)
tty.c_lflag &= ~ECHO;
if (0 != tcsetattr(STDIN_FILENO, TCSANOW, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString());
}

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1b0af760b3506b8e35b50cb7df098cbad5064ff2
Subproject commit 33f60f961d4914441b684af43e9e5535078ba54b

View File

@ -15,11 +15,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name)
) ENGINE = EmbeddedRocksDB([ttl]) PRIMARY KEY(primary_key_name)
```
Required parameters:
Engine parameters:
- `ttl` - time to live for values. TTL is accepted in seconds. If TTL is 0, regular RocksDB instance is used (without TTL).
- `primary_key_name` - any column name in the column list.
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.

View File

@ -1,6 +1,6 @@
---
sidebar_position: 69
sidebar_label: "Named connections"
sidebar_label: "Named collections"
---
# Storing details for connecting to external sources in configuration files
@ -12,7 +12,7 @@ from users with only SQL access.
Parameters can be set in XML `<format>CSV</format>` and overridden in SQL `, format = 'TSV'`.
The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`.
Named connections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
Named collections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
Example of configuration:
```xml
@ -24,7 +24,7 @@ $ cat /etc/clickhouse-server/config.d/named_collections.xml
</clickhouse>
```
## Named connections for accessing S3.
## Named collections for accessing S3.
The description of parameters see [s3 Table Function](../sql-reference/table-functions/s3.md).
@ -42,7 +42,7 @@ Example of configuration:
</clickhouse>
```
### Example of using named connections with the s3 function
### Example of using named collections with the s3 function
```sql
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
@ -58,7 +58,7 @@ FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.)
```
### Example of using named connections with an S3 table
### Example of using named collections with an S3 table
```sql
CREATE TABLE s3_engine_table (number Int64)
@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 3;
└────────┘
```
## Named connections for accessing MySQL database
## Named collections for accessing MySQL database
The description of parameters see [mysql](../sql-reference/table-functions/mysql.md).
@ -95,7 +95,7 @@ Example of configuration:
</clickhouse>
```
### Example of using named connections with the mysql function
### Example of using named collections with the mysql function
```sql
SELECT count() FROM mysql(mymysql, table = 'test');
@ -105,7 +105,7 @@ SELECT count() FROM mysql(mymysql, table = 'test');
└─────────┘
```
### Example of using named connections with an MySQL table
### Example of using named collections with an MySQL table
```sql
CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0);
@ -116,7 +116,7 @@ SELECT count() FROM mytable;
└─────────┘
```
### Example of using named connections with database with engine MySQL
### Example of using named collections with database with engine MySQL
```sql
CREATE DATABASE mydatabase ENGINE = MySQL(mymysql);
@ -129,7 +129,7 @@ SHOW TABLES FROM mydatabase;
└────────┘
```
### Example of using named connections with an external dictionary with source MySQL
### Example of using named collections with an external dictionary with source MySQL
```sql
CREATE DICTIONARY dict (A Int64, B String)
@ -145,7 +145,7 @@ SELECT dictGet('dict', 'B', 2);
└─────────────────────────┘
```
## Named connections for accessing PostgreSQL database
## Named collections for accessing PostgreSQL database
The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md).
@ -166,7 +166,7 @@ Example of configuration:
</clickhouse>
```
### Example of using named connections with the postgresql function
### Example of using named collections with the postgresql function
```sql
SELECT * FROM postgresql(mypg, table = 'test');
@ -186,8 +186,7 @@ SELECT * FROM postgresql(mypg, table = 'test', schema = 'public');
└───┘
```
### Example of using named connections with database with engine PostgreSQL
### Example of using named collections with database with engine PostgreSQL
```sql
CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public');
@ -201,7 +200,7 @@ SELECT * FROM mypgtable;
└───┘
```
### Example of using named connections with database with engine PostgreSQL
### Example of using named collections with database with engine PostgreSQL
```sql
CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg);
@ -213,7 +212,7 @@ SHOW TABLES FROM mydatabase
└──────┘
```
### Example of using named connections with an external dictionary with source POSTGRESQL
### Example of using named collections with an external dictionary with source POSTGRESQL
```sql
CREATE DICTIONARY dict (a Int64, b String)
@ -228,3 +227,59 @@ SELECT dictGet('dict', 'b', 2);
│ two │
└─────────────────────────┘
```
## Named collections for accessing remote ClickHouse database
The description of parameters see [remote](../sql-reference/table-functions/remote.md/#parameters).
Example of configuration:
```xml
<clickhouse>
<named_collections>
<remote1>
<host>localhost</host>
<port>9000</port>
<database>system</database>
<user>foo</user>
<password>secret</password>
</remote1>
</named_collections>
</clickhouse>
```
### Example of using named collections with the `remote`/`remoteSecure` functions
```sql
SELECT * FROM remote(remote1, table = one);
┌─dummy─┐
│ 0 │
└───────┘
SELECT * FROM remote(remote1, database = merge(system, '^one'));
┌─dummy─┐
│ 0 │
└───────┘
INSERT INTO FUNCTION remote(remote1, database = default, table = test) VALUES (1,'a');
SELECT * FROM remote(remote1, database = default, table = test);
┌─a─┬─b─┐
│ 1 │ a │
└───┴───┘
```
### Example of using named collections with an external dictionary with source ClickHouse
```sql
CREATE DICTIONARY dict(a Int64, b String)
PRIMARY KEY a
SOURCE(CLICKHOUSE(NAME remote1 TABLE test DB default))
LIFETIME(MIN 1 MAX 2)
LAYOUT(HASHED());
SELECT dictGet('dict', 'b', 1);
┌─dictGet('dict', 'b', 1)─┐
│ a │
└─────────────────────────┘
```

View File

@ -441,6 +441,8 @@ For more information, see the section “[Configuration files](../../operations/
## interserver_listen_host {#interserver-listen-host}
Restriction on hosts that can exchange data between ClickHouse servers.
If Keeper is used, the same restriction will be applied to the communication
between different Keeper instances.
The default value equals to `listen_host` setting.
Examples:

View File

@ -411,6 +411,8 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
## interserver_listen_host {#interserver-listen-host}
Ограничение по хостам, для обмена между серверами ClickHouse.
Если используется Keeper, то такое же ограничение будет применяться к обмену данными
между различными экземплярами Keeper.
Значение по умолчанию совпадает со значением параметра `listen_host`.
Примеры:

View File

@ -17,9 +17,10 @@ empty([x])
Массив считается пустым, если он не содержит ни одного элемента.
:::note "Примечание"
:::note "Примечание"
Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM TABLE` преобразуется к запросу `SELECT arr.size0 = 0 FROM TABLE`.
:::
:::
Функция также поддерживает работу с типами [String](string-functions.md#empty) и [UUID](uuid-functions.md#empty).
**Параметры**
@ -60,9 +61,10 @@ notEmpty([x])
Массив считается непустым, если он содержит хотя бы один элемент.
:::note "Примечание"
:::note "Примечание"
Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM TABLE`.
:::
:::
Функция также поддерживает работу с типами [String](string-functions.md#notempty) и [UUID](uuid-functions.md#notempty).
**Параметры**
@ -689,9 +691,10 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res;
└─────────┘
```
:::note "Примечание"
:::note "Примечание"
Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0).
:::
:::
## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`.

View File

@ -266,10 +266,6 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
└────────────────┘
```
:::note "Attention"
`Date` или `DateTime` это возвращаемый тип функций `toStartOf*`, который описан ниже. Несмотря на то, что эти функции могут принимать `DateTime64` в качестве аргумента, если переданное значение типа `DateTime64` выходит за пределы нормального диапазона (с 1900 по 2299 год), то это даст неверный результат.
:::
:::note "Attention"
Тип возвращаемого описанными далее функциями `toStartOf*`, `toMonday` значения - `Date` или `DateTime`.
Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат.
@ -278,8 +274,8 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
* `2106-02-07 08:28:15` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `DateTime`,
* `2149-06-06` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `Date`,
* `2149-05-31` будет результатом функции `toLastDayOfMonth` при обработке аргумента больше `2149-05-31`.
:::
*
:::
## toStartOfYear {#tostartofyear}
Округляет дату или дату-с-временем вниз до первого дня года.

View File

@ -153,9 +153,10 @@ SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2);
Если вы хотите преобразовать результат в число, вы можете использовать функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs&lt;Type&gt;](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions).
:::note "Примечание"
Если `unhex` вызывается из `clickhouse-client`, двоичные строки отображаются с использованием UTF-8.
:::
:::note "Примечание"
Если `unhex` вызывается из `clickhouse-client`, двоичные строки отображаются с использованием UTF-8.
:::
Синоним: `UNHEX`.
**Синтаксис**
@ -294,9 +295,10 @@ unbin(arg)
Для числового аргумента `unbin()` не возвращает значение, обратное результату `bin()`. Чтобы преобразовать результат в число, используйте функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs&lt;Type&gt;](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264).
:::note "Примечание"
Если `unbin` вызывается из клиента `clickhouse-client`, бинарная строка возвращается в кодировке UTF-8.
:::
:::note "Примечание"
Если `unbin` вызывается из клиента `clickhouse-client`, бинарная строка возвращается в кодировке UTF-8.
:::
Поддерживает двоичные цифры `0` и `1`. Количество двоичных цифр не обязательно должно быть кратно восьми. Если строка аргумента содержит что-либо, кроме двоичных цифр, возвращается некоторый результат, определенный реализацией (ошибки не возникает).
**Аргументы**

View File

@ -3,9 +3,10 @@ sidebar_position: 58
sidebar_label: "Функции для работы с внешними словарями"
---
:::note "Внимание"
:::note "Внимание"
Для словарей, созданных с помощью [DDL-запросов](../../sql-reference/statements/create/dictionary.md), в параметре `dict_name` указывается полное имя словаря вместе с базой данных, например: `<database>.<dict_name>`. Если база данных не указана, используется текущая.
:::
:::
# Функции для работы с внешними словарями {#ext_dict_functions}
Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).

View File

@ -9,6 +9,7 @@ sidebar_label: "Функции интроспекции"
:::danger "Предупреждение"
Эти функции выполняются медленно и могут приводить к нежелательным последствиям в плане безопасности.
:::
Для правильной работы функций интроспекции:

View File

@ -360,9 +360,10 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]');
SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]');
```
:::note "Примечание"
:::note "Примечание"
до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_EXISTS(path, json)
:::
:::
## JSON_QUERY(json, path) {#json-query}
Парсит JSON и извлекает значение как JSON массив или JSON объект.
@ -386,9 +387,10 @@ SELECT toTypeName(JSON_QUERY('{"hello":2}', '$.hello'));
[2]
String
```
:::note "Примечание"
:::note "Примечание"
до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_QUERY(path, json)
:::
:::
## JSON_VALUE(json, path) {#json-value}
Парсит JSON и извлекает значение как JSON скаляр.
@ -413,9 +415,10 @@ world
String
```
:::note "Примечание"
:::note "Примечание"
до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_VALUE(path, json)
:::
:::
## toJSONString {#tojsonstring}
Сериализует значение в JSON представление. Поддерживаются различные типы данных и вложенные структуры.

View File

@ -7,6 +7,7 @@ sidebar_label: NLP
:::danger "Предупреждение"
Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
:::
## stem {#stem}
@ -129,4 +130,4 @@ SELECT synonyms('list', 'important');
<path>en/</path>
</extension>
</synonyms_extensions>
```
```

View File

@ -2020,9 +2020,10 @@ countDigits(x)
Тип: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
:::note "Примечание"
:::note "Примечание"
Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow).
:::
:::
**Пример**
Запрос:

View File

@ -27,9 +27,10 @@ position(needle IN haystack)
Алиас: `locate(haystack, needle[, start_pos])`.
:::note "Примечание"
:::note "Примечание"
Синтаксис `position(needle IN haystack)` обеспечивает совместимость с SQL, функция работает так же, как `position(haystack, needle)`.
:::
:::
**Аргументы**
- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal).
@ -327,9 +328,10 @@ Result:
Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.
:::note "Примечание"
:::note "Примечание"
Во всех функциях `multiSearch*` количество needles должно быть меньше 2<sup>8</sup> из-за особенностей реализации.
:::
:::
## match(haystack, pattern) {#matchhaystack-pattern}
Проверка строки на соответствие регулярному выражению pattern. Регулярное выражение **re2**. Синтаксис регулярных выражений **re2** является более ограниченным по сравнению с регулярными выражениями **Perl** ([подробнее](https://github.com/google/re2/wiki/Syntax)).
@ -344,9 +346,9 @@ Result:
То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее.
:::note "Примечание"
:::note "Примечание"
Длина любой строки из `haystack` должна быть меньше 2<sup>32</sup> байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API.
:::
:::
## multiMatchAnyIndex(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения.
@ -367,12 +369,13 @@ Result:
То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния.
:::note "Примечание"
:::note "Примечание"
`multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan.
:::
:::note "Примечание"
:::
:::note "Примечание"
Чтобы выключить все функции, использующие hyperscan, используйте настройку `SET allow_hyperscan = 0;`.
:::
:::
## extract(haystack, pattern) {#extracthaystack-pattern}
Извлечение фрагмента строки по регулярному выражению. Если haystack не соответствует регулярному выражению pattern, то возвращается пустая строка. Если регулярное выражение не содержит subpattern-ов, то вынимается фрагмент, который подпадает под всё регулярное выражение. Иначе вынимается фрагмент, который подпадает под первый subpattern.
@ -385,9 +388,10 @@ Result:
Разбирает строку `haystack` на фрагменты, соответствующие группам регулярного выражения `pattern`. Возвращает массив массивов, где первый массив содержит все фрагменты, соответствующие первой группе регулярного выражения, второй массив - соответствующие второй группе, и т.д.
:::note "Замечание"
:::note "Замечание"
Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical).
:::
:::
**Синтаксис**
``` sql
@ -556,9 +560,10 @@ SELECT * FROM Months WHERE ilike(name, '%j%');
Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
:::note "Примечание"
:::note "Примечание"
Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 это работает для латиницы и почти для всех кириллических букв.
:::
:::
## countMatches(haystack, pattern) {#countmatcheshaystack-pattern}
Возвращает количество совпадений, найденных в строке `haystack`, для регулярного выражения `pattern`.

View File

@ -684,9 +684,10 @@ x::t
- Преобразованное значение.
:::note "Примечание"
Если входное значение выходит за границы нового типа, то результат переполняется. Например, `CAST(-1, 'UInt8')` возвращает `255`.
:::
:::note "Примечание"
Если входное значение выходит за границы нового типа, то результат переполняется. Например, `CAST(-1, 'UInt8')` возвращает `255`.
:::
**Примеры**
Запрос:

View File

@ -120,7 +120,7 @@ public:
void initialize(Poco::Util::Application & self [[maybe_unused]]) override
{
std::string home_path;
const char * home_path_cstr = getenv("HOME");
const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
if (home_path_cstr)
home_path = home_path_cstr;
@ -613,15 +613,15 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
std::optional<std::string> env_password_str;
std::optional<std::string> env_quota_key_str;
const char * env_user = getenv("CLICKHOUSE_USER");
const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
if (env_user != nullptr)
env_user_str.emplace(std::string(env_user));
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
if (env_password != nullptr)
env_password_str.emplace(std::string(env_password));
const char * env_quota_key = getenv("CLICKHOUSE_QUOTA_KEY");
const char * env_quota_key = getenv("CLICKHOUSE_QUOTA_KEY"); // NOLINT(concurrency-mt-unsafe)
if (env_quota_key != nullptr)
env_quota_key_str.emplace(std::string(env_quota_key));

View File

@ -183,7 +183,7 @@ void Client::initialize(Poco::Util::Application & self)
{
Poco::Util::Application::initialize(self);
const char * home_path_cstr = getenv("HOME");
const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
if (home_path_cstr)
home_path = home_path_cstr;
@ -202,11 +202,11 @@ void Client::initialize(Poco::Util::Application & self)
* may be statically allocated, and can be modified by a subsequent call to getenv(), putenv(3), setenv(3), or unsetenv(3).
*/
const char * env_user = getenv("CLICKHOUSE_USER");
const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
if (env_user)
config().setString("user", env_user);
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
if (env_password)
config().setString("password", env_password);
@ -620,7 +620,7 @@ bool Client::processWithFuzzing(const String & full_query)
stderr,
"Found error: IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly.");
exit(1);
_exit(1);
}
auto fuzzed_text = ast_to_process->formatForErrorMessage();
@ -770,7 +770,7 @@ bool Client::processWithFuzzing(const String & full_query)
fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3);
fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n");
exit(1);
_exit(1);
}
}
}
@ -909,7 +909,7 @@ void Client::processOptions(const OptionsDescription & options_description,
auto exit_code = e.code() % 256;
if (exit_code == 0)
exit_code = 255;
exit(exit_code);
_exit(exit_code);
}
}

View File

@ -110,7 +110,7 @@ void DisksApp::init(std::vector<String> & common_arguments)
if (options.count("help"))
{
printHelpMessage(options_description);
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
if (!supported_commands.contains(command_name))

View File

@ -708,7 +708,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// dpkg or apt installers can ask for non-interactive work explicitly.
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND");
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND"); // NOLINT(concurrency-mt-unsafe)
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;

View File

@ -366,10 +366,10 @@ void checkHarmfulEnvironmentVariables(char ** argv)
bool require_reexec = false;
for (const auto * var : harmful_env_variables)
{
if (const char * value = getenv(var); value && value[0])
if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe)
{
/// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful
if (setenv(var, "", true))
if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently
{
fmt::print(stderr, "Cannot override {} environment variable", var);
_exit(1);

View File

@ -34,6 +34,6 @@ install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)

View File

@ -4,18 +4,14 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <cerrno>
#include <pwd.h>
#include <unistd.h>
#include <Poco/Version.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Environment.h>
#include <Common/scope_guard_safe.h>
#include <base/defines.h>
#include <Common/logger_useful.h>
#include <base/phdr_cache.h>
#include <Common/ErrorHandlers.h>
@ -45,7 +41,6 @@
#include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h>
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/IOThreadPool.h>
@ -84,7 +79,6 @@
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/filesystemHelpers.h>
#include <Common/Elf.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
@ -170,7 +164,7 @@ int mainEntryClickHouseServer(int argc, char ** argv)
/// Can be overridden by environment variable (cannot use server config at this moment).
if (argc > 0)
{
const char * env_watchdog = getenv("CLICKHOUSE_WATCHDOG_ENABLE");
const char * env_watchdog = getenv("CLICKHOUSE_WATCHDOG_ENABLE"); // NOLINT(concurrency-mt-unsafe)
if (env_watchdog)
{
if (0 == strcmp(env_watchdog, "1"))
@ -268,7 +262,6 @@ namespace ErrorCodes
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INVALID_CONFIG_PARAMETER;
extern const int SYSTEM_ERROR;
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
@ -658,6 +651,24 @@ int Server::main(const std::vector<std::string> & /*args*/)
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
#if USE_HDFS
/// This will point libhdfs3 to the right location for its config.
/// Note: this has to be done once at server initialization, because 'setenv' is not thread-safe.
String libhdfs3_conf = config().getString("hdfs.libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
{
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
}
setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), true /* overwrite */); // NOLINT
}
#endif
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
@ -698,8 +709,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
);
config().getUInt("thread_pool_queue_size", 10000));
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
@ -840,7 +850,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_TRACE(log, "Will do mlock to prevent executable memory from being paged out. It may take a few seconds.");
if (0 != mlock(addr, len))
LOG_WARNING(log, "Failed mlock: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
LOG_WARNING(log, "Failed mlock: {}", errnoToString());
else
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed, total {}", ReadableSize(len));
}
@ -908,7 +918,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
rlim.rlim_cur = config().getUInt("max_open_files", rlim.rlim_max);
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, strerror(errno));
LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString());
else
LOG_DEBUG(log, "Set max number of file descriptors to {} (was {}).", rlim.rlim_cur, old);
}
@ -931,7 +941,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
int rc = setrlimit(RLIMIT_NPROC, &rlim);
if (rc != 0)
{
LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, strerror(errno));
LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, errnoToString());
rlim.rlim_cur = old;
}
else

View File

@ -0,0 +1,905 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ClickHouse Dashboard</title>
<link rel="icon" href="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI1NCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDkgOCI+PHN0eWxlPi5ve2ZpbGw6I2ZjMH0ucntmaWxsOnJlZH08L3N0eWxlPjxwYXRoIGQ9Ik0wLDcgaDEgdjEgaC0xIHoiIGNsYXNzPSJyIi8+PHBhdGggZD0iTTAsMCBoMSB2NyBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNMiwwIGgxIHY4IGgtMSB6IiBjbGFzcz0ibyIvPjxwYXRoIGQ9Ik00LDAgaDEgdjggaC0xIHoiIGNsYXNzPSJvIi8+PHBhdGggZD0iTTYsMCBoMSB2OCBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNOCwzLjI1IGgxIHYxLjUgaC0xIHoiIGNsYXNzPSJvIi8+PC9zdmc+">
<script src="https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.iife.min.js"></script>
<style>
:root {
--color: black;
--background: linear-gradient(to bottom, #00CCFF, #00D0D0);
--chart-background: white;
--shadow-color: rgba(0, 0, 0, 0.25);
--input-shadow-color: rgba(0, 255, 0, 1);
--error-color: red;
--legend-background: rgba(255, 255, 255, 0.75);
--title-color: #666;
--text-color: black;
--edit-title-background: #FEE;
--edit-title-border: #F88;
--button-background-color: #FFCB80;
--button-text-color: black;
--new-chart-background-color: #EEE;
--new-chart-text-color: black;
--param-background-color: #EEE;
--param-text-color: black;
--input-background: white;
--chart-button-hover-color: red;
}
[data-theme="dark"] {
--color: white;
--background: #151C2C;
--chart-background: #1b2834;
--shadow-color: rgba(0, 0, 0, 0);
--input-shadow-color: rgba(255, 128, 0, 0.25);
--error-color: #F66;
--legend-background: rgba(255, 255, 255, 0.25);
--title-color: white;
--text-color: white;
--edit-title-background: #364f69;
--edit-title-border: #333;
--button-background-color: orange;
--button-text-color: black;
--new-chart-background-color: #666;
--new-chart-text-color: white;
--param-background-color: #666;
--param-text-color: white;
--input-background: #364f69;
--chart-button-hover-color: #F40;
}
* {
box-sizing: border-box;
}
html, body {
color: var(--color);
height: 100%;
overflow: auto;
margin: 0;
}
body {
font-family: Liberation Sans, DejaVu Sans, sans-serif, Noto Color Emoji, Apple Color Emoji, Segoe UI Emoji;
padding: 1rem;
overflow-x: hidden;
background: var(--background);
display: grid;
grid-template-columns: auto;
grid-template-rows: fit-content(10%) auto;
}
input {
/* iPad, Safari */
border-radius: 0;
margin: 0;
}
#charts
{
height: 100%;
display: flex;
flex-flow: row wrap;
gap: 1rem;
}
.chart {
flex: 1 40%;
min-width: 20rem;
min-height: 16rem;
background: var(--chart-background);
box-shadow: 0 0 1rem var(--shadow-color);
overflow: hidden;
position: relative;
}
.chart div { position: absolute; }
.inputs { font-size: 14pt; }
#connection-params {
margin-bottom: 0.5rem;
display: grid;
grid-template-columns: auto 15% 15%;
column-gap: 0.25rem;
}
.inputs input {
box-shadow: 0 0 1rem var(--shadow-color);
padding: 0.25rem;
}
#chart-params input {
margin-right: 0.25rem;
}
input {
font-family: Liberation Sans, DejaVu Sans, sans-serif, Noto Color Emoji, Apple Color Emoji, Segoe UI Emoji;
outline: none;
border: none;
font-size: 14pt;
background-color: var(--input-background);
color: var(--text-color);
}
.u-legend th { display: none; }
.themes {
float: right;
font-size: 20pt;
margin-bottom: 1rem;
}
#toggle-dark, #toggle-light {
padding-right: 0.5rem;
user-select: none;
cursor: pointer;
}
#toggle-dark:hover, #toggle-light:hover {
display: inline-block;
transform: translate(1px, 1px);
filter: brightness(125%);
}
#run {
background: var(--button-background-color);
color: var(--button-text-color);
font-weight: bold;
user-select: none;
cursor: pointer;
margin-bottom: 1rem;
}
#run:hover {
filter: contrast(125%);
}
#add {
font-weight: bold;
user-select: none;
cursor: pointer;
padding-left: 0.5rem;
padding-right: 0.5rem;
background: var(--new-chart-background-color);
color: var(--new-chart-text-color);
float: right;
margin-right: 0 !important;
margin-left: 1rem;
margin-bottom: 1rem;
}
#add:hover {
background: var(--button-background-color);
}
form {
display: inline;
}
form .param_name {
font-size: 14pt;
padding: 0.25rem;
background: var(--param-background-color);
color: var(--param-text-color);
display: inline-block;
box-shadow: 0 0 1rem var(--shadow-color);
margin-bottom: 1rem;
}
input:focus {
box-shadow: 0 0 1rem var(--input-shadow-color);
}
.title {
left: 50%;
top: 0.25em;
transform: translate(-50%, 0);
font-size: 16pt;
font-weight: bold;
color: var(--title-color);
z-index: 10;
}
.chart-buttons {
cursor: pointer;
display: none;
position: absolute;
top: 0.25rem;
right: 0.25rem;
font-size: 200%;
color: #888;
z-index: 10;
}
.chart-buttons a {
margin-right: 0.25rem;
}
.chart-buttons a:hover {
color: var(--chart-button-hover-color);
}
.query-editor {
display: none;
grid-template-columns: auto fit-content(10%);
grid-template-rows: auto fit-content(10%);
z-index: 11;
position: absolute;
width: 100%;
height: 100%;
}
.query-error {
display: none;
z-index: 10;
position: absolute;
color: var(--error-color);
padding: 2rem;
}
.query-editor textarea {
grid-row: 1;
grid-column: 1 / span 2;
z-index: 11;
padding: 0.5rem;
outline: none;
border: none;
font-size: 12pt;
border-bottom: 1px solid var(--edit-title-border);
background: var(--chart-background);
color: var(--text-color);
resize: none;
margin: 0;
}
.query-editor input {
grid-row: 2;
padding: 0.5rem;
}
.edit-title {
background: var(--edit-title-background);
}
.edit-confirm {
background: var(--button-background-color);
color: var(--button-text-color);
font-weight: bold;
cursor: pointer;
}
.edit-confirm:hover {
filter: contrast(125%);
}
.nowrap {
white-space: pre;
}
/* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
* It is copy-pasted to lower the number of requests.
*/
.uplot, .uplot *, .uplot *::before, .uplot *::after {box-sizing: border-box;}.uplot {font-family: system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";line-height: 1.5;width: min-content;}.u-title {text-align: center;font-size: 18px;font-weight: bold;}.u-wrap {position: relative;user-select: none;}.u-over, .u-under {position: absolute;}.u-under {overflow: hidden;}.uplot canvas {display: block;position: relative;width: 100%;height: 100%;}.u-axis {position: absolute;}.u-legend {font-size: 14px;margin: auto;text-align: center;}.u-inline {display: block;}.u-inline * {display: inline-block;}.u-inline tr {margin-right: 16px;}.u-legend th {font-weight: 600;}.u-legend th > * {vertical-align: middle;display: inline-block;}.u-legend .u-marker {width: 1em;height: 1em;margin-right: 4px;background-clip: padding-box !important;}.u-inline.u-live th::after {content: ":";vertical-align: middle;}.u-inline:not(.u-live) .u-value {display: none;}.u-series > * {padding: 4px;}.u-series th {cursor: pointer;}.u-legend .u-off > * {opacity: 0.3;}.u-select {background: rgba(0,0,0,0.07);position: absolute;pointer-events: none;}.u-cursor-x, .u-cursor-y {position: absolute;left: 0;top: 0;pointer-events: none;will-change: transform;z-index: 100;}.u-hz .u-cursor-x, .u-vt .u-cursor-y {height: 100%;border-right: 1px dashed #607D8B;}.u-hz .u-cursor-y, .u-vt .u-cursor-x {width: 100%;border-bottom: 1px dashed #607D8B;}.u-cursor-pt {position: absolute;top: 0;left: 0;border-radius: 50%;border: 0 solid;pointer-events: none;will-change: transform;z-index: 100;/*this has to be !important since we set inline "background" shorthand */background-clip: padding-box !important;}.u-axis.u-off, .u-select.u-off, .u-cursor-x.u-off, .u-cursor-y.u-off, .u-cursor-pt.u-off {display: none;}
</style>
</head>
<body>
<div class="inputs">
<form id="params">
<div id="connection-params">
<input spellcheck="false" id="url" type="text" value="" placeholder="URL" />
<input spellcheck="false" id="user" type="text" value="" placeholder="user" />
<input spellcheck="false" id="password" type="password" placeholder="password" />
</div>
<div>
<input id="add" type="button" value="Add chart">
<span class="nowrap themes"><span id="toggle-dark">🌚</span><span id="toggle-light">🌞</span></span>
<div id="chart-params"></div>
</div>
</form>
</div>
<div id="charts"></div>
<script>
/** Implementation note: it might be more natural to use some reactive framework.
* But for now it is small enough to avoid it. As a bonus we have less number of dependencies,
* which is better for maintainability.
*
* TODO:
* - zoom on the graphs should work on touch devices;
* - add mass edit capability (edit the JSON config as a whole);
* - compress the state for URL's #hash;
* - save/load JSON configuration;
* - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards;
* - multiple lines on chart;
* - if a query returned one value, display this value instead of a diagram;
* - if a query returned something unusual, display the table;
*/
let host = 'https://play.clickhouse.com/';
let user = 'explorer';
let password = '';
/// If it is hosted on server, assume that it is the address of ClickHouse.
if (location.protocol != 'file:') {
host = location.origin;
user = 'default';
}
/// This is just a demo configuration of the dashboard.
let queries = [
{
"title": "Queries/second",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_Query)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "CPU Usage (cores)",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_OSCPUVirtualTimeMicroseconds) / 1000000
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Queries Running",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(CurrentMetric_Query)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Merges Running",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(CurrentMetric_Merge)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Selected Bytes/second",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_SelectedBytes)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "IO Wait",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_OSIOWaitMicroseconds) / 1000000
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "CPU Wait",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_OSCPUWaitMicroseconds) / 1000000
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "OS CPU Usage (Userspace)",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(value)
FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'OSUserTimeNormalized'
GROUP BY t
ORDER BY t`
},
{
"title": "OS CPU Usage (Kernel)",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(value)
FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'OSSystemTimeNormalized'
GROUP BY t
ORDER BY t`
},
{
"title": "Read From Disk",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_OSReadBytes)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Read From Filesystem",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_OSReadChars)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Memory (tracked)",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(CurrentMetric_MemoryTracking)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Load Average (15 minutes)",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(value)
FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'LoadAverage15'
GROUP BY t
ORDER BY t`
},
{
"title": "Selected Rows/second",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_SelectedRows)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Inserted Rows/second",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(ProfileEvent_InsertedRows)
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
},
{
"title": "Total MergeTree Parts",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(value)
FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'TotalPartsOfMergeTreeTables'
GROUP BY t
ORDER BY t`
},
{
"title": "Max Parts For Partition",
"query": `SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, max(value)
FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'MaxPartCountForPartition'
GROUP BY t
ORDER BY t`
}
];
/// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries.
let params = {
"rounding": "60",
"seconds": "86400"
};
let theme = 'light';
/// Switch the dashboard to the given color scheme ('light' or 'dark'),
/// remember the choice across page loads, and redraw all charts so the
/// plot colors match the new theme.
function setTheme(new_theme) {
    theme = new_theme;
    const root = document.documentElement;
    root.setAttribute('data-theme', theme);
    window.localStorage.setItem('theme', theme);
    drawAll();
}
/// Theme toggle buttons (sun/moon) in the top-right corner.
document.getElementById('toggle-light').addEventListener('click', e => setTheme('light'));
document.getElementById('toggle-dark').addEventListener('click', e => setTheme('dark'));
/// uPlot objects will go here, one slot per chart (null when not drawn yet).
let plots = [];
/// The #charts container; chart div's will be appended here.
let charts = document.getElementById('charts');
/// Matches {name:Type} query placeholders. This is not quite correct
/// (we cannot really parse SQL with regexp) but tolerable.
const query_param_regexp = /\{(\w+):[^}]+\}/g;
/// Automatically parse more parameters from the queries.
/// Scan a single SQL query for {name:Type} placeholders and register each
/// parameter name in `new_params`, carrying over the current value from the
/// global `params` map when one is already known (empty string otherwise).
function findParamsInQuery(query, new_params) {
    for (const match of query.matchAll(query_param_regexp)) {
        const param_name = match[1];
        new_params[param_name] = params[param_name] || '';
    }
}
/// Rebuild the global `params` map from scratch by collecting the
/// placeholders of every configured query. Parameters no longer referenced
/// by any query are dropped.
function findParamsInQueries() {
    const collected = {};
    for (const q of queries) {
        findParamsInQuery(q.query, collected);
    }
    params = collected;
}
/// Append one labeled text input for a query parameter (`name` -> `value`)
/// to the #chart-params area. The input carries class 'param' so that
/// updateParams() can later read it back by name.
function insertParam(name, value) {
    const wrapper = document.createElement('span');
    wrapper.className = 'nowrap';

    const label = document.createElement('span');
    label.className = 'param_name';
    label.appendChild(document.createTextNode(`${name}: `));

    const field = document.createElement('input');
    field.className = 'param';
    field.name = `${name}`;
    field.type = 'text';
    field.value = value;
    field.spellcheck = false;

    wrapper.appendChild(label);
    wrapper.appendChild(field);
    document.getElementById('chart-params').appendChild(wrapper);
}
/// Rebuild the #chart-params area from the global `params` map: clear out
/// every previously rendered input, insert one labeled input per parameter,
/// and finish with the "Ok" submit button that triggers a redraw.
function buildParams() {
    const container = document.getElementById('chart-params');
    while (container.firstChild) {
        container.removeChild(container.lastChild);
    }

    Object.entries(params).forEach(([name, value]) => insertParam(name, value));

    const submit = document.createElement('input');
    submit.id = 'run';
    submit.type = 'submit';
    submit.value = 'Ok';
    container.appendChild(submit);
}
/// Read every '.param' input back into the global `params` map
/// (input name -> current value).
function updateParams() {
    for (const input of document.getElementsByClassName('param')) {
        params[input.name] = input.value;
    }
}
/// Serialize the global `params` map into a URL suffix of the form
/// "&param_name=value&..." with both names and values percent-encoded,
/// ready to append to a ClickHouse HTTP query URL.
function getParamsForURL() {
    const parts = [];
    for (const [name, value] of Object.entries(params)) {
        parts.push(`&param_${encodeURIComponent(name)}=${encodeURIComponent(value)}`);
    }
    return parts.join('');
}
/// Build the DOM for chart number `i` (title, error area, inline query
/// editor with confirm button, and hover-only edit/delete buttons) and
/// append it to the #charts container. The chart itself is rendered later
/// by draw()/drawAll().
function insertChart(i) {
    let q = queries[i];
    let chart = document.createElement('div');
    chart.className = 'chart';

    let chart_title = document.createElement('div');
    let title_text = document.createTextNode('');
    chart_title.appendChild(title_text);
    chart_title.className = 'title';
    chart.appendChild(chart_title);

    let query_error = document.createElement('div');
    query_error.className = 'query-error';
    query_error.appendChild(document.createTextNode(''));
    chart.appendChild(query_error);

    let query_editor = document.createElement('div');
    query_editor.className = 'query-editor';

    let query_editor_textarea = document.createElement('textarea');
    query_editor_textarea.spellcheck = false;
    query_editor_textarea.value = q.query;
    query_editor_textarea.placeholder = 'Query';
    query_editor.appendChild(query_editor_textarea);

    let query_editor_title = document.createElement('input');
    query_editor_title.type = 'text';
    query_editor_title.value = q.title;
    query_editor_title.placeholder = 'Chart title';
    query_editor_title.className = 'edit-title';
    query_editor.appendChild(query_editor_title);

    let query_editor_confirm = document.createElement('input');
    query_editor_confirm.type = 'submit';
    query_editor_confirm.value = 'Ok';
    query_editor_confirm.className = 'edit-confirm';

    /// Apply the edited title/query, refresh the parameter inputs (the new
    /// query may reference new {param} placeholders), redraw and persist.
    function editConfirm() {
        query_editor.style.display = 'none';
        query_error.style.display = 'none';
        q.title = query_editor_title.value;
        q.query = query_editor_textarea.value;
        title_text.data = '';
        findParamsInQuery(q.query, params);
        buildParams();
        draw(i, chart, getParamsForURL(), q.query);
        saveState();
    }
    query_editor_confirm.addEventListener('click', editConfirm);

    /// Ctrl+Enter (or Cmd+Enter on Mac) will also confirm editing.
    /// Fixed: use the handler's own `e` argument instead of the deprecated
    /// global `event` object (the sibling keyup handler already used `e`).
    query_editor.addEventListener('keydown', e => {
        if ((e.metaKey || e.ctrlKey) && (e.keyCode == 13 || e.keyCode == 10)) {
            editConfirm();
        }
    });
    query_editor.addEventListener('keyup', e => {
        if (e.key == 'Escape') {
            query_editor.style.display = 'none';
        }
    });
    query_editor.appendChild(query_editor_confirm);
    chart.appendChild(query_editor);

    let edit_buttons = document.createElement('div');
    edit_buttons.className = 'chart-buttons';

    let edit = document.createElement('a');
    let edit_text = document.createTextNode('✎');
    edit.appendChild(edit_text);

    function editStart() {
        query_editor.style.display = 'grid';
        query_editor_textarea.focus();
    }
    edit.addEventListener('click', e => editStart());

    /// A freshly added chart has an empty query — open the editor right away.
    if (!q.query) {
        editStart();
    }

    let trash = document.createElement('a');
    let trash_text = document.createTextNode('✕');
    trash.appendChild(trash_text);
    trash.addEventListener('click', e => {
        /// Indices may change after deletion of other element, hence captured "i" may become incorrect.
        let idx = [...charts.querySelectorAll('.chart')].findIndex(child => chart == child);
        if (plots[idx]) {
            plots[idx].destroy();
            plots[idx] = null;
        }
        plots.splice(idx, 1);
        charts.removeChild(chart);
        queries.splice(idx, 1);
        findParamsInQueries();
        buildParams();
        resize();
        saveState();
    });

    edit_buttons.appendChild(edit);
    edit_buttons.appendChild(trash);
    chart.appendChild(edit_buttons);

    /// The edit/delete buttons are only shown while hovering over the chart.
    chart.addEventListener('mouseenter', e => { edit_buttons.style.display = 'block'; });
    chart.addEventListener('mouseleave', e => { edit_buttons.style.display = 'none'; });

    charts.appendChild(chart);
}
/// "Add chart" button: register an empty query (its editor opens
/// immediately inside insertChart because the query is empty),
/// reserve a plot slot, and reflow the layout.
document.getElementById('add').addEventListener('click', e => {
queries.push({ title: '', query: '' });
insertChart(plots.length);
plots.push(null);
resize();
});
/// uPlot plugin: detach the built-in legend from its usual place and turn
/// it into a floating tooltip that follows the cursor over the plot area.
/// `className` optionally adds an extra CSS class; `style` overrides the
/// default tooltip styling.
function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) {
    let legendEl;

    function init(u, opts) {
        legendEl = u.root.querySelector(".u-legend");
        legendEl.classList.remove("u-inline");
        className && legendEl.classList.add(className);

        uPlot.assign(legendEl.style, {
            textAlign: "left",
            pointerEvents: "none",
            display: "none",
            position: "absolute",
            left: 0,
            top: 0,
            zIndex: 100,
            boxShadow: "2px 2px 10px rgba(0,0,0,0.1)",
            ...style
        });

        // hide series color markers
        const markers = legendEl.querySelectorAll(".u-marker");
        for (let i = 0; i < markers.length; i++) {
            markers[i].style.display = "none";
        }

        // re-parent the legend into the plot area and toggle it on hover
        const overEl = u.over;
        overEl.appendChild(legendEl);
        overEl.addEventListener("mouseenter", () => { legendEl.style.display = null; });
        overEl.addEventListener("mouseleave", () => { legendEl.style.display = "none"; });
    }

    // Center the tooltip on the cursor position.
    function update(u) {
        let { left, top } = u.cursor;
        left -= legendEl.clientWidth / 2;
        top -= legendEl.clientHeight / 2;
        legendEl.style.transform = "translate(" + left + "px, " + top + "px)";
    }

    return {
        hooks: {
            init: init,
            setCursor: update,
        }
    };
}
/// Render chart `idx`: POST `query` (with the `url_params` suffix from
/// getParamsForURL()) to the ClickHouse HTTP endpoint, validate the
/// JSONCompactColumns response ([timestamps[], values[]]), and draw it with
/// uPlot into the `chart` div. On failure the error text is shown in place
/// of the chart title.
async function draw(idx, chart, url_params, query) {
/// Destroy the stale plot before re-querying.
if (plots[idx]) {
plots[idx].destroy();
plots[idx] = null;
}
/// Connection settings are re-read on every draw so edits take effect
/// without a reload.
host = document.getElementById('url').value;
user = document.getElementById('user').value;
password = document.getElementById('password').value;
let url = `${host}?default_format=JSONCompactColumns`
if (user) {
url += `&user=${encodeURIComponent(user)}`;
}
if (password) {
url += `&password=${encodeURIComponent(password)}`;
}
let response, data, error;
try {
response = await fetch(url + url_params, { method: "POST", body: query });
data = await response.text();
if (response.ok) {
data = JSON.parse(data);
} else {
/// Non-2xx: the body is the server's error message.
error = data;
}
} catch (e) {
console.log(e);
error = e.toString();
}
/// Shape checks: expect exactly two equally-sized column arrays
/// (unix timestamps and values).
if (!error) {
if (!Array.isArray(data)) {
error = "Query should return an array.";
} else if (data.length == 0) {
error = "Query returned empty result.";
} else if (data.length != 2) {
error = "Query should return exactly two columns: unix timestamp and value.";
} else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) {
error = "Wrong data format of the query.";
}
}
let error_div = chart.querySelector('.query-error');
let title_div = chart.querySelector('.title');
if (error) {
error_div.firstChild.data = error;
title_div.style.display = 'none';
error_div.style.display = 'block';
return;
} else {
error_div.firstChild.data = '';
error_div.style.display = 'none';
title_div.style.display = 'block';
}
/// Colors depend on the current theme (light/dark).
const [line_color, fill_color, grid_color, axes_color] = theme != 'dark'
? ["#F88", "#FEE", "#EED", "#2c3235"]
: ["#864", "#045", "#2c3235", "#c7d0d9"];
/// All charts share one cursor-sync group keyed "sync".
let sync = uPlot.sync("sync");
const max_value = Math.max(...data[1]);
const opts = {
width: chart.clientWidth,
height: chart.clientHeight,
axes: [ { stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } },
{ stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } } ],
series: [ { label: "x" },
{ label: "y", stroke: line_color, fill: fill_color } ],
/// Left padding scaled to the widest y-axis label so it is not clipped.
padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ],
plugins: [ legendAsTooltipPlugin() ],
cursor: {
sync: {
key: "sync",
}
}
};
plots[idx] = new uPlot(opts, data, chart);
sync.sub(plots[idx]);
/// Set title; {param} placeholders in the title are substituted with
/// the current parameter values.
const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] );
chart.querySelector('.title').firstChild.data = title;
}
/// Redraw every configured chart. Individual draws are kicked off without
/// awaiting, so the queries run concurrently.
async function drawAll() {
    const url_params = getParamsForURL();
    const chart_elements = document.getElementsByClassName('chart');
    queries.forEach((entry, idx) => {
        draw(idx, chart_elements[idx], url_params, entry.query);
    });
}
/// Fit each live plot to the current size of its enclosing .chart element.
function resize() {
    for (const plot of plots) {
        if (!plot) {
            continue;
        }
        const container = plot.over.closest('.chart');
        plot.setSize({ width: container.clientWidth, height: container.clientHeight });
    }
}
// Re-fit all plots whenever the page size changes.
new ResizeObserver(resize).observe(document.body);
// Submitting the parameters form re-reads inputs, redraws everything and
// records the new state in the URL/history instead of navigating away.
document.getElementById('params').onsubmit = function(event) {
updateParams();
drawAll();
saveState();
event.preventDefault();
}
/// Record the current dashboard state (host, user, queries, params — note:
/// not the password) in the browser history and in the URL hash, so the
/// dashboard can be bookmarked and restored via back/forward navigation.
function saveState() {
    const state = { host, user, queries, params };
    const hash = btoa(JSON.stringify(state));
    const url = window.location.pathname + (window.location.search || '') + '#' + hash;
    history.pushState(state, '', url);
}
/// Rebuild the page from the current global state: refill the connection
/// form, recompute substitution parameters, destroy all plots and chart DOM
/// nodes, then insert a fresh chart container for every query.
function regenerate() {
    document.getElementById('url').value = host;
    document.getElementById('user').value = user;
    document.getElementById('password').value = password;

    findParamsInQueries();
    buildParams();

    // Destroy live uPlot instances and reset all slots to empty.
    for (const plot of plots) {
        if (plot) {
            plot.destroy();
        }
    }
    plots = new Array(queries.length).fill(null);

    // Remove every existing chart container before re-inserting.
    while (charts.firstChild) {
        charts.removeChild(charts.lastChild);
    }
    queries.forEach((_, idx) => insertChart(idx));
}
// Back/forward navigation: restore the dashboard from the state object that
// saveState() attached to the history entry, then rebuild and redraw.
// The saved state carries no password (see saveState), so the password field
// keeps whatever value it currently has.
window.onpopstate = function(event) {
if (!event.state) { return; }
({host, user, queries, params} = event.state);
regenerate();
drawAll();
};
// Startup: if the URL carries a '#'-encoded state (base64 JSON written by
// saveState), restore it over the defaults. The empty catch is deliberate
// best-effort — a malformed hash just leaves the default state in place.
if (window.location.hash) {
try {
({host, user, queries, params} = JSON.parse(atob(window.location.hash.substring(1))));
} catch {}
}
regenerate();
// Apply a previously chosen theme if it differs from the current one;
// setTheme presumably triggers its own redraw, so only call drawAll()
// directly when no theme switch happens — TODO confirm against setTheme.
let new_theme = window.localStorage.getItem('theme');
if (new_theme && new_theme != theme) {
setTheme(new_theme);
} else {
drawAll();
}
</script>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -160,7 +160,7 @@ try
if (options.empty() || options.count("help"))
{
std::cout << description << std::endl;
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
String metadata_path;

View File

@ -108,7 +108,7 @@ try
if (argc < 3)
{
std::cout << "Usage: ./clickhouse su user:group ..." << std::endl;
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
std::string_view user_and_group = argv[1];

View File

@ -179,7 +179,7 @@ void IBridge::initialize(Application & self)
limit.rlim_max = limit.rlim_cur = gb;
if (setrlimit(RLIMIT_RSS, &limit))
LOG_WARNING(log, "Unable to set maximum RSS to 1GB: {} (current rlim_cur={}, rlim_max={})",
errnoToString(errno), limit.rlim_cur, limit.rlim_max);
errnoToString(), limit.rlim_cur, limit.rlim_max);
if (!getrlimit(RLIMIT_RSS, &limit))
LOG_INFO(log, "RSS limit: cur={}, max={}", limit.rlim_cur, limit.rlim_max);

View File

@ -1952,7 +1952,7 @@ void ClientBase::runInteractive()
if (home_path.empty())
{
const char * home_path_cstr = getenv("HOME");
const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
if (home_path_cstr)
home_path = home_path_cstr;
}
@ -1962,7 +1962,7 @@ void ClientBase::runInteractive()
history_file = config().getString("history_file");
else
{
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); // NOLINT(concurrency-mt-unsafe)
if (history_file_from_env)
history_file = history_file_from_env;
else if (!home_path.empty())
@ -2260,13 +2260,13 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("version") || options.count("V"))
{
showClientVersion();
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
if (options.count("version-clean"))
{
std::cout << VERSION_STRING;
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
/// Output of help message.
@ -2274,7 +2274,7 @@ void ClientBase::init(int argc, char ** argv)
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
{
printHelpMessage(options_description);
exit(0);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
/// Common options for clickhouse-client and clickhouse-local.

View File

@ -269,7 +269,7 @@ public:
bool isFixedAndContiguous() const override { return data->isFixedAndContiguous(); }
bool valuesHaveFixedSize() const override { return data->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return data->sizeOfValueIfFixed(); }
StringRef getRawData() const override { return data->getRawData(); }
std::string_view getRawData() const override { return data->getRawData(); }
/// Not part of the common interface.

View File

@ -71,9 +71,9 @@ public:
data.resize_assume_reserved(data.size() - n);
}
StringRef getRawData() const override
std::string_view getRawData() const override
{
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
return {reinterpret_cast<const char*>(data.data()), byteSize()};
}
StringRef getDataAt(size_t n) const override

View File

@ -209,7 +209,7 @@ public:
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return n; }
StringRef getRawData() const override { return StringRef(chars.data(), chars.size()); }
std::string_view getRawData() const override { return {reinterpret_cast<const char *>(chars.data()), chars.size()}; }
/// Specialized part of interface, not from IColumn.
void insertString(const String & string) { insertData(string.c_str(), string.size()); }

View File

@ -332,9 +332,9 @@ public:
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
StringRef getRawData() const override
std::string_view getRawData() const override
{
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
return {reinterpret_cast<const char*>(data.data()), byteSize()};
}
StringRef getDataAt(size_t n) const override

View File

@ -507,7 +507,7 @@ public:
[[nodiscard]] virtual bool isFixedAndContiguous() const { return false; }
/// If isFixedAndContiguous, returns the underlying data array, otherwise throws an exception.
[[nodiscard]] virtual StringRef getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); }
[[nodiscard]] virtual std::string_view getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); }
/// If valuesHaveFixedSize, returns size of value, otherwise throw an exception.
[[nodiscard]] virtual size_t sizeOfValueIfFixed() const { throw Exception("Values of column " + getName() + " are not fixed size.", ErrorCodes::CANNOT_GET_SIZE_OF_FIELD); }

View File

@ -41,12 +41,12 @@ struct HashMethodOneNumber
/// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise.
HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
{
vec = key_columns[0]->getRawData().data;
vec = key_columns[0]->getRawData().data();
}
explicit HashMethodOneNumber(const IColumn * column)
{
vec = column->getRawData().data;
vec = column->getRawData().data();
}
/// Creates context. Method is called once and result context is used in all threads.
@ -577,7 +577,7 @@ struct HashMethodKeysFixed
columns_data.reset(new const char*[keys_size]);
for (size_t i = 0; i < keys_size; ++i)
columns_data[i] = Base::getActualColumns()[i]->getRawData().data;
columns_data[i] = Base::getActualColumns()[i]->getRawData().data();
}
#endif
}

View File

@ -419,7 +419,7 @@ void ConfigProcessor::doIncludesRecursive(
XMLDocumentPtr env_document;
auto get_env_node = [&](const std::string & name) -> const Node *
{
const char * env_val = std::getenv(name.c_str());
const char * env_val = std::getenv(name.c_str()); // NOLINT(concurrency-mt-unsafe) // this is safe on Linux glibc/Musl, but potentially not safe on other platforms
if (env_val == nullptr)
return nullptr;

View File

@ -86,13 +86,10 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou
static DNSResolver::IPAddresses hostByName(const std::string & host)
{
/// Family: AF_UNSPEC
/// AI_ALL is required for checking if client is allowed to connect from an address
auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL;
/// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured.
/// It should not affect client address checking, since client cannot connect from IPv6 address
/// if server has no IPv6 addresses.
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
auto flags = Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
DNSResolver::IPAddresses addresses;

View File

@ -30,12 +30,12 @@ std::string determineDefaultTimeZone()
{
namespace fs = std::filesystem;
const char * tzdir_env_var = std::getenv("TZDIR");
const char * tzdir_env_var = std::getenv("TZDIR"); // NOLINT(concurrency-mt-unsafe) // ok, because it does not run concurrently with other getenv calls
fs::path tz_database_path = tzdir_env_var ? tzdir_env_var : "/usr/share/zoneinfo/";
fs::path tz_file_path;
std::string error_prefix;
const char * tz_env_var = std::getenv("TZ");
const char * tz_env_var = std::getenv("TZ"); // NOLINT(concurrency-mt-unsafe) // ok, because it does not run concurrently with other getenv calls
/// In recent tzdata packages some files now are symlinks and canonical path resolution
/// may give wrong timezone names - store the name as it is, if possible.

View File

@ -152,12 +152,12 @@ Exception::FramePointers Exception::getStackFramePointers() const
void throwFromErrno(const std::string & s, int code, int the_errno)
{
throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno);
throw ErrnoException(s + ", " + errnoToString(the_errno), code, the_errno);
}
void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, int the_errno)
{
throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno, path);
throw ErrnoException(s + ", " + errnoToString(the_errno), code, the_errno, path);
}
static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message)

View File

@ -430,19 +430,20 @@ void FileSegment::completeBatchAndResetDownloader()
cv.notify_all();
}
void FileSegment::completeWithState(State state, bool auto_resize)
void FileSegment::completeWithState(State state)
{
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);
assertNotDetached(segment_lock);
bool is_downloader = isDownloaderImpl(segment_lock);
if (!is_downloader)
auto caller_id = getCallerId();
if (caller_id != downloader_id)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"File segment completion can be done only by downloader. (CallerId: {}, downloader id: {}",
caller_id, downloader_id);
}
if (state != State::DOWNLOADED
@ -450,140 +451,48 @@ void FileSegment::completeWithState(State state, bool auto_resize)
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}
if (state == State::DOWNLOADED)
{
if (auto_resize && downloaded_size != range().size())
{
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), downloaded_size);
assert(downloaded_size <= range().size());
segment_range = Range(segment_range.left, segment_range.left + downloaded_size - 1);
}
/// Update states and finalize cache write buffer.
setDownloaded(segment_lock);
if (downloaded_size != range().size())
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment as DOWNLOADED, because downloaded size ({}) does not match expected size ({})",
downloaded_size, range().size());
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}
download_state = state;
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && is_downloader)
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
completeBasedOnCurrentState(cache_lock, segment_lock);
}
void FileSegment::completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock)
void FileSegment::completeWithoutState(std::lock_guard<std::mutex> & cache_lock)
{
std::lock_guard segment_lock(mutex);
completeBasedOnCurrentState(cache_lock, segment_lock);
}
void FileSegment::completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock)
{
if (is_detached)
return;
assertNotDetached(segment_lock);
completeBasedOnCurrentStateUnlocked(cache_lock, segment_lock);
}
void FileSegment::completeBasedOnCurrentStateUnlocked(
std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock)
{
bool is_downloader = isDownloaderImpl(segment_lock);
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
bool can_update_segment_state = is_downloader || is_last_holder;
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (is_last_holder && download_state == State::SKIP_CACHE)
{
cache->remove(key(), offset(), cache_lock, segment_lock);
return;
}
if (download_state == State::SKIP_CACHE || is_detached)
return;
if (isDownloaderImpl(segment_lock)
&& download_state != State::DOWNLOADED
&& getDownloadedSize(segment_lock) == range().size())
{
setDownloaded(segment_lock);
}
assertNotDetached(segment_lock);
if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
{
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.
bool can_update_segment_state = isDownloaderImpl(segment_lock) || is_last_holder;
if (can_update_segment_state)
download_state = State::PARTIALLY_DOWNLOADED;
}
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && isDownloaderImpl(segment_lock))
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
}
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (is_last_holder
&& (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION))
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
SCOPE_EXIT({
if (is_downloader)
{
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
cv.notify_all();
}
});
LOG_TEST(log, "Complete without state (is_last_holder: {}). File segment info: {}", is_last_holder, getInfoForLogImpl(segment_lock));
if (can_update_segment_state)
{
if (current_downloaded_size == range().size())
setDownloaded(segment_lock);
else
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
/// Resize this file segment by creating a copy file segment with DOWNLOADED state,
/// but current file segment should remain PARRTIALLY_DOWNLOADED_NO_CONTINUATION and with detached state,
/// because otherwise an invariant that getOrSet() returns a contiguous range of file segments will be broken
/// (this will be crucial for other file segment holder, not for current one).
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
}
download_state = State::PARTIALLY_DOWNLOADED;
markAsDetached(segment_lock);
resetDownloaderImpl(segment_lock);
if (cache_writer)
{
@ -593,10 +502,62 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo
}
}
if (!downloader_id.empty() && (isDownloaderImpl(segment_lock) || is_last_holder))
switch (download_state)
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();
case State::SKIP_CACHE:
{
if (is_last_holder)
cache->remove(key(), offset(), cache_lock, segment_lock);
return;
}
case State::DOWNLOADED:
{
assert(downloaded_size == range().size());
assert(is_downloaded);
break;
}
case State::DOWNLOADING:
case State::EMPTY:
{
assert(!is_last_holder);
break;
}
case State::PARTIALLY_DOWNLOADED:
case State::PARTIALLY_DOWNLOADED_NO_CONTINUATION:
{
if (is_last_holder)
{
if (current_downloaded_size == 0)
{
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
download_state = State::SKIP_CACHE;
cache->remove(key(), offset(), cache_lock, segment_lock);
}
else
{
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
/// Resize this file segment by creating a copy file segment with DOWNLOADED state,
/// but current file segment should remain PARRTIALLY_DOWNLOADED_NO_CONTINUATION and with detached state,
/// because otherwise an invariant that getOrSet() returns a contiguous range of file segments will be broken
/// (this will be crucial for other file segment holder, not for current one).
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
}
markAsDetached(segment_lock);
}
break;
}
}
LOG_TEST(log, "Completed file segment: {}", getInfoForLogImpl(segment_lock));
@ -793,7 +754,7 @@ FileSegmentsHolder::~FileSegmentsHolder()
/// under the same mutex, because complete() checks for segment pointers.
std::lock_guard cache_lock(cache->mutex);
file_segment->completeBasedOnCurrentState(cache_lock);
file_segment->completeWithoutState(cache_lock);
file_segment_it = file_segments.erase(current_file_segment_it);
}
@ -859,13 +820,20 @@ void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
/// was initially set with a margin as `max_file_segment_size`. => We need to always
/// resize to actual size after download finished.
file_segment.getOrSetDownloader();
file_segment.completeWithState(FileSegment::State::DOWNLOADED, /* auto_resize */true);
assert(file_segment.downloaded_size <= file_segment.range().size());
file_segment.segment_range = FileSegment::Range(
file_segment.segment_range.left, file_segment.segment_range.left + file_segment.downloaded_size - 1);
file_segment.reserved_size = file_segment.downloaded_size;
file_segment.completeWithState(FileSegment::State::DOWNLOADED);
on_complete_file_segment_func(file_segment);
}
else
{
std::lock_guard cache_lock(cache->mutex);
file_segment.completeBasedOnCurrentState(cache_lock);
file_segment.completeWithoutState(cache_lock);
}
}

View File

@ -142,7 +142,7 @@ public:
void completeBatchAndResetDownloader();
void completeWithState(State state, bool auto_resize = false);
void completeWithState(State state);
String getInfoForLog() const;
@ -195,12 +195,8 @@ private:
/// FileSegmentsHolder. complete() might check if the caller of the method
/// is the last alive holder of the segment. Therefore, complete() and destruction
/// of the file segment pointer must be done under the same cache mutex.
void completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock);
void completeBasedOnCurrentStateUnlocked(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock);
void completeImpl(
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock);
void completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock);
void completeWithoutState(std::lock_guard<std::mutex> & cache_lock);
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);

View File

@ -106,7 +106,7 @@ void LazyPipeFDs::tryIncreaseSize(int desired_size)
{
if (errno == EINVAL)
{
LOG_INFO(log, "Cannot get pipe capacity, {}. Very old Linux kernels have no support for this fcntl.", errnoToString(ErrorCodes::CANNOT_FCNTL));
LOG_INFO(log, "Cannot get pipe capacity, {}. Very old Linux kernels have no support for this fcntl.", errnoToString());
/// It will work nevertheless.
}
else

View File

@ -81,7 +81,6 @@ namespace ErrorCodes
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_CREATE_TIMER;
extern const int CANNOT_SET_TIMER_PERIOD;
extern const int CANNOT_DELETE_TIMER;
extern const int NOT_IMPLEMENTED;
}
@ -188,7 +187,7 @@ void QueryProfilerBase<ProfilerImpl>::tryCleanup()
if (timer_id.has_value())
{
if (timer_delete(*timer_id))
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString(ErrorCodes::CANNOT_DELETE_TIMER));
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString());
timer_id.reset();
}

View File

@ -17,7 +17,7 @@ SharedLibrary::SharedLibrary(std::string_view path, int flags)
{
handle = dlopen(path.data(), flags);
if (!handle)
throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror());
throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
updatePHDRCache();
@ -33,11 +33,11 @@ SharedLibrary::~SharedLibrary()
void * SharedLibrary::getImpl(std::string_view name, bool no_throw)
{
dlerror();
dlerror(); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
auto * res = dlsym(handle, name.data());
if (char * error = dlerror())
if (char * error = dlerror()) // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
{
if (no_throw)
return nullptr;

View File

@ -76,7 +76,7 @@ ShellCommand::~ShellCommand()
int retcode = kill(pid, SIGTERM);
if (retcode != 0)
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode));
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString());
}
else
{
@ -201,8 +201,8 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
// by the child process, which might not expect this.
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask);
sigprocmask(SIG_UNBLOCK, &mask, nullptr);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe)
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe)
execv(filename, argv);
/// If the process is running, then `execv` does not return here.

View File

@ -23,7 +23,6 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_CLOSE_FILE;
extern const int CANNOT_TRUNCATE_FILE;
extern const int CANNOT_SEEK_THROUGH_FILE;
}
@ -98,10 +97,10 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
StatusFile::~StatusFile()
{
if (0 != close(fd))
LOG_ERROR(&Poco::Logger::get("StatusFile"), "Cannot close file {}, {}", path, errnoToString(ErrorCodes::CANNOT_CLOSE_FILE));
LOG_ERROR(&Poco::Logger::get("StatusFile"), "Cannot close file {}, {}", path, errnoToString());
if (0 != unlink(path.c_str()))
LOG_ERROR(&Poco::Logger::get("StatusFile"), "Cannot unlink file {}, {}", path, errnoToString(ErrorCodes::CANNOT_CLOSE_FILE));
LOG_ERROR(&Poco::Logger::get("StatusFile"), "Cannot unlink file {}, {}", path, errnoToString());
}
}

View File

@ -82,7 +82,7 @@ ThreadFuzzer::ThreadFuzzer()
template <typename T>
static void initFromEnv(T & what, const char * name)
{
const char * env = getenv(name);
const char * env = getenv(name); // NOLINT(concurrency-mt-unsafe)
if (!env)
return;
what = parse<T>(env);
@ -91,7 +91,7 @@ static void initFromEnv(T & what, const char * name)
template <typename T>
static void initFromEnv(std::atomic<T> & what, const char * name)
{
const char * env = getenv(name);
const char * env = getenv(name); // NOLINT(concurrency-mt-unsafe)
if (!env)
return;
what.store(parse<T>(env), std::memory_order_relaxed);

View File

@ -301,7 +301,7 @@ static void enablePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't enable perf event with file descriptor {}: '{}' ({})",
event_fd, errnoToString(errno), errno);
event_fd, errnoToString(), errno);
}
}
@ -311,7 +311,7 @@ static void disablePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't disable perf event with file descriptor {}: '{}' ({})",
event_fd, errnoToString(errno), errno);
event_fd, errnoToString(), errno);
}
}
@ -321,7 +321,7 @@ static void releasePerfEvent(int event_fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't close perf event file descriptor {}: {} ({})",
event_fd, errnoToString(errno), errno);
event_fd, errnoToString(), errno);
}
}
@ -339,7 +339,7 @@ static bool validatePerfEventDescriptor(int & fd)
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Error while checking availability of event descriptor {}: {} ({})",
fd, errnoToString(errno), errno);
fd, errnoToString(), errno);
disablePerfEvent(fd);
releasePerfEvent(fd);
@ -446,7 +446,7 @@ bool PerfEventsCounters::processThreadLocalChanges(const std::string & needed_ev
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Failed to open perf event {} (event_type={}, event_config={}): "
"'{}' ({})", event_info.settings_name, event_info.event_type,
event_info.event_config, errnoToString(errno), errno);
event_info.event_config, errnoToString(), errno);
}
}
@ -532,7 +532,7 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile
{
LOG_WARNING(&Poco::Logger::get("PerfEvents"),
"Can't read event value from file descriptor {}: '{}' ({})",
fd, errnoToString(errno), errno);
fd, errnoToString(), errno);
current_values[i] = {};
}
}

View File

@ -120,7 +120,7 @@ ThreadStatus::ThreadStatus()
if (0 != sigaltstack(&altstack_description, nullptr))
{
LOG_WARNING(log, "Cannot set alternative signal stack for thread, {}", errnoToString(errno));
LOG_WARNING(log, "Cannot set alternative signal stack for thread, {}", errnoToString());
}
else
{
@ -128,7 +128,7 @@ ThreadStatus::ThreadStatus()
struct sigaction action{};
if (0 != sigaction(SIGSEGV, nullptr, &action))
{
LOG_WARNING(log, "Cannot obtain previous signal action to set alternative signal stack for thread, {}", errnoToString(errno));
LOG_WARNING(log, "Cannot obtain previous signal action to set alternative signal stack for thread, {}", errnoToString());
}
else if (!(action.sa_flags & SA_ONSTACK))
{
@ -136,7 +136,7 @@ ThreadStatus::ThreadStatus()
if (0 != sigaction(SIGSEGV, &action, nullptr))
{
LOG_WARNING(log, "Cannot set action with alternative signal stack for thread, {}", errnoToString(errno));
LOG_WARNING(log, "Cannot set action with alternative signal stack for thread, {}", errnoToString());
}
}
}

View File

@ -6,6 +6,3 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo
clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log)
clickhouse_add_executable (zookeeper_impl zookeeper_impl.cpp)
target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log)

View File

@ -1,26 +0,0 @@
#include <Common/ZooKeeper/ZooKeeperImpl.h>
#include <iostream>
int main()
try
{
Coordination::ZooKeeper zookeeper({Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{"localhost:2181"}, false}}, "", "", "", {30, 0}, {0, 50000}, {0, 50000}, nullptr);
zookeeper.create("/test", "hello", false, false, {}, [](const Coordination::CreateResponse & response)
{
if (response.error != Coordination::Error::ZOK)
std::cerr << "Error: " << Coordination::errorMessage(response.error) << "\n";
else
std::cerr << "Path created: " << response.path_created << "\n";
});
sleep(100);
return 0;
}
catch (...)
{
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
return 1;
}

View File

@ -13,6 +13,8 @@
#include <array>
#include <sys/resource.h>
#include <base/bit_cast.h>
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
#include <base/StringRef.h>
#include <base/arraySize.h>
@ -218,6 +220,7 @@ int main(int argc, char ** argv)
}
std::cerr << std::fixed << std::setprecision(2);
pcg64 rng(randomSeed());
size_t n = parse<size_t>(argv[1]);
std::vector<std::string> data;
@ -281,8 +284,8 @@ int main(int argc, char ** argv)
size_t bytes = 0;
for (size_t i = 0, size = data.size(); i < size; ++i)
{
size_t index_from = lrand48() % size;
size_t index_to = lrand48() % size;
size_t index_from = rng() % size;
size_t index_to = rng() % size;
arena.free(const_cast<char *>(refs[index_to].data), refs[index_to].size);
const auto & s = data[index_from];
@ -318,8 +321,8 @@ int main(int argc, char ** argv)
size_t bytes = 0;
for (size_t i = 0, size = data.size(); i < size; ++i)
{
size_t index_from = lrand48() % size;
size_t index_to = lrand48() % cache_size;
size_t index_from = rng() % size;
size_t index_to = rng() % cache_size;
dictionary.setAttributeValue(attr, index_to, data[index_from]);

View File

@ -8,9 +8,11 @@
#include <iostream>
#include <iomanip>
#include <pcg_random.hpp>
#include <Poco/Exception.h>
#include <Common/HashTable/Hash.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
#include <Core/Defines.h>
@ -266,9 +268,9 @@ int main(int argc, char ** argv)
{
Stopwatch watch;
srand48(rdtsc());
pcg64 rng(randomSeed());
for (size_t i = 0; i < BUF_SIZE; ++i)
data[i] = lrand48();
data[i] = rng();
watch.stop();
double elapsed = watch.elapsedSeconds();

View File

@ -23,6 +23,7 @@
#include <Common/LockMemoryExceptionInThread.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <Common/Stopwatch.h>
#include <Common/getMultipleKeysFromConfig.h>
namespace DB
{
@ -259,7 +260,7 @@ void KeeperServer::forceRecovery()
raft_instance->update_params(params);
}
void KeeperServer::launchRaftServer(bool enable_ipv6)
void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6)
{
nuraft::raft_params params;
params.heart_beat_interval_
@ -311,10 +312,26 @@ void KeeperServer::launchRaftServer(bool enable_ipv6)
nuraft::ptr<nuraft::logger> logger = nuraft::cs_new<LoggerWrapper>("RaftInstance", coordination_settings->raft_logs_level);
asio_service = nuraft::cs_new<nuraft::asio_service>(asio_opts, logger);
asio_listener = asio_service->create_rpc_listener(state_manager->getPort(), logger, enable_ipv6);
if (!asio_listener)
return;
// we use the same config as for the CH replicas because it is for internal communication between Keeper instances
std::vector<std::string> listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host");
if (listen_hosts.empty())
{
auto asio_listener = asio_service->create_rpc_listener(state_manager->getPort(), logger, enable_ipv6);
if (!asio_listener)
return;
asio_listeners.emplace_back(std::move(asio_listener));
}
else
{
for (const auto & listen_host : listen_hosts)
{
auto asio_listener = asio_service->create_rpc_listener(listen_host, state_manager->getPort(), logger);
if (asio_listener)
asio_listeners.emplace_back(std::move(asio_listener));
}
}
nuraft::ptr<nuraft::delayed_task_scheduler> scheduler = asio_service;
nuraft::ptr<nuraft::rpc_client_factory> rpc_cli_factory = asio_service;
@ -324,17 +341,21 @@ void KeeperServer::launchRaftServer(bool enable_ipv6)
/// raft_server creates unique_ptr from it
nuraft::context * ctx
= new nuraft::context(casted_state_manager, casted_state_machine, asio_listener, logger, rpc_cli_factory, scheduler, params);
= new nuraft::context(casted_state_manager, casted_state_machine, asio_listeners, logger, rpc_cli_factory, scheduler, params);
raft_instance = nuraft::cs_new<KeeperRaftServer>(ctx, init_options);
if (!raft_instance)
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
raft_instance->start_server(init_options.skip_initial_election_timeout_);
nuraft::ptr<nuraft::raft_server> casted_raft_server = raft_instance;
asio_listener->listen(casted_raft_server);
if (!raft_instance)
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance");
for (const auto & asio_listener : asio_listeners)
{
asio_listener->listen(casted_raft_server);
}
}
void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6)
@ -364,7 +385,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo
last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config;
launchRaftServer(enable_ipv6);
launchRaftServer(config, enable_ipv6);
keeper_context->server_state = KeeperContext::Phase::RUNNING;
}
@ -388,10 +409,13 @@ void KeeperServer::shutdownRaftServer()
raft_instance.reset();
if (asio_listener)
for (const auto & asio_listener : asio_listeners)
{
asio_listener->stop();
asio_listener->shutdown();
if (asio_listener)
{
asio_listener->stop();
asio_listener->shutdown();
}
}
if (asio_service)

View File

@ -30,7 +30,7 @@ private:
struct KeeperRaftServer;
nuraft::ptr<KeeperRaftServer> raft_instance;
nuraft::ptr<nuraft::asio_service> asio_service;
nuraft::ptr<nuraft::rpc_listener> asio_listener;
std::vector<nuraft::ptr<nuraft::rpc_listener>> asio_listeners;
// because some actions can be applied
// when we are sure that there are no requests currently being
// processed (e.g. recovery) we do all write actions
@ -52,7 +52,7 @@ private:
/// Almost copy-paste from nuraft::launcher, but with separated server init and start
/// Allows to avoid race conditions.
void launchRaftServer(bool enable_ipv6);
void launchRaftServer(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6);
void shutdownRaftServer();

View File

@ -1,4 +1,5 @@
#include <cerrno>
#include <base/errnoToString.h>
#include <future>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h>
@ -11,6 +12,7 @@
#include <Common/ProfileEvents.h>
#include "Coordination/KeeperStorage.h"
namespace ProfileEvents
{
extern const Event KeeperCommits;
@ -446,7 +448,7 @@ static int bufferFromFile(Poco::Logger * log, const std::string & path, nuraft::
LOG_INFO(log, "Opening file {} for read_logical_snp_obj", path);
if (fd < 0)
{
LOG_WARNING(log, "Error opening {}, error: {}, errno: {}", path, std::strerror(errno), errno);
LOG_WARNING(log, "Error opening {}, error: {}, errno: {}", path, errnoToString(), errno);
return errno;
}
auto file_size = ::lseek(fd, 0, SEEK_END);
@ -454,7 +456,7 @@ static int bufferFromFile(Poco::Logger * log, const std::string & path, nuraft::
auto * chunk = reinterpret_cast<nuraft::byte *>(::mmap(nullptr, file_size, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0));
if (chunk == MAP_FAILED)
{
LOG_WARNING(log, "Error mmapping {}, error: {}, errno: {}", path, std::strerror(errno), errno);
LOG_WARNING(log, "Error mmapping {}, error: {}, errno: {}", path, errnoToString(), errno);
::close(fd);
return errno;
}

View File

@ -137,7 +137,7 @@ struct SimpliestRaftServer
if (!raft_instance)
{
std::cerr << "Failed to initialize launcher" << std::endl;
exit(-1);
_exit(1);
}
std::cout << "init Raft instance " << server_id;

View File

@ -589,7 +589,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended", 0) \
M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \
M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \
M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \

View File

@ -4,12 +4,14 @@
#include <Daemon/BaseDaemon.h>
#include <Daemon/SentryWriter.h>
#include <base/errnoToString.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/resource.h>
#if defined(OS_LINUX)
#include <sys/prctl.h>
#endif
@ -315,13 +317,13 @@ private:
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, strsignal(sig), sig);
thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, query, strsignal(sig), sig);
thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context)
}
String error_message;
@ -665,7 +667,7 @@ void BaseDaemon::initialize(Application & self)
if (config().has("timezone"))
{
const std::string config_timezone = config().getString("timezone");
if (0 != setenv("TZ", config_timezone.data(), 1))
if (0 != setenv("TZ", config_timezone.data(), 1)) // NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other setenv/getenv
throw Poco::Exception("Cannot setenv TZ variable");
tzset();
@ -940,13 +942,13 @@ void BaseDaemon::handleSignal(int signal_id)
onInterruptSignals(signal_id);
}
else
throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
}
void BaseDaemon::onInterruptSignals(int signal_id)
{
is_cancelled = true;
LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id)); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
if (sigint_signals_counter >= 2)
{
@ -1064,7 +1066,7 @@ void BaseDaemon::setupWatchdog()
break;
}
else if (errno != EINTR)
throw Poco::Exception("Cannot waitpid, errno: " + std::string(strerror(errno)));
throw Poco::Exception("Cannot waitpid, errno: " + errnoToString());
} while (true);
if (errno == ECHILD)

View File

@ -146,7 +146,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
if (initialized)
{
sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str());
sentry_set_tag("signal", strsignal(sig));
sentry_set_tag("signal", strsignal(sig)); // NOLINT(concurrency-mt-unsafe) // not thread-safe but ok in this context
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
#if defined(__ELF__) && !defined(OS_FREEBSD)

View File

@ -222,9 +222,6 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
{
auto range = file_segment->range();
size_t wait_download_max_tries = settings.filesystem_cache_max_wait_sec;
size_t wait_download_tries = 0;
auto download_state = file_segment->state();
LOG_TEST(log, "getReadBufferForFileSegment: {}", file_segment->getInfoForLog());
@ -274,16 +271,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
return getCacheReadBuffer(range.left);
}
if (wait_download_tries++ < wait_download_max_tries)
{
download_state = file_segment->wait();
}
else
{
LOG_DEBUG(log, "Retries to wait for file segment download exceeded ({})", wait_download_tries);
download_state = FileSegment::State::SKIP_CACHE;
}
download_state = file_segment->wait();
continue;
}
case FileSegment::State::DOWNLOADED:

View File

@ -165,8 +165,7 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
{
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed);
promise.set_exception(std::make_exception_ptr(ErrnoException(
fmt::format("Cannot read from file {}, {}", fd,
errnoToString(ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, errno)),
fmt::format("Cannot read from file {}, {}", fd, errnoToString()),
ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR, errno)));
return future;
}

View File

@ -301,7 +301,7 @@ private:
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
memcpy(data_to.data(), src.getRawData().data, data_to.size());
memcpy(data_to.data(), src.getRawData().data(), data_to.size());
}
static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst)

View File

@ -4,6 +4,7 @@
#include <IO/Archives/ZipArchiveWriter.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h>
#include <base/errnoToString.h>
#include <unzip.h>
@ -553,11 +554,11 @@ void ZipArchiveReader::checkResult(int code) const
if (code >= UNZ_OK)
return;
String message = "Code= ";
String message = "Code = ";
switch (code)
{
case UNZ_OK: return;
case UNZ_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break;
case UNZ_ERRNO: message += "ERRNO, errno = " + errnoToString(); break;
case UNZ_PARAMERROR: message += "PARAMERROR"; break;
case UNZ_BADZIPFILE: message += "BADZIPFILE"; break;
case UNZ_INTERNALERROR: message += "INTERNALERROR"; break;

View File

@ -3,6 +3,7 @@
#if USE_MINIZIP
#include <IO/WriteBufferFromFileBase.h>
#include <Common/quoteString.h>
#include <base/errnoToString.h>
#include <zip.h>
#include <boost/algorithm/string/predicate.hpp>
@ -380,10 +381,10 @@ void ZipArchiveWriter::checkResult(int code) const
if (code >= ZIP_OK)
return;
String message = "Code= ";
String message = "Code = ";
switch (code)
{
case ZIP_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break;
case ZIP_ERRNO: message += "ERRNO, errno = " + errnoToString(); break;
case ZIP_PARAMERROR: message += "PARAMERROR"; break;
case ZIP_BADZIPFILE: message += "BADZIPFILE"; break;
case ZIP_INTERNALERROR: message += "INTERNALERROR"; break;

View File

@ -183,23 +183,28 @@ bool HadoopSnappyReadBuffer::nextImpl()
if (eof)
return false;
if (!in_available)
do
{
in->nextIfAtEnd();
in_available = in->buffer().end() - in->position();
in_data = in->position();
if (!in_available)
{
in->nextIfAtEnd();
in_available = in->buffer().end() - in->position();
in_data = in->position();
}
if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof()))
{
throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED);
}
out_capacity = internal_buffer.size();
out_data = internal_buffer.begin();
decoder->result = decoder->readBlock(&in_available, &in_data, &out_capacity, &out_data);
in->position() = in->buffer().end() - in_available;
}
while (decoder->result == Status::NEEDS_MORE_INPUT);
if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof()))
{
throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED);
}
out_capacity = internal_buffer.size();
out_data = internal_buffer.begin();
decoder->result = decoder->readBlock(&in_available, &in_data, &out_capacity, &out_data);
in->position() = in->buffer().end() - in_available;
working_buffer.resize(internal_buffer.size() - out_capacity);
if (decoder->result == Status::OK)

View File

@ -80,7 +80,6 @@ struct ReadSettings
size_t remote_fs_read_backoff_max_tries = 4;
bool enable_filesystem_cache = true;
size_t filesystem_cache_max_wait_sec = 1;
bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
bool enable_filesystem_cache_log = false;
bool is_file_cache_persistent = false; /// Some files can be made non-evictable.

View File

@ -37,12 +37,6 @@ target_link_libraries (read_write_int PRIVATE clickhouse_common_io)
clickhouse_add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp)
target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io)
clickhouse_add_executable (hashing_write_buffer hashing_write_buffer.cpp)
target_link_libraries (hashing_write_buffer PRIVATE clickhouse_common_io)
clickhouse_add_executable (hashing_read_buffer hashing_read_buffer.cpp)
target_link_libraries (hashing_read_buffer PRIVATE clickhouse_common_io)
clickhouse_add_executable (io_operators io_operators.cpp)
target_link_libraries (io_operators PRIVATE clickhouse_common_io)

View File

@ -1,21 +0,0 @@
#pragma once
#include <IO/HashingWriteBuffer.h>
#include <IO/WriteBufferFromFile.h>
#define FAIL(msg) do { std::cout << msg; exit(1); } while (false)
static CityHash_v1_0_2::uint128 referenceHash(const char * data, size_t len)
{
    /// Reference implementation of the hash that HashingWriteBuffer computes:
    /// hash the data in fixed-size blocks, chaining each block's hash as the
    /// seed of the next block, then fold in the final partial block (if any).
    const size_t block_size = DBMS_DEFAULT_HASHING_BLOCK_SIZE;
    CityHash_v1_0_2::uint128 state(0, 0);

    size_t processed = 0;
    while (processed + block_size <= len)
    {
        state = CityHash_v1_0_2::CityHash128WithSeed(data + processed, block_size, state);
        processed += block_size;
    }

    if (processed < len)
        state = CityHash_v1_0_2::CityHash128WithSeed(data + processed, len - processed, state);

    return state;
}

View File

@ -1,69 +0,0 @@
#include <IO/ReadBufferFromIStream.h>
#include <IO/HashingReadBuffer.h>
#include <IO/WriteBufferFromOStream.h>
#include "hashing_buffer.h"
#include <iostream>
#include <pcg_random.hpp>
static void test(size_t data_size)
{
    /// Round-trip `data_size` random bytes through a HashingWriteBuffer and a
    /// HashingReadBuffer over an in-memory stream, and check that the bytes
    /// survive intact and both hashes match the reference CityHash chain.
    pcg64 rng;

    std::vector<char> source(data_size);
    for (size_t pos = 0; pos < data_size; ++pos)
        source[pos] = rng() & 255;

    const CityHash_v1_0_2::uint128 expected_hash = referenceHash(source.data(), data_size);

    /// Exercise several read block sizes, including ones that do not divide the data evenly.
    const std::vector<size_t> block_sizes = {56, 128, 513, 2048, 3055, 4097, 4096};
    for (size_t read_buffer_block_size : block_sizes)
    {
        std::cout << "block size " << read_buffer_block_size << std::endl;

        std::stringstream io; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        io.exceptions(std::ios::failbit);

        DB::WriteBufferFromOStream out_impl(io);
        DB::HashingWriteBuffer out(out_impl);
        out.write(source.data(), data_size);
        out.next();

        DB::ReadBufferFromIStream in_impl(io, read_buffer_block_size);
        DB::HashingReadBuffer in(in_impl);

        std::vector<char> round_tripped(data_size);
        in.read(round_tripped.data(), data_size);

        bool mismatch = false;
        for (size_t pos = 0; pos < data_size; ++pos)
            mismatch |= (round_tripped[pos] != source[pos]);

        if (mismatch)
        {
            std::cout.write(source.data(), data_size);
            std::cout << std::endl;
            std::cout.write(round_tripped.data(), data_size);
            std::cout << std::endl;
            FAIL("Fail to read data");
        }

        if (in.getHash() != expected_hash)
            FAIL("failed on data size " << data_size << " reading by blocks of size " << read_buffer_block_size);
        if (in.getHash() != out.getHash())
            FAIL("Hash of HashingReadBuffer doesn't match with hash of HashingWriteBuffer on data size " << data_size << " reading by blocks of size " << read_buffer_block_size);
    }
}
int main()
{
test(5);
test(100);
test(2048);
test(2049);
test(100000);
test(1 << 17);
return 0;
}

View File

@ -1,86 +0,0 @@
#include <IO/HashingWriteBuffer.h>
#include <IO/WriteBufferFromFile.h>
#include <pcg_random.hpp>
#include "hashing_buffer.h"
/// Write `data_size` random bytes to /dev/null through a HashingWriteBuffer in
/// several chunking patterns (large random chunks, tiny random chunks, chunks
/// straddling the 2048-byte block boundary, and a single write), checking each
/// time that the incremental hash matches the reference CityHash chain.
/// Fix: the FAIL messages said "rngom" — a botched mechanical rename of
/// "random" from the srand48 -> pcg64 migration; restored to "random".
static void test(size_t data_size)
{
    pcg64 rng;

    std::vector<char> vec(data_size);
    char * data = vec.data();

    for (size_t i = 0; i < data_size; ++i)
        data[i] = rng() & 255;

    CityHash_v1_0_2::uint128 reference = referenceHash(data, data_size);

    DB::WriteBufferFromFile sink("/dev/null", 1 << 16);

    {
        /// Large random chunks: up to 10000 bytes each.
        DB::HashingWriteBuffer buf(sink);

        for (size_t pos = 0; pos < data_size;)
        {
            size_t len = std::min(static_cast<size_t>(rng() % 10000 + 1), data_size - pos);
            buf.write(data + pos, len);
            buf.next();
            pos += len;
        }

        if (buf.getHash() != reference)
            FAIL("failed on data size " << data_size << " writing random chunks of up to 10000 bytes");
    }

    {
        /// Tiny random chunks: 1 to 5 bytes, stresses partial-block accumulation.
        DB::HashingWriteBuffer buf(sink);

        for (size_t pos = 0; pos < data_size;)
        {
            size_t len = std::min(static_cast<size_t>(rng() % 5 + 1), data_size - pos);
            buf.write(data + pos, len);
            buf.next();
            pos += len;
        }

        if (buf.getHash() != reference)
            FAIL("failed on data size " << data_size << " writing random chunks of up to 5 bytes");
    }

    {
        /// Chunks of 2048 +- 1 bytes: straddles the internal hashing block size.
        DB::HashingWriteBuffer buf(sink);

        for (size_t pos = 0; pos < data_size;)
        {
            size_t len = std::min(static_cast<size_t>(2048 + rng() % 3 - 1), data_size - pos);
            buf.write(data + pos, len);
            buf.next();
            pos += len;
        }

        if (buf.getHash() != reference)
            FAIL("failed on data size " << data_size << " writing random chunks of 2048 +-1 bytes");
    }

    {
        /// Single write of the whole buffer.
        DB::HashingWriteBuffer buf(sink);
        buf.write(data, data_size);

        if (buf.getHash() != reference)
            FAIL("failed on data size " << data_size << " writing all at once");
    }
}
int main()
{
test(5);
test(100);
test(2048);
test(2049);
test(100000);
test(1 << 17);
return 0;
}

View File

@ -1,5 +1,6 @@
#include <iostream>
#include <iomanip>
#include <pcg_random.hpp>
#include <base/types.h>
@ -7,7 +8,6 @@
#include <IO/WriteHelpers.h>
#include <IO/WriteIntText.h>
#include <IO/WriteBufferFromVector.h>
#include <Compression/CompressedReadBuffer.h>
#include <Common/Stopwatch.h>
@ -27,6 +27,8 @@ static UInt64 rdtsc()
int main(int argc, char ** argv)
{
pcg64 rng;
try
{
if (argc < 2)
@ -47,7 +49,7 @@ int main(int argc, char ** argv)
Stopwatch watch;
for (size_t i = 0; i < n; ++i)
data[i] = lrand48();// / lrand48();// ^ (lrand48() << 24) ^ (lrand48() << 48);
data[i] = rng();
watch.stop();
std::cerr << std::fixed << std::setprecision(2)

View File

@ -8,32 +8,8 @@
#include <Poco/HexBinaryEncoder.h>
/// Ensure that parsing obviously invalid input as an unsigned integer throws.
/// Any exception is the expected outcome (swallowed deliberately); successful
/// parsing is a test failure and terminates the process.
static void parse_trash_string_as_uint_must_fail(const std::string & str)
{
    using namespace DB;

    unsigned x = 0xFF;

    try
    {
        x = parse<unsigned>(str);
    }
    catch (...)
    {
        /// Ok: parsing failed as it must.
        return;
    }

    std::cerr << "Parsing must fail, but finished successfully x=" << x;
    /// exit(-1) would be reported as status 255; use a conventional failure code
    /// (matches the _exit(1) convention used elsewhere in the codebase).
    exit(1);
}
int main(int argc, char ** argv)
{
parse_trash_string_as_uint_must_fail("trash");
parse_trash_string_as_uint_must_fail("-1");
if (argc != 2)
{
std::cerr << "Usage: " << std::endl

View File

@ -60,7 +60,8 @@ TEST(HadoopSnappyDecoder, repeatNeedMoreInput)
String output;
WriteBufferFromString out(output);
copyData(read_buffer, out);
out.finalize();
UInt128 hashcode = sipHash128(output.c_str(), output.size());
String hashcode_str = getHexUIntLowercase(hashcode);
ASSERT_EQ(hashcode_str, "593afe14f61866915cc00b8c7bd86046");
ASSERT_EQ(hashcode_str, "673e5b065186cec146789451c2a8f703");
}

View File

@ -3451,7 +3451,6 @@ ReadSettings Context::getReadSettings() const
res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms;
res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries;
res.enable_filesystem_cache = settings.enable_filesystem_cache;
res.filesystem_cache_max_wait_sec = settings.filesystem_cache_max_wait_sec;
res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache;
res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log;
res.enable_filesystem_cache_on_lower_level = settings.enable_filesystem_cache_on_lower_level;

View File

@ -113,14 +113,14 @@ public:
const auto & null_map_column = nullable_column->getNullMapColumn();
auto nested_column_raw_data = nested_column.getRawData();
__msan_unpoison(nested_column_raw_data.data, nested_column_raw_data.size);
__msan_unpoison(nested_column_raw_data.data(), nested_column_raw_data.size());
auto null_map_column_raw_data = null_map_column.getRawData();
__msan_unpoison(null_map_column_raw_data.data, null_map_column_raw_data.size);
__msan_unpoison(null_map_column_raw_data.data(), null_map_column_raw_data.size());
}
else
{
__msan_unpoison(result_column->getRawData().data, result_column->getRawData().size);
__msan_unpoison(result_column->getRawData().data(), result_column->getRawData().size());
}
#endif

View File

@ -47,11 +47,11 @@ ColumnData getColumnData(const IColumn * column)
if (const auto * nullable = typeid_cast<const ColumnNullable *>(column))
{
result.null_data = nullable->getNullMapColumn().getRawData().data;
result.null_data = nullable->getNullMapColumn().getRawData().data();
column = & nullable->getNestedColumn();
}
result.data = column->getRawData().data;
result.data = column->getRawData().data();
return result;
}

View File

@ -735,7 +735,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
/// Back thread even with no data. We have some unfinished data in buffer.
if (!not_processed && left_blocks_buffer)
not_processed = std::make_shared<NotProcessed>(NotProcessed{{}, 0, 0, 0});
not_processed = std::make_shared<NotProcessed>(NotProcessed{{}, 0, 0, 0, 0});
if (needConditionJoinColumn())
block.erase(deriveTempName(mask_column_name_left, JoinTableSide::Left));
@ -759,6 +759,7 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
{
auto & continuation = static_cast<NotProcessed &>(*not_processed);
left_cursor.nextN(continuation.left_position);
left_key_tail = continuation.left_key_tail;
skip_right = continuation.right_position;
starting_right_block = continuation.right_block;
not_processed.reset();
@ -778,7 +779,10 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
if (intersection < 0)
break; /// (left) ... (right)
if (intersection > 0)
{
skip_right = 0;
continue; /// (right) ... (left)
}
}
/// Use skip_right as ref. It would be updated in join.
@ -787,7 +791,7 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
if (!leftJoin<is_all>(left_cursor, block, right_block, left_columns, right_columns, left_key_tail))
{
not_processed = extraBlock<is_all>(block, std::move(left_columns), std::move(right_columns),
left_cursor.position(), skip_right, i);
left_cursor.position(), left_key_tail, skip_right, i);
return;
}
}
@ -811,7 +815,10 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
if (intersection < 0)
break; /// (left) ... (right)
if (intersection > 0)
{
skip_right = 0;
continue; /// (right) ... (left)
}
}
/// Use skip_right as ref. It would be updated in join.
@ -822,7 +829,7 @@ void MergeJoin::joinSortedBlock(Block & block, ExtraBlockPtr & not_processed)
if (!allInnerJoin(left_cursor, block, right_block, left_columns, right_columns, left_key_tail))
{
not_processed = extraBlock<is_all>(block, std::move(left_columns), std::move(right_columns),
left_cursor.position(), skip_right, i);
left_cursor.position(), left_key_tail, skip_right, i);
return;
}
}
@ -884,7 +891,7 @@ bool MergeJoin::leftJoin(MergeJoinCursor & left_cursor, const Block & left_block
{
right_cursor.nextN(range.right_length);
right_block_info.skip = right_cursor.position();
left_cursor.nextN(range.left_length);
left_key_tail = range.left_length;
return false;
}
}
@ -991,15 +998,15 @@ void MergeJoin::addRightColumns(Block & block, MutableColumns && right_columns)
/// Split block into processed (result) and not processed. Not processed block would be joined next time.
template <bool is_all>
ExtraBlockPtr MergeJoin::extraBlock(Block & processed, MutableColumns && left_columns, MutableColumns && right_columns,
size_t left_position [[maybe_unused]], size_t right_position [[maybe_unused]],
size_t right_block_number [[maybe_unused]])
size_t left_position [[maybe_unused]], size_t left_key_tail [[maybe_unused]],
size_t right_position [[maybe_unused]], size_t right_block_number [[maybe_unused]])
{
ExtraBlockPtr not_processed;
if constexpr (is_all)
{
not_processed = std::make_shared<NotProcessed>(
NotProcessed{{processed.cloneEmpty()}, left_position, right_position, right_block_number});
NotProcessed{{processed.cloneEmpty()}, left_position, left_key_tail, right_position, right_block_number});
not_processed->block.swap(processed);
changeLeftColumns(processed, std::move(left_columns));

View File

@ -45,6 +45,7 @@ private:
struct NotProcessed : public ExtraBlock
{
size_t left_position;
size_t left_key_tail;
size_t right_position;
size_t right_block;
};
@ -123,7 +124,8 @@ private:
template <bool is_all>
ExtraBlockPtr extraBlock(Block & processed, MutableColumns && left_columns, MutableColumns && right_columns,
size_t left_position, size_t right_position, size_t right_block_number);
size_t left_position, size_t left_key_tail, size_t right_position,
size_t right_block_number);
void mergeRightBlocks();

View File

@ -416,7 +416,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
LOG_TRACE(log, "Resetting nice");
if (0 != setpriority(PRIO_PROCESS, thread_id, 0))
LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString(ErrorCodes::CANNOT_SET_THREAD_PRIORITY, errno));
LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString());
os_thread_priority = 0;
}

View File

@ -1,4 +1,5 @@
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTSetQuery.h>
namespace DB
{
@ -40,7 +41,7 @@ void ASTQueryWithOutput::formatImpl(const FormatSettings & s, FormatState & stat
format->formatImpl(s, state, frame);
}
if (settings_ast)
if (settings_ast && assert_cast<ASTSetQuery *>(settings_ast.get())->print_in_format)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SETTINGS " << (s.hilite ? hilite_none : "");
settings_ast->formatImpl(s, state, frame);

View File

@ -192,7 +192,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
limitOffset()->formatImpl(s, state, frame);
}
if (settings())
if (settings() && assert_cast<ASTSetQuery *>(settings().get())->print_in_format)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SETTINGS " << (s.hilite ? hilite_none : "");
settings()->formatImpl(s, state, frame);

View File

@ -14,6 +14,12 @@ class ASTSetQuery : public IAST
public:
bool is_standalone = true; /// If false, this AST is a part of another query, such as SELECT.
/// To support overriding certain settings in a **subquery**, we add a ASTSetQuery with Settings to all subqueries, containing
/// the list of all settings that affect them (specifically or globally to the whole query).
/// We use `print_in_format` to avoid printing these nodes when they were left unchanged from the parent copy
/// See more: https://github.com/ClickHouse/ClickHouse/issues/38895
bool print_in_format = true;
SettingsChanges changes;
NameToNameMap query_parameters;

View File

@ -142,7 +142,9 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
// Pass them manually, to apply in InterpreterSelectQuery::initSettings()
if (query->as<ASTSelectWithUnionQuery>())
{
QueryWithOutputSettingsPushDownVisitor::Data data{query_with_output.settings_ast};
auto settings = query_with_output.settings_ast->clone();
assert_cast<ASTSetQuery *>(settings.get())->print_in_format = false;
QueryWithOutputSettingsPushDownVisitor::Data data{settings};
QueryWithOutputSettingsPushDownVisitor(data).visit(query);
}
}

View File

@ -12,6 +12,8 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Processors/Merges/Algorithms/Graphite.h>
#include <Common/Config/ConfigProcessor.h>
#include <base/errnoToString.h>
using namespace DB;
@ -43,7 +45,7 @@ static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s
int fd = mkstemp(tmp_file);
if (fd == -1)
{
throw std::runtime_error(strerror(errno));
throw std::runtime_error(errnoToString());
}
try
{
@ -61,7 +63,7 @@ static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s
{
int err = errno;
(void)remove(tmp_file);
throw std::runtime_error(strerror(err));
throw std::runtime_error(errnoToString(err));
}
ConfigProcessor::LoadedConfig config = loadConfiguration(config_path);
(void)remove(tmp_file);

View File

@ -23,12 +23,6 @@ int callSetCertificate(SSL * ssl, [[maybe_unused]] void * arg)
}
namespace ErrorCodes
{
extern const int CANNOT_STAT;
}
/// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key.
int CertificateReloader::setCertificate(SSL * ssl)
{
@ -118,7 +112,7 @@ bool CertificateReloader::File::changeIfModified(std::string new_path, Poco::Log
if (ec)
{
LOG_ERROR(logger, "Cannot obtain modification time for {} file {}, skipping update. {}",
description, new_path, errnoToString(ErrorCodes::CANNOT_STAT, ec.value()));
description, new_path, errnoToString(ec.value()));
return false;
}

View File

@ -169,10 +169,20 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS
replicas_status_handler->allowGetAndHeadRequest();
factory.addHandler(replicas_status_handler);
auto web_ui_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server, "play.html");
web_ui_handler->attachNonStrictPath("/play");
web_ui_handler->allowGetAndHeadRequest();
factory.addHandler(web_ui_handler);
auto play_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server);
play_handler->attachNonStrictPath("/play");
play_handler->allowGetAndHeadRequest();
factory.addHandler(play_handler);
auto dashboard_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server);
dashboard_handler->attachNonStrictPath("/dashboard");
dashboard_handler->allowGetAndHeadRequest();
factory.addHandler(dashboard_handler);
auto js_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<WebUIRequestHandler>>(server);
js_handler->attachNonStrictPath("/js/");
js_handler->allowGetAndHeadRequest();
factory.addHandler(js_handler);
}
void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics)

View File

@ -8,12 +8,14 @@
#include <IO/HTTPCommon.h>
#include <Common/getResource.h>
#include <re2/re2.h>
namespace DB
{
WebUIRequestHandler::WebUIRequestHandler(IServer & server_, std::string resource_name_)
: server(server_), resource_name(std::move(resource_name_))
WebUIRequestHandler::WebUIRequestHandler(IServer & server_)
: server(server_)
{
}
@ -28,8 +30,38 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
response.setChunkedTransferEncoding(true);
setResponseDefaultHeaders(response, keep_alive_timeout);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource(resource_name);
if (request.getURI().starts_with("/play"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("play.html");
}
else if (request.getURI().starts_with("/dashboard"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
std::string html(getResource("dashboard.html"));
/// Replace the link to the external JavaScript file with a link to the embedded copy.
/// This allows opening the HTML without running a server, and also hosting it on a server.
/// Note: we could embed the JavaScript file inline in the HTML,
/// but we don't, to keep the "view-source" output perfectly readable.
static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)";
RE2::Replace(&html, uplot_url, "/js/uplot.js");
*response.send() << html;
}
else if (request.getURI() == "/js/uplot.js")
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("js/uplot.js");
}
else
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND);
*response.send() << "Not found.\n";
}
}
}

View File

@ -13,11 +13,10 @@ class WebUIRequestHandler : public HTTPRequestHandler
{
private:
IServer & server;
std::string resource_name;
public:
WebUIRequestHandler(IServer & server_, std::string resource_name_);
WebUIRequestHandler(IServer & server_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
};
}

View File

@ -7,15 +7,17 @@
#if USE_HDFS
#include <Common/ShellCommand.h>
#include <Common/Exception.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/logger_useful.h>
#if USE_KRB5
#include <Access/KerberosInit.h>
#endif // USE_KRB5
#include <Access/KerberosInit.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -26,10 +28,15 @@ namespace ErrorCodes
#endif // USE_KRB5
}
const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs";
const String HDFS_URL_REGEXP = "^hdfs://[^/]*/.*";
static constexpr std::string_view CONFIG_PREFIX = "hdfs";
static constexpr std::string_view HDFS_URL_REGEXP = "^hdfs://[^/]*/.*";
/// Frees the hdfsFileInfo array owned by this wrapper via the libhdfs3 API.
/// `length` is the number of entries starting at `file_info`; hdfsFreeFileInfo
/// is safe to call with a null pointer / zero length, so no guard is needed here.
HDFSFileInfo::~HDFSFileInfo()
{
hdfsFreeFileInfo(file_info, length);
}
std::once_flag init_libhdfs3_conf_flag;
void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
const String & prefix, bool isUser)
@ -111,23 +118,6 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A
if (host.empty())
throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
// Shall set env LIBHDFS3_CONF *before* HDFSBuilderWrapper construction.
std::call_once(init_libhdfs3_conf_flag, [&config]()
{
String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
{
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
{
const String config_path = config.getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
}
setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1);
}
});
HDFSBuilderWrapper builder;
if (builder.get() == nullptr)
throw Exception("Unable to create builder to connect to HDFS: " +
@ -157,14 +147,14 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A
hdfsBuilderSetNameNodePort(builder.get(), port);
}
if (config.has(HDFSBuilderWrapper::CONFIG_PREFIX))
if (config.has(std::string(CONFIG_PREFIX)))
{
builder.loadFromConfig(config, HDFSBuilderWrapper::CONFIG_PREFIX);
builder.loadFromConfig(config, std::string(CONFIG_PREFIX));
}
if (!user.empty())
{
String user_config_prefix = HDFSBuilderWrapper::CONFIG_PREFIX + "_" + user;
String user_config_prefix = std::string(CONFIG_PREFIX) + "_" + user;
if (config.has(user_config_prefix))
{
builder.loadFromConfig(config, user_config_prefix, true);
@ -208,7 +198,7 @@ String getNameNodeCluster(const String &hdfs_url)
void checkHDFSURL(const String & url)
{
if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP))
if (!re2::RE2::FullMatch(url, std::string(HDFS_URL_REGEXP)))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. It should have structure 'hdfs://<host_name>:<port>/<path>'", url);
}

View File

@ -40,11 +40,7 @@ struct HDFSFileInfo
HDFSFileInfo(HDFSFileInfo && other) = default;
HDFSFileInfo & operator=(const HDFSFileInfo & other) = delete;
HDFSFileInfo & operator=(HDFSFileInfo && other) = default;
~HDFSFileInfo()
{
hdfsFreeFileInfo(file_info, length);
}
~HDFSFileInfo();
};

View File

@ -18,7 +18,7 @@ NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configura
, log(log_)
, event_handler(loop.getLoop(), log)
{
const char * val = std::getenv("CLICKHOUSE_NATS_TLS_SECURE");
const char * val = std::getenv("CLICKHOUSE_NATS_TLS_SECURE"); // NOLINT(concurrency-mt-unsafe) // this is safe on Linux glibc/Musl, but potentially not safe on other platforms
std::string tls_secure = val == nullptr ? std::string("1") : std::string(val);
if (tls_secure == "0")
skip_verification = true;

View File

@ -2,7 +2,7 @@
#include <Storages/RocksDB/StorageEmbeddedRocksDB.h>
#include <IO/WriteBufferFromString.h>
#include <rocksdb/db.h>
#include <rocksdb/utilities/db_ttl.h>
namespace DB

View File

@ -1,3 +1,4 @@
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/RocksDB/StorageEmbeddedRocksDB.h>
#include <Storages/RocksDB/EmbeddedRocksDBSink.h>
@ -21,9 +22,9 @@
#include <Common/Exception.h>
#include <base/sort.h>
#include <rocksdb/db.h>
#include <rocksdb/table.h>
#include <rocksdb/convenience.h>
#include <rocksdb/utilities/db_ttl.h>
#include <cstddef>
#include <filesystem>
@ -164,10 +165,12 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_,
const StorageInMemoryMetadata & metadata_,
bool attach,
ContextPtr context_,
const String & primary_key_)
const String & primary_key_,
Int32 ttl_)
: IStorage(table_id_)
, WithContext(context_->getGlobalContext())
, primary_key{primary_key_}
, ttl(ttl_)
{
setInMemoryMetadata(metadata_);
rocksdb_dir = context_->getPath() + relative_data_path_;
@ -193,7 +196,6 @@ void StorageEmbeddedRocksDB::initDB()
{
rocksdb::Status status;
rocksdb::Options base;
rocksdb::DB * db;
base.create_if_missing = true;
base.compression = rocksdb::CompressionType::kZSTD;
@ -264,15 +266,28 @@ void StorageEmbeddedRocksDB::initDB()
}
}
status = rocksdb::DB::Open(merged, rocksdb_dir, &db);
if (!status.ok())
if (ttl > 0)
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to open rocksdb path at: {}: {}",
rocksdb_dir, status.ToString());
rocksdb::DBWithTTL * db;
status = rocksdb::DBWithTTL::Open(merged, rocksdb_dir, &db, ttl);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}",
rocksdb_dir, status.ToString());
}
rocksdb_ptr = std::unique_ptr<rocksdb::DBWithTTL>(db);
}
else
{
rocksdb::DB * db;
status = rocksdb::DB::Open(merged, rocksdb_dir, &db);
if (!status.ok())
{
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}",
rocksdb_dir, status.ToString());
}
rocksdb_ptr = std::unique_ptr<rocksdb::DB>(db);
}
/// It's fine to simply wrap the db in a unique_ptr; per the RocksDB documentation: "when you are done with a database, just delete the database object".
rocksdb_ptr = std::unique_ptr<rocksdb::DB>(db);
}
Pipe StorageEmbeddedRocksDB::read(
@ -335,10 +350,16 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write(
static StoragePtr create(const StorageFactory::Arguments & args)
{
// TODO custom RocksDBSettings, table function
if (!args.engine_args.empty())
throw Exception(
"Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto engine_args = args.engine_args;
if (engine_args.size() > 1)
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} requires at most 1 parameter. ({} given). Correct usage: EmbeddedRocksDB([ttl])",
args.engine_name, engine_args.size());
}
Int32 ttl{0};
if (!engine_args.empty())
ttl = checkAndGetLiteralArgument<UInt64>(engine_args[0], "ttl");
StorageInMemoryMetadata metadata;
metadata.setColumns(args.columns);
@ -353,7 +374,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
{
throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS);
}
return std::make_shared<StorageEmbeddedRocksDB>(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0]);
return std::make_shared<StorageEmbeddedRocksDB>(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl);
}
std::shared_ptr<rocksdb::Statistics> StorageEmbeddedRocksDB::getRocksDBStatistics() const
@ -449,6 +470,7 @@ void registerStorageEmbeddedRocksDB(StorageFactory & factory)
{
StorageFactory::StorageFeatures features{
.supports_sort_order = true,
.supports_ttl = true,
.supports_parallel_insert = true,
};

View File

@ -32,7 +32,8 @@ public:
const StorageInMemoryMetadata & metadata,
bool attach,
ContextPtr context_,
const String & primary_key_);
const String & primary_key_,
Int32 ttl_ = 0);
std::string getName() const override { return "EmbeddedRocksDB"; }
@ -80,6 +81,7 @@ private:
RocksDBPtr rocksdb_ptr;
mutable std::shared_mutex rocksdb_ptr_mx;
String rocksdb_dir;
Int32 ttl;
void initDB();
};

Some files were not shown because too many files have changed in this diff Show More