Merge remote-tracking branch 'origin/master' into HEAD

Alexander Kuzmenkov 2020-08-06 00:15:06 +03:00
commit 3b1bacf390
54 changed files with 2179 additions and 2782 deletions

File diff suppressed because it is too large.


@@ -51,6 +51,7 @@
 #include <Common/getMultipleKeysFromConfig.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/Config/ConfigProcessor.h>
+#include <Common/MemorySanitizer.h>
 #include <Common/SymbolIndex.h>

 #if !defined(ARCADIA_BUILD)
@@ -76,6 +77,15 @@ static void call_default_signal_handler(int sig)
     raise(sig);
 }

+const char * msan_strsignal(int sig)
+{
+    // Apparently strsignal is not instrumented by MemorySanitizer, so we
+    // have to unpoison it to avoid msan reports inside fmt library when we
+    // print it.
+    const char * signal_name = strsignal(sig);
+    __msan_unpoison_string(signal_name);
+    return signal_name;
+}
+
 static constexpr size_t max_query_id_size = 127;
@@ -280,12 +290,14 @@ private:
         if (query_id.empty())
         {
             LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
-                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, strsignal(sig), sig);
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
+                thread_num, msan_strsignal(sig), sig);
         }
         else
         {
             LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
-                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig);
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
+                thread_num, query_id, msan_strsignal(sig), sig);
         }

         String error_message;
@@ -833,13 +845,13 @@ void BaseDaemon::handleSignal(int signal_id)
         onInterruptSignals(signal_id);
     }
     else
-        throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
+        throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0);
 }

 void BaseDaemon::onInterruptSignals(int signal_id)
 {
     is_cancelled = true;
-    LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
+    LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id));

     if (sigint_signals_counter >= 2)
     {
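
For context, a standalone sketch (not part of this commit; the helper macro name is made up) of the unpoisoning pattern introduced above: when MemorySanitizer is not enabled the helper collapses to a no-op, and when it is enabled the string returned by the uninstrumented strsignal is explicitly marked as initialized before it is formatted.

// Minimal sketch of the msan_strsignal pattern; UNPOISON_STRING is an illustrative helper.
#include <cstdio>
#include <string.h>

#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
#        include <sanitizer/msan_interface.h>
#        define UNPOISON_STRING(s) __msan_unpoison_string(s)
#    endif
#endif
#if !defined(UNPOISON_STRING)
#    define UNPOISON_STRING(s) ((void)(s))  // no-op when MSan is not enabled
#endif

const char * sketch_strsignal(int sig)
{
    const char * signal_name = strsignal(sig);  // strsignal itself is not instrumented by MSan
    UNPOISON_STRING(signal_name);               // tell MSan these bytes are initialized before printing
    return signal_name;
}

int main()
{
    std::printf("%s\n", sketch_strsignal(15));  // e.g. "Terminated"
}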


@@ -20,6 +20,12 @@ endif ()
 option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." ON)

+# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
+# Only in release build because debug has too large stack frames.
+if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE))
+    add_warning(frame-larger-than=16384)
+endif ()
+
 if (COMPILER_CLANG)
     add_warning(pedantic)
     no_warning(vla-extension)
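
For context, a standalone C++ sketch (not part of this commit; function names are made up) of what -Wframe-larger-than=16384 is meant to catch: a function keeping a large array on the stack exceeds the 16 KiB frame budget, while a heap-allocated variant keeps the frame small. This is the same trade-off behind the std::unique_ptr changes to RadixSort, Volnitsky and the ngram functions later in this commit.

#include <cstddef>
#include <memory>

// Likely to trigger -Wframe-larger-than=16384: ~256 KiB of stack in one frame.
int sum_on_stack()
{
    int histogram[65536] = {0};  // 65536 * 4 bytes live in the stack frame
    int sum = 0;
    for (int v : histogram)
        sum += v;
    return sum;
}

// Keeps the frame tiny: the array lives on the heap instead.
int sum_on_heap()
{
    std::unique_ptr<int[]> histogram(new int[65536]{});
    int sum = 0;
    for (size_t i = 0; i < 65536; ++i)
        sum += histogram[i];
    return sum;
}

int main()
{
    return sum_on_stack() == sum_on_heap() ? 0 : 1;
}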


@@ -53,7 +53,7 @@ mkdir -p /etc/clickhouse-server
 mkdir -p /etc/clickhouse-client
 mkdir -p /etc/clickhouse-server/config.d
 mkdir -p /etc/clickhouse-server/users.d
-mkdir -p /var/log/clickhouse-server
+ln -s /test_output /var/log/clickhouse-server
 cp $CLICKHOUSE_DIR/programs/server/config.xml /etc/clickhouse-server/
 cp $CLICKHOUSE_DIR/programs/server/users.xml /etc/clickhouse-server/
@@ -66,7 +66,6 @@ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config
 ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
-ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/custom_settings_prefixes.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
 ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
@@ -84,6 +83,10 @@ ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
 ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml

+# Keep original query_masking_rules.xml
+ln -s --backup=simple --suffix=_original.xml /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
+
 clickhouse-server --config /etc/clickhouse-server/config.xml --daemon

 counter=0
@@ -161,15 +164,15 @@ clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip ${TESTS_TO_
 kill_clickhouse () {
-    kill `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` 2>/dev/null
+    killall clickhouse-server ||:
     for i in {1..10}
     do
-        if ! kill -0 `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'`; then
+        if ! killall -0 clickhouse-server; then
            echo "No clickhouse process"
            break
        else
-           echo "Process" `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` "still alive"
+           echo "Clickhouse server process" $(pgrep -f clickhouse-server) "still alive"
            sleep 10
        fi
     done
@@ -202,5 +205,3 @@ if [[ ! -z "$FAILED_TESTS" ]]; then
 else
     echo "No failed tests"
 fi
-
-mv /var/log/clickhouse-server/* /test_output


@@ -17,6 +17,8 @@ parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
     help='Which report to build')
 args = parser.parse_args()

+tables = []
+errors_explained = []
 report_errors = []
 error_tests = 0
 slow_average_tests = 0
@@ -145,19 +147,40 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
 table_anchor = 0
 row_anchor = 0

-def nextTableAnchor():
+def currentTableAnchor():
+    global table_anchor
+    return f'{table_anchor}'
+
+def newTableAnchor():
     global table_anchor
     table_anchor += 1
-    return str(table_anchor)
+    return currentTableAnchor()
+
+def currentRowAnchor():
+    global row_anchor
+    global table_anchor
+    return f'{table_anchor}.{row_anchor}'

 def nextRowAnchor():
     global row_anchor
     global table_anchor
+    return f'{table_anchor}.{row_anchor + 1}'
+
+def setRowAnchor(anchor_row_part):
+    global row_anchor
+    global table_anchor
+    row_anchor = anchor_row_part
+    return currentRowAnchor()
+
+def advanceRowAnchor():
+    global row_anchor
+    global table_anchor
     row_anchor += 1
-    return str(table_anchor) + "." + str(row_anchor)
+    return currentRowAnchor()

 def tr(x):
-    a = nextRowAnchor()
+    a = advanceRowAnchor()
     #return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
     return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
@@ -180,8 +203,10 @@ def tableHeader(r):
     return tr(''.join([th(f) for f in r]))

 def tableStart(title):
-    anchor = nextTableAnchor();
     cls = '-'.join(title.lower().split(' ')[:3]);
+    global table_anchor
+    table_anchor = cls
+    anchor = currentTableAnchor()
     return f"""
 <h2 id="{anchor}">
 <a class="cancela" href="#{anchor}">{title}</a>
@@ -211,20 +236,23 @@ def htmlRows(n):
         result += tableRow(row)
     return result

-def printSimpleTable(caption, columns, rows):
+def addSimpleTable(caption, columns, rows, pos=None):
+    global tables
+    text = ''
     if not rows:
         return

-    print(tableStart(caption))
-    print(tableHeader(columns))
+    text += tableStart(caption)
+    text += tableHeader(columns)
     for row in rows:
-        print(tableRow(row))
-    print(tableEnd())
+        text += tableRow(row)
+    text += tableEnd()
+    tables.insert(pos if pos else len(tables), text)

-def print_tested_commits():
+def add_tested_commits():
     global report_errors
     try:
-        printSimpleTable('Tested commits', ['Old', 'New'],
+        addSimpleTable('Tested commits', ['Old', 'New'],
             [['<pre>{}</pre>'.format(x) for x in
                 [open('left-commit.txt').read(),
                     open('right-commit.txt').read()]]])
@@ -235,7 +263,8 @@ def print_tested_commits():
             *sys.exc_info()[:2])[-1])
         pass

-def print_report_errors():
+def add_report_errors():
+    global tables
     global report_errors
     # Add the errors reported by various steps of comparison script
     try:
@@ -246,67 +275,89 @@ def print_report_errors():
             *sys.exc_info()[:2])[-1])
         pass

-    if len(report_errors):
-        print(tableStart('Errors while building the report'))
-        print(tableHeader(['Error']))
-        for x in report_errors:
-            print(tableRow([x]))
-        print(tableEnd())
+    if not report_errors:
+        return
+
+    text = tableStart('Errors while building the report')
+    text += tableHeader(['Error'])
+    for x in report_errors:
+        text += tableRow([x])
+    text += tableEnd()
+    # Insert after Tested Commits
+    tables.insert(1, text)
+    errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
+
+def add_errors_explained():
+    global tables
+    addSimpleTable('Error summary', ['Description'], errors_explained, 1)

 if args.report == 'main':
     print(header_template.format())

-    print_tested_commits()
+    add_tested_commits()

     run_error_rows = tsvRows('run-errors.tsv')
     error_tests += len(run_error_rows)
-    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+    addSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+    if run_error_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);

     slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
     error_tests += len(slow_on_client_rows)
-    printSimpleTable('Slow on client',
+    addSimpleTable('Slow on client',
         ['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
         slow_on_client_rows)
+    if slow_on_client_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);

     unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
     error_tests += len(unmarked_short_rows)
-    printSimpleTable('Short queries not marked as short',
+    addSimpleTable('Short queries not marked as short',
         ['New client time, s', 'Test', '#', 'Query'],
         unmarked_short_rows)
+    if unmarked_short_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have short duration but are not explicitly marked as "short"</a>']);

-    def print_partial():
+    def add_partial():
         rows = tsvRows('report/partial-queries-report.tsv')
         if not rows:
             return
-        global unstable_partial_queries, slow_average_tests
-        print(tableStart('Partial queries'))
+
+        global unstable_partial_queries, slow_average_tests, tables
+        text = tableStart('Partial queries')
         columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
-        print(tableHeader(columns))
+        text += tableHeader(columns)
         attrs = ['' for c in columns]
         for row in rows:
             if float(row[1]) > 0.10:
                 attrs[1] = f'style="background: {color_bad}"'
                 unstable_partial_queries += 1
+                errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
             else:
                 attrs[1] = ''
             if float(row[0]) > allowed_single_run_time:
                 attrs[0] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run} seconds"</a>'])
                 slow_average_tests += 1
             else:
                 attrs[0] = ''
-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_partial()
+    add_partial()

-    def print_changes():
+    def add_changes():
         rows = tsvRows('report/changed-perf.tsv')
         if not rows:
             return
-        global faster_queries, slower_queries
-        print(tableStart('Changes in performance'))
+
+        global faster_queries, slower_queries, tables
+        text = tableStart('Changes in performance')
         columns = [
             'Old,&nbsp;s', # 0
             'New,&nbsp;s', # 1
@@ -319,7 +370,7 @@ if args.report == 'main':
             'Query', # 8
         ]

-        print(tableHeader(columns))
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[5] = None
@@ -331,18 +382,19 @@ if args.report == 'main':
                 else:
                     slower_queries += 1
                     attrs[2] = attrs[3] = f'style="background: {color_bad}"'
+                    errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
             else:
                 attrs[2] = attrs[3] = ''

-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_changes()
+    add_changes()

-    def print_unstable_queries():
-        global unstable_queries
-        global very_unstable_queries
+    def add_unstable_queries():
+        global unstable_queries, very_unstable_queries, tables

         unstable_rows = tsvRows('report/unstable-queries.tsv')
         if not unstable_rows:
@@ -361,8 +413,8 @@ if args.report == 'main':
             'Query' #7
         ]

-        print(tableStart('Unstable queries'))
-        print(tableHeader(columns))
+        text = tableStart('Unstable queries')
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[4] = None
@@ -373,21 +425,22 @@ if args.report == 'main':
             else:
                 attrs[3] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_unstable_queries()
+    add_unstable_queries()

     skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
-    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
+    addSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)

-    printSimpleTable('Test performance changes',
+    addSimpleTable('Test performance changes',
         ['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
         tsvRows('report/test-perf-changes.tsv'))

-    def print_test_times():
-        global slow_average_tests
+    def add_test_times():
+        global slow_average_tests, tables
         rows = tsvRows('report/test-times.tsv')
         if not rows:
             return
@@ -403,8 +456,8 @@ if args.report == 'main':
             'Shortest query<br>(sum for all runs),&nbsp;s', #7
         ]

-        print(tableStart('Test times'))
-        print(tableHeader(columns))
+        text = tableStart('Test times')
+        text += tableHeader(columns)

         nominal_runs = 13 # FIXME pass this as an argument
         total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers
@@ -414,22 +467,25 @@ if args.report == 'main':
                 # FIXME should be 15s max -- investigate parallel_insert
                 slow_average_tests += 1
                 attrs[6] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="./all-queries.html#all-query-times.0">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
             else:
                 attrs[6] = ''

             if float(r[5]) > allowed_single_run_time * total_runs:
                 slow_average_tests += 1
                 attrs[5] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="./all-queries.html#all-query-times.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
             else:
                 attrs[5] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_test_times()
+    add_test_times()

-    def print_benchmark_results():
+    def add_benchmark_results():
         if not os.path.isfile('benchmark/website-left.json'):
             return
@@ -479,26 +535,33 @@ if args.report == 'main':
             all_rows.append([row, attrs])

-        print(tableStart('Concurrent benchmarks'))
-        print(tableHeader(header))
+        text = tableStart('Concurrent benchmarks')
+        text += tableHeader(header)
         for row, attrs in all_rows:
-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+
+        global tables
+        tables.append(text)

     try:
-        print_benchmark_results()
+        add_benchmark_results()
     except:
         report_errors.append(
             traceback.format_exception_only(
                 *sys.exc_info()[:2])[-1])
         pass

-    printSimpleTable('Metric changes',
+    addSimpleTable('Metric changes',
         ['Metric', 'Old median value', 'New median value',
             'Relative difference', 'Times difference'],
         tsvRows('metrics/changes.tsv'))

-    print_report_errors()
+    add_report_errors()
+    add_errors_explained()
+
+    for t in tables:
+        print(t)

     print("""
     <p class="links">
@@ -559,9 +622,9 @@ elif args.report == 'all-queries':
     print(header_template.format())

-    print_tested_commits()
+    add_tested_commits()

-    def print_all_queries():
+    def add_all_queries():
         rows = tsvRows('report/all-queries.tsv')
         if not rows:
             return
@@ -579,8 +642,8 @@ elif args.report == 'all-queries':
             'Query', #9
         ]

-        print(tableStart('All query times'))
-        print(tableHeader(columns))
+        text = tableStart('All query times')
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[0] = None
@@ -606,13 +669,15 @@ elif args.report == 'all-queries':
             attrs[2] = ''
             attrs[3] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_all_queries()
+    add_all_queries()
+    add_report_errors()

-    print_report_errors()
+    for t in tables:
+        print(t)

     print("""
     <p class="links">


@@ -28,7 +28,7 @@ ClickHouse-specific aggregate functions:
 - [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
 - [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
 - [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
-- [topK](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
+- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
 - [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
 - [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
 - [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)


@@ -157,7 +157,7 @@ private:
     std::string query_id;
     bool continue_on_errors;
     bool print_stacktrace;
-    Settings settings;
+    const Settings & settings;
     SharedContextHolder shared_context;
     Context global_context;
     QueryProcessingStage::Enum query_processing_stage;


@@ -1323,7 +1323,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
     /// Try start processing, create node about it
     {
         String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id);
-        CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
+        CleanStateClock new_clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
         if (clean_state_clock != new_clean_state_clock)
         {
             LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number));
@@ -1360,7 +1360,8 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
         LOG_DEBUG(log, "Create destination tables. Query: {}", query);
         UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, PoolMode::GET_MANY);
-        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
+        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}",
+            getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
     }

     /// Do the copying
@@ -1391,18 +1392,18 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
         try
         {
+            std::unique_ptr<Context> context_select = std::make_unique<Context>(context);
+            context_select->setSettings(task_cluster->settings_pull);
+
+            std::unique_ptr<Context> context_insert = std::make_unique<Context>(context);
+            context_insert->setSettings(task_cluster->settings_push);
+
             /// Custom INSERT SELECT implementation
-            Context context_select = context;
-            context_select.setSettings(task_cluster->settings_pull);
-
-            Context context_insert = context;
-            context_insert.setSettings(task_cluster->settings_push);
-
             BlockInputStreamPtr input;
             BlockOutputStreamPtr output;
             {
-                BlockIO io_select = InterpreterFactory::get(query_select_ast, context_select)->execute();
-                BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute();
+                BlockIO io_select = InterpreterFactory::get(query_select_ast, *context_select)->execute();
+                BlockIO io_insert = InterpreterFactory::get(query_insert_ast, *context_insert)->execute();

                 input = io_select.getInputStream();
                 output = io_insert.out;


@@ -154,12 +154,13 @@ public:
         if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
             throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);

-        auto & value = this->data(place).value;
-
-        value.resize(size, arena);
-        buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
-
-        this->data(place).sum = value.back();
+        if (size > 0)
+        {
+            auto & value = this->data(place).value;
+            value.resize(size, arena);
+            buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
+            this->data(place).sum = value.back();
+        }
     }

     void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
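
A minimal standalone sketch (a toy struct, not the ClickHouse aggregate function) of the bug the size > 0 guard above avoids: deserializing a zero-length state used to call back() on an empty container, which is undefined behaviour; with the guard, an empty state is simply left empty.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <vector>

struct MovingSumState
{
    std::vector<int64_t> value;
    int64_t sum = 0;

    void deserialize(std::istream & buf, size_t size)
    {
        if (size > 0)
        {
            value.resize(size);
            buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
            sum = value.back();  // safe: the vector is known to be non-empty here
        }
        // With size == 0 nothing is read and back() is never called on an empty vector.
    }
};

int main()
{
    std::stringstream buf;
    MovingSumState state;
    state.deserialize(buf, 0);  // empty serialized state: no read, no UB
    assert(state.value.empty() && state.sum == 0);
}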


@@ -149,7 +149,7 @@ static void getNotEnoughMemoryMessage(std::string & msg)
 #if defined(__linux__)
     try
     {
-        static constexpr size_t buf_size = 4096;
+        static constexpr size_t buf_size = 1024;
         char buf[buf_size];
         UInt64 max_map_count = 0;


@@ -8,11 +8,13 @@
 #define __msan_unpoison(X, Y)
 #define __msan_test_shadow(X, Y) (false)
 #define __msan_print_shadow(X, Y)
+#define __msan_unpoison_string(X)
 #if defined(__has_feature)
 #   if __has_feature(memory_sanitizer)
 #       undef __msan_unpoison
 #       undef __msan_test_shadow
 #       undef __msan_print_shadow
+#       undef __msan_unpoison_string
 #       include <sanitizer/msan_interface.h>
 #   endif
 #endif


@@ -252,7 +252,7 @@ private:
     /// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.

     /// For each of the NUM_PASSES bit ranges of the key, consider how many times each value of this bit range met.
-    CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
+    std::unique_ptr<CountType[]> histograms{new CountType[HISTOGRAM_SIZE * NUM_PASSES]{}};

     typename Traits::Allocator allocator;
@@ -358,7 +358,7 @@ private:
         /// The beginning of every i-1-th bucket. 0th element will be equal to 1st.
         /// Last element will point to array end.
-        Element * prev_buckets[HISTOGRAM_SIZE + 1];
+        std::unique_ptr<Element *[]> prev_buckets{new Element*[HISTOGRAM_SIZE + 1]};
         /// The beginning of every i-th bucket (the same array shifted by one).
         Element ** buckets = &prev_buckets[1];
@@ -375,7 +375,7 @@ private:
         /// also it corresponds with the results from https://github.com/powturbo/TurboHist
         static constexpr size_t UNROLL_COUNT = 8;
-        CountType count[HISTOGRAM_SIZE * UNROLL_COUNT]{};
+        std::unique_ptr<CountType[]> count{new CountType[HISTOGRAM_SIZE * UNROLL_COUNT]{}};
         size_t unrolled_size = size / UNROLL_COUNT * UNROLL_COUNT;
         for (Element * elem = arr; elem < arr + unrolled_size; elem += UNROLL_COUNT)


@@ -318,7 +318,7 @@ protected:
     /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
      * storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
-    VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
+    std::unique_ptr<VolnitskyTraits::Offset[]> hash; /// Hash table.

     const bool fallback; /// Do we need to use the fallback algorithm.
@@ -340,7 +340,7 @@ public:
         if (fallback)
             return;

-        memset(hash, 0, sizeof(hash));
+        hash = std::unique_ptr<VolnitskyTraits::Offset[]>(new VolnitskyTraits::Offset[VolnitskyTraits::hash_size]{});

         auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
         /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
@@ -419,7 +419,7 @@ private:
         VolnitskyTraits::Offset off;
     };

-    OffsetId hash[VolnitskyTraits::hash_size];
+    std::unique_ptr<OffsetId[]> hash;

     /// step for each bunch of strings
     size_t step;
@@ -434,6 +434,7 @@ public:
     MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
     {
         fallback_searchers.reserve(needles.size());
+        hash = std::unique_ptr<OffsetId[]>(new OffsetId[VolnitskyTraits::hash_size]); /// No zero initialization, it will be done later.
     }

     /**
@@ -454,7 +455,7 @@ public:
         if (last == needles.size())
             return false;

-        memset(hash, 0, sizeof(hash));
+        memset(hash.get(), 0, VolnitskyTraits::hash_size * sizeof(OffsetId));
         fallback_needles.clear();
         step = std::numeric_limits<size_t>::max();
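
One detail worth spelling out with a standalone sketch (not ClickHouse code; the struct names are made up): once a fixed-size member array becomes a std::unique_ptr<T[]>, sizeof no longer measures the table but only the pointer, which is why the memset calls above had to switch to an explicit element count.

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <memory>

constexpr size_t hash_size = 64 * 1024;

struct OnStackTable
{
    unsigned short hash[hash_size];
    void clear() { memset(hash, 0, sizeof(hash)); }  // sizeof(hash) == 128 KiB, correct
};

struct OnHeapTable
{
    std::unique_ptr<unsigned short[]> hash{new unsigned short[hash_size]};
    void clear()
    {
        // sizeof(hash) here is only the size of the smart pointer (typically 8 bytes),
        // so the element count must be written out explicitly.
        memset(hash.get(), 0, hash_size * sizeof(unsigned short));
    }
};

int main()
{
    OnHeapTable table;
    table.clear();
    std::printf("sizeof(pointer member) = %zu\n", sizeof(table.hash));  // 8, not 131072
}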


@@ -79,8 +79,8 @@ String getFilesystemName([[maybe_unused]] const String & mount_point)
         throw DB::Exception("Cannot open /etc/mtab to get name of filesystem", ErrorCodes::SYSTEM_ERROR);
     mntent fs_info;
     constexpr size_t buf_size = 4096;   /// The same as buffer used for getmntent in glibc. It can happen that it's not enough
-    char buf[buf_size];
-    while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point)
+    std::vector<char> buf(buf_size);
+    while (getmntent_r(mounted_filesystems, &fs_info, buf.data(), buf_size) && fs_info.mnt_dir != mount_point)
         ;
     endmntent(mounted_filesystems);
     if (fs_info.mnt_dir != mount_point)


@@ -235,8 +235,7 @@ void DatabaseOrdinary::alterTable(const Context & context, const StorageID & tab
     String statement;

     {
-        char in_buf[METADATA_FILE_BUFFER_SIZE];
-        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf);
+        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE);
         readStringUntilEOF(statement, in);
     }


@@ -48,11 +48,11 @@ struct NgramDistanceImpl
     /// Max codepoints to store at once. 16 is for batching usage and PODArray has this padding.
     static constexpr size_t simultaneously_codepoints_num = default_padding + N - 1;

-    /** This fits mostly in L2 cache all the time.
+    /** map_size of this fits mostly in L2 cache all the time.
      * Actually use UInt16 as addings and subtractions do not UB overflow. But think of it as a signed
      * integer array.
      */
-    using NgramStats = UInt16[map_size];
+    using NgramCount = UInt16;

     static ALWAYS_INLINE UInt16 calculateASCIIHash(const CodePoint * code_points)
     {
@@ -169,8 +169,8 @@ struct NgramDistanceImpl
     static ALWAYS_INLINE inline size_t calculateNeedleStats(
         const char * data,
         const size_t size,
-        NgramStats & ngram_stats,
-        [[maybe_unused]] UInt16 * ngram_storage,
+        NgramCount * ngram_stats,
+        [[maybe_unused]] NgramCount * ngram_storage,
         size_t (*read_code_points)(CodePoint *, const char *&, const char *),
         UInt16 (*hash_functor)(const CodePoint *))
     {
@@ -202,7 +202,7 @@ struct NgramDistanceImpl
     static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
         const char * data,
         const size_t size,
-        NgramStats & ngram_stats,
+        NgramCount * ngram_stats,
         size_t & distance,
         [[maybe_unused]] UInt16 * ngram_storage,
         size_t (*read_code_points)(CodePoint *, const char *&, const char *),
@@ -256,7 +256,7 @@ struct NgramDistanceImpl
     static void constantConstant(std::string data, std::string needle, Float32 & res)
     {
-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         /// We use unsafe versions of getting ngrams, so I decided to use padded strings.
         const size_t needle_size = needle.size();
@@ -264,11 +264,11 @@ struct NgramDistanceImpl
         needle.resize(needle_size + default_padding);
         data.resize(data_size + default_padding);

-        size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
+        size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats.get(), nullptr);
         size_t distance = second_size;
         if (data_size <= max_string_size)
         {
-            size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats, distance, nullptr);
+            size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats.get(), distance, nullptr);
             /// For !symmetric version we should not use first_size.
             if constexpr (symmetric)
                 res = distance * 1.f / std::max(first_size + second_size, size_t(1));
@@ -295,7 +295,7 @@ struct NgramDistanceImpl
         size_t prev_haystack_offset = 0;
         size_t prev_needle_offset = 0;

-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         /// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
         /// And we can reuse these storages in one thread because we care only about what was written to first places.
@@ -316,7 +316,7 @@ struct NgramDistanceImpl
                 calculateNeedleStats<true>,
                 needle,
                 needle_size,
-                common_stats,
+                common_stats.get(),
                 needle_ngram_storage.get());
             size_t distance = needle_stats_size;
@@ -326,7 +326,7 @@ struct NgramDistanceImpl
                     calculateHaystackStatsAndMetric<true>,
                     haystack,
                     haystack_size,
-                    common_stats,
+                    common_stats.get(),
                     distance,
                     haystack_ngram_storage.get());
@@ -378,7 +378,7 @@ struct NgramDistanceImpl
         const size_t needle_offsets_size = needle_offsets.size();
         size_t prev_offset = 0;

-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
         std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
@@ -394,7 +394,7 @@ struct NgramDistanceImpl
                 calculateNeedleStats<true>,
                 needle,
                 needle_size,
-                common_stats,
+                common_stats.get(),
                 needle_ngram_storage.get());
             size_t distance = needle_stats_size;
@@ -403,7 +403,7 @@ struct NgramDistanceImpl
                 calculateHaystackStatsAndMetric<true>,
                 haystack.data(),
                 haystack_size,
-                common_stats,
+                common_stats.get(),
                 distance,
                 haystack_ngram_storage.get());
@@ -430,17 +430,16 @@ struct NgramDistanceImpl
         PaddedPODArray<Float32> & res)
     {
         /// zeroing our map
-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

-        /// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
-        /// And we can reuse these storages in one thread because we care only about what was written to first places.
-        std::unique_ptr<UInt16[]> ngram_storage(new UInt16[max_string_size]);
+        /// We can reuse these storages in one thread because we care only about what was written to first places.
+        std::unique_ptr<UInt16[]> ngram_storage(new NgramCount[max_string_size]);

         /// We use unsafe versions of getting ngrams, so I decided to use padded_data even in needle case.
         const size_t needle_size = needle.size();
         needle.resize(needle_size + default_padding);

-        const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
+        const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats.get(), nullptr);

         size_t distance = needle_stats_size;
         size_t prev_offset = 0;
@@ -453,7 +452,7 @@ struct NgramDistanceImpl
             size_t haystack_stats_size = dispatchSearcher(
                 calculateHaystackStatsAndMetric<true>,
                 reinterpret_cast<const char *>(haystack),
-                haystack_size, common_stats,
+                haystack_size, common_stats.get(),
                 distance,
                 ngram_storage.get());
             /// For !symmetric version we should not use haystack_stats_size.


@@ -58,8 +58,8 @@ public:
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override;

 private:
-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     template <typename T>
     struct MethodOneNumber


@@ -118,8 +118,8 @@ public:
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override;

 private:
-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     void executeMethodImpl(
         const std::vector<const ColumnArray::Offsets *> & offsets_by_depth,


@@ -55,8 +55,8 @@ public:
 private:
     const Context & context;

-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     struct UnpackedArrays
     {


@@ -42,12 +42,12 @@ void AIOContextPool::doMonitor()
 void AIOContextPool::waitForCompletion()
 {
     /// array to hold completion events
-    io_event events[max_concurrent_events];
+    std::vector<io_event> events(max_concurrent_events);

     try
     {
-        const auto num_events = getCompletionEvents(events, max_concurrent_events);
-        fulfillPromises(events, num_events);
+        const auto num_events = getCompletionEvents(events.data(), max_concurrent_events);
+        fulfillPromises(events.data(), num_events);
         notifyProducers(num_events);
     }
     catch (...)


@@ -33,7 +33,8 @@ private:
         working_buffer = in.buffer();
         pos = in.position();

-        calculateHash(working_buffer.begin(), working_buffer.size());
+        // `pos` may be different from working_buffer.begin() when using AIO.
+        calculateHash(pos, working_buffer.end() - pos);

         return res;
     }


@@ -22,6 +22,8 @@ PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_
 void PeekableReadBuffer::reset()
 {
+    checkStateCorrect();
+
     peeked_size = 0;
     checkpoint = nullptr;
     checkpoint_in_own_memory = false;
@@ -31,6 +33,8 @@ void PeekableReadBuffer::reset()

     Buffer & sub_working = sub_buf.buffer();
     BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
+
+    checkStateCorrect();
 }

 bool PeekableReadBuffer::peekNext()
@@ -150,7 +154,7 @@ bool PeekableReadBuffer::nextImpl()
     /// Switch to reading from sub_buf (or just update it if already switched)
     Buffer & sub_working = sub_buf.buffer();
     BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
-    working_buffer_offset = sub_buf.offset();
+    nextimpl_working_buffer_offset = sub_buf.offset();

     checkStateCorrect();
     return res;
@@ -159,7 +163,6 @@ bool PeekableReadBuffer::nextImpl()

 void PeekableReadBuffer::checkStateCorrect() const
 {
-#ifndef NDEBUG
     if (checkpoint)
     {
         if (checkpointInOwnMemory())
@@ -190,7 +193,6 @@ void PeekableReadBuffer::checkStateCorrect() const
             throw DB::Exception("Pos in empty own buffer", ErrorCodes::LOGICAL_ERROR);
         if (unread_limit < memory.size())
             throw DB::Exception("Size limit exceed", ErrorCodes::LOGICAL_ERROR);
-#endif
 }

 void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
@@ -245,11 +247,10 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)

 void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos()
 {
-#ifndef NDEBUG
     if (!checkpoint)
         throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
     checkStateCorrect();
-#endif
+
     if (!checkpointInOwnMemory() || currentlyReadFromOwnMemory())
         return; /// is't already continuous


@@ -1,5 +1,6 @@
 #pragma once

+#include <cassert>
 #include <cstring>
 #include <algorithm>
 #include <memory>
@@ -41,6 +42,11 @@ public:
       */
     ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}

+    // Copying the read buffers can be dangerous because they can hold a lot of
+    // memory or open files, so better to disable the copy constructor to prevent
+    // accidental copying.
+    ReadBuffer(const ReadBuffer &) = delete;
+
     // FIXME: behavior differs greately from `BufferBase::set()` and it's very confusing.
     void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }
@@ -54,8 +60,8 @@ public:
         if (!res)
             working_buffer.resize(0);

-        pos = working_buffer.begin() + working_buffer_offset;
-        working_buffer_offset = 0;
+        pos = working_buffer.begin() + nextimpl_working_buffer_offset;
+        nextimpl_working_buffer_offset = 0;
         return res;
     }
@@ -169,8 +175,10 @@ public:
     }

 protected:
-    /// The number of bytes to ignore from the initial position of `working_buffer` buffer.
-    size_t working_buffer_offset = 0;
+    /// The number of bytes to ignore from the initial position of `working_buffer`
+    /// buffer. Apparently this is an additional out-parameter for nextImpl(),
+    /// not a real field.
+    size_t nextimpl_working_buffer_offset = 0;

 private:
     /** Read the next data and fill a buffer with it.
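
To make the "out-parameter for nextImpl()" remark concrete, here is a heavily simplified standalone model (a toy class, not the real ReadBuffer): nextImpl() refills the buffer and may report that the first few bytes should be skipped, and next() applies that offset exactly once before resetting it to zero.

#include <cassert>
#include <cstddef>
#include <string>

class ToyReadBuffer
{
public:
    bool next()
    {
        bool res = nextImpl();
        if (!res)
            working_buffer.clear();
        // The offset produced by nextImpl() is consumed here, exactly once.
        pos = nextimpl_working_buffer_offset;
        nextimpl_working_buffer_offset = 0;
        return res;
    }

    char current() const { return working_buffer[pos]; }

protected:
    std::string working_buffer;
    size_t pos = 0;
    // "Out-parameter" of nextImpl(): how many leading bytes of the freshly
    // filled working_buffer the caller should skip (e.g. alignment padding).
    size_t nextimpl_working_buffer_offset = 0;

private:
    bool nextImpl()
    {
        // Pretend an aligned read returned two bytes of padding before the payload.
        working_buffer = "..payload";
        nextimpl_working_buffer_offset = 2;
        return true;
    }
};

int main()
{
    ToyReadBuffer buf;
    assert(buf.next());
    assert(buf.current() == 'p');  // the two padding bytes were skipped
}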


@@ -298,7 +298,7 @@ void ReadBufferAIO::finalize()
     first_unread_pos_in_file += bytes_read;
     total_bytes_read += bytes_read;
-    working_buffer_offset = region_left_padding;
+    nextimpl_working_buffer_offset = region_left_padding;

     if (total_bytes_read == max_bytes_read)
         is_eof = true;


@@ -32,8 +32,6 @@ public:
     ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
         char * existing_memory = nullptr, size_t alignment = 0);

-    ReadBufferFromFile(ReadBufferFromFile &&) = default;
-
     ~ReadBufferFromFile() override;

     /// Close file before destruction of object.


@@ -17,7 +17,6 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>
 public:
     ReadBufferFromFileBase();
     ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
-    ReadBufferFromFileBase(ReadBufferFromFileBase &&) = default;
     ~ReadBufferFromFileBase() override;
     virtual std::string getFileName() const = 0;


@@ -85,7 +85,7 @@ bool ReadBufferFromFileDescriptor::nextImpl()
         }
     }

-    pos_in_file += bytes_read;
+    file_offset_of_buffer_end += bytes_read;

     if (bytes_read)
     {
@@ -102,22 +102,35 @@ bool ReadBufferFromFileDescriptor::nextImpl()
 /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen.
 off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
 {
-    off_t new_pos;
+    size_t new_pos;
     if (whence == SEEK_SET)
+    {
+        assert(offset >= 0);
         new_pos = offset;
+    }
     else if (whence == SEEK_CUR)
-        new_pos = pos_in_file - (working_buffer.end() - pos) + offset;
+    {
+        new_pos = file_offset_of_buffer_end - (working_buffer.end() - pos) + offset;
+    }
     else
+    {
         throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+    }

     /// Position is unchanged.
-    if (new_pos + (working_buffer.end() - pos) == pos_in_file)
+    if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
         return new_pos;

-    if (hasPendingData() && new_pos <= pos_in_file && new_pos >= pos_in_file - static_cast<off_t>(working_buffer.size()))
+    // file_offset_of_buffer_end corresponds to working_buffer.end(); it's a past-the-end pos,
+    // so the second inequality is strict.
+    if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
+        && new_pos < file_offset_of_buffer_end)
     {
         /// Position is still inside buffer.
-        pos = working_buffer.begin() + (new_pos - (pos_in_file - working_buffer.size()));
+        pos = working_buffer.end() - file_offset_of_buffer_end + new_pos;
+        assert(pos >= working_buffer.begin());
+        assert(pos < working_buffer.end());
+
         return new_pos;
     }
     else
@@ -130,7 +143,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
     if (-1 == res)
         throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(),
             ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

-    pos_in_file = new_pos;
+    file_offset_of_buffer_end = new_pos;

     watch.stop();
     ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
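
A small standalone arithmetic check of the in-buffer seek condition above (plain integers, not the real class): with file_offset_of_buffer_end being the file offset of working_buffer.end(), the buffer covers the half-open range [file_offset_of_buffer_end - buffer_size, file_offset_of_buffer_end), which is why the second comparison has to stay strict.

#include <cassert>
#include <cstddef>

// The buffer holds the file bytes [end_offset - size, end_offset).
bool seek_stays_in_buffer(size_t file_offset_of_buffer_end, size_t buffer_size, size_t new_pos)
{
    return file_offset_of_buffer_end - buffer_size <= new_pos
        && new_pos < file_offset_of_buffer_end;  // strict: end_offset itself is past-the-end
}

int main()
{
    // A 4096-byte buffer whose last byte is file offset 8191.
    const size_t end_offset = 8192, size = 4096;

    assert(seek_stays_in_buffer(end_offset, size, 4096));   // first byte of the buffer
    assert(seek_stays_in_buffer(end_offset, size, 8191));   // last byte of the buffer
    assert(!seek_stays_in_buffer(end_offset, size, 8192));  // past-the-end: needs a real seek
    assert(!seek_stays_in_buffer(end_offset, size, 4095));  // before the buffer

    // Mapping a hit back into the buffer, as the patch does:
    // pos = working_buffer.end() - file_offset_of_buffer_end + new_pos.
    const size_t new_pos = 8191;
    assert(size - (end_offset - new_pos) == 4095);
    return 0;
}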


@@ -14,7 +14,7 @@ class ReadBufferFromFileDescriptor : public ReadBufferFromFileBase
 {
 protected:
     int fd;
-    off_t pos_in_file; /// What offset in file corresponds to working_buffer.end().
+    size_t file_offset_of_buffer_end; /// What offset in file corresponds to working_buffer.end().

     bool nextImpl() override;
@@ -23,9 +23,7 @@ protected:
 public:
     ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
-        : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), pos_in_file(0) {}
-
-    ReadBufferFromFileDescriptor(ReadBufferFromFileDescriptor &&) = default;
+        : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), file_offset_of_buffer_end(0) {}

     int getFD() const
     {
@@ -34,7 +32,7 @@ public:
     off_t getPosition() override
     {
-        return pos_in_file - (working_buffer.end() - pos);
+        return file_offset_of_buffer_end - (working_buffer.end() - pos);
     }

     /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen.


@@ -19,7 +19,6 @@ class ReadBufferFromHDFS : public BufferWithOwnMemory<ReadBuffer>
     std::unique_ptr<ReadBufferFromHDFSImpl> impl;
 public:
     ReadBufferFromHDFS(const std::string & hdfs_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
-    ReadBufferFromHDFS(ReadBufferFromHDFS &&) = default;
     ~ReadBufferFromHDFS() override;

     bool nextImpl() override;


@@ -1100,9 +1100,14 @@ bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
         return true;

     saveUpToPosition(in, memory, current);
     bool loaded_more = !in.eof();
-    assert(in.position() == in.buffer().begin());
+
+    // A sanity check. Buffer position may be in the beginning of the buffer
+    // (normal case), or have some offset from it (AIO).
+    assert(in.position() >= in.buffer().begin());
+    assert(in.position() <= in.buffer().end());
+
     current = in.position();
     return loaded_more;
 }


@ -11,6 +11,8 @@ namespace ErrorCodes
{ {
extern const int INVALID_JOIN_ON_EXPRESSION; extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int AMBIGUOUS_COLUMN_NAME; extern const int AMBIGUOUS_COLUMN_NAME;
extern const int SYNTAX_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
@ -54,47 +56,58 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
addJoinKeys(asof_left_key, asof_right_key, {1, 2}); addJoinKeys(asof_left_key, asof_right_key, {1, 2});
} }
void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data)
{ {
if (func.name == "and") if (func.name == "and")
return; /// go into children return; /// go into children
if (func.name == "equals") if (func.name == "or")
throw Exception("JOIN ON does not support OR. Unexpected '" + queryToString(ast) + "'", ErrorCodes::NOT_IMPLEMENTED);
ASOF::Inequality inequality = ASOF::getInequality(func.name);
if (func.name == "equals" || inequality != ASOF::Inequality::None)
{ {
if (func.arguments->children.size() != 2) if (func.arguments->children.size() != 2)
{ throw Exception("Function " + func.name + " takes two arguments, got '" + func.formatForErrorMessage() + "' instead",
throwSyntaxException("Function 'equals' takes two arguments, got '" ErrorCodes::SYNTAX_ERROR);
+ func.formatForErrorMessage() + "' instead."); }
} else
throw Exception("Expected equality or inequality, got '" + queryToString(ast) + "'", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
if (func.name == "equals")
{
ASTPtr left = func.arguments->children.at(0); ASTPtr left = func.arguments->children.at(0);
ASTPtr right = func.arguments->children.at(1); ASTPtr right = func.arguments->children.at(1);
auto table_numbers = getTableNumbers(ast, left, right, data); auto table_numbers = getTableNumbers(ast, left, right, data);
data.addJoinKeys(left, right, table_numbers); data.addJoinKeys(left, right, table_numbers);
return;
} }
else if (inequality != ASOF::Inequality::None)
ASOF::Inequality inequality = ASOF::getInequality(func.name);
if (data.is_asof && (inequality != ASOF::Inequality::None))
{ {
if (!data.is_asof)
throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::NOT_IMPLEMENTED);
if (data.asof_left_key || data.asof_right_key) if (data.asof_left_key || data.asof_right_key)
throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + "."); throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
ASTPtr left = func.arguments->children.at(0); ASTPtr left = func.arguments->children.at(0);
ASTPtr right = func.arguments->children.at(1); ASTPtr right = func.arguments->children.at(1);
auto table_numbers = getTableNumbers(ast, left, right, data); auto table_numbers = getTableNumbers(ast, left, right, data);
data.addAsofJoinKeys(left, right, table_numbers, inequality); data.addAsofJoinKeys(left, right, table_numbers, inequality);
return;
} }
throwSyntaxException("Expected equals expression, got " + queryToString(ast) + ".");
} }
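In the reworked visit() above the dispatch is: "and" recurses into children, "or" is rejected as not implemented, a binary "equals" becomes an ordinary join key, a single inequality is accepted only for ASOF JOIN (and only once), and anything else is an invalid JOIN ON expression. A rough Python sketch of that decision tree, with hypothetical helper names used only for illustration:

# Sketch of the JOIN ON dispatch in CollectJoinOnKeysMatcher::visit (illustration only).
ASOF_INEQUALITIES = {"less", "greater", "lessOrEquals", "greaterOrEquals"}

def visit_on_function(name, args, is_asof, asof_key_already_set):
    if name == "and":
        return "recurse into children"
    if name == "or":
        raise NotImplementedError("JOIN ON does not support OR")
    is_inequality = name in ASOF_INEQUALITIES
    if name != "equals" and not is_inequality:
        raise ValueError("Expected equality or inequality")              # INVALID_JOIN_ON_EXPRESSION
    if len(args) != 2:
        raise SyntaxError("Function " + name + " takes two arguments")   # SYNTAX_ERROR
    if name == "equals":
        return "data.addJoinKeys(left, right)"
    if not is_asof:
        raise NotImplementedError("JOIN ON inequalities are not supported")
    if asof_key_already_set:
        raise ValueError("ASOF JOIN expects exactly one inequality in ON section")
    return "data.addAsofJoinKeys(left, right, inequality)"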
void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out) void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out)
{ {
if (const auto * ident = ast->as<ASTIdentifier>()) if (const auto * func = ast->as<ASTFunction>())
{
if (func->name == "arrayJoin")
throw Exception("Not allowed function in JOIN ON. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
}
else if (const auto * ident = ast->as<ASTIdentifier>())
{ {
if (IdentifierSemantic::getColumnName(*ident)) if (IdentifierSemantic::getColumnName(*ident))
out.push_back(ident); out.push_back(ident);
@ -122,8 +135,8 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
auto left_name = queryToString(*left_identifiers[0]); auto left_name = queryToString(*left_identifiers[0]);
auto right_name = queryToString(*right_identifiers[0]); auto right_name = queryToString(*right_identifiers[0]);
throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name throw Exception("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
+ " are from the same table but from different arguments of equal function."); + " are from the same table but from different arguments of equal function", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
} }
return std::make_pair(left_idents_table, right_idents_table); return std::make_pair(left_idents_table, right_idents_table);
@ -214,12 +227,4 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
return table_number; return table_number;
} }
[[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg)
{
throw Exception("Invalid expression for JOIN ON. " + msg +
" Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
}
} }


@ -49,8 +49,7 @@ public:
static bool needChildVisit(const ASTPtr & node, const ASTPtr &) static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{ {
if (auto * func = node->as<ASTFunction>()) if (auto * func = node->as<ASTFunction>())
if (func->name == "equals") return func->name == "and";
return false;
return true; return true;
} }
@ -61,8 +60,6 @@ private:
static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases); static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data); static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
[[noreturn]] static void throwSyntaxException(const String & msg);
}; };
/// Parse JOIN ON expression and collect ASTs for joined columns. /// Parse JOIN ON expression and collect ASTs for joined columns.


@ -697,7 +697,7 @@ void executeQuery(
const char * end; const char * end;
/// If 'istr' is empty now, fetch next data into buffer. /// If 'istr' is empty now, fetch next data into buffer.
if (istr.buffer().size() == 0) if (!istr.hasPendingData())
istr.next(); istr.next();
size_t max_query_size = context.getSettingsRef().max_query_size; size_t max_query_size = context.getSettingsRef().max_query_size;
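The switch from istr.buffer().size() == 0 to !istr.hasPendingData() changes the question being asked: the old check only detects a working buffer that is still empty (never filled), while hasPendingData() asks whether any unread bytes remain between the current position and the end of the working buffer, which is the condition that actually decides whether next() is needed. Roughly, in Python terms (an approximation of the ReadBuffer contract, not its code):

# Approximate meaning of the two checks (sketch).
def buffer_is_unfilled(buffer_begin, buffer_end):
    return buffer_end - buffer_begin == 0      # old check: istr.buffer().size() == 0

def has_pending_data(position, buffer_end):
    return position < buffer_end               # new check: unread bytes remain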


@ -135,7 +135,13 @@ void writeCommonErrorMessage(
out << ": failed at position " << (last_token.begin - begin + 1); out << ": failed at position " << (last_token.begin - begin + 1);
if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon) if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon)
{
out << " (end of query)"; out << " (end of query)";
}
else
{
out << " ('" << std::string(last_token.begin, last_token.end - last_token.begin) << "')";
}
/// If query is multiline. /// If query is multiline.
const char * nl = find_first_symbols<'\n'>(begin, end); const char * nl = find_first_symbols<'\n'>(begin, end);


@ -727,6 +727,11 @@ bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExten
{ {
return false; return false;
} }
// skip tombstone records (kafka messages with null value)
if (in.available() == 0)
{
return false;
}
SchemaId schema_id = readConfluentSchemaId(in); SchemaId schema_id = readConfluentSchemaId(in);
const auto & deserializer = getOrCreateDeserializer(schema_id); const auto & deserializer = getOrCreateDeserializer(schema_id);
deserializer.deserializeRow(columns, *decoder, ext); deserializer.deserializeRow(columns, *decoder, ext);
@ -734,6 +739,12 @@ bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExten
return true; return true;
} }
void AvroConfluentRowInputFormat::syncAfterError()
{
// skip until the end of current kafka message
in.tryIgnore(in.available());
}
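The two additions above handle Kafka-specific cases: a tombstone record arrives as a message with a null value, so the input buffer is empty and readRow() returns false before trying to read a Confluent schema id; and after a parse error, syncAfterError() drains the rest of the current message so parsing resumes at a message boundary. For reference, a tombstone can be produced from a test with kafka-python roughly like this (topic name and broker address are placeholders):

from kafka import KafkaProducer

# A tombstone is a keyed message with a null value; the AvroConfluent input format
# above now skips it instead of trying to parse a schema id from an empty payload.
producer = KafkaProducer(bootstrap_servers="localhost:9092")
producer.send("avro_confluent_topic", key=b"row-0", value=None)
producer.flush()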
const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id) const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id)
{ {
auto it = deserializer_cache.find(schema_id); auto it = deserializer_cache.find(schema_id);


@ -129,6 +129,9 @@ public:
String getName() const override { return "AvroConfluentRowInputFormat"; } String getName() const override { return "AvroConfluentRowInputFormat"; }
class SchemaRegistry; class SchemaRegistry;
protected:
bool allowSyncAfterError() const override { return true; }
void syncAfterError() override;
private: private:
std::shared_ptr<SchemaRegistry> schema_registry; std::shared_ptr<SchemaRegistry> schema_registry;
using SchemaId = uint32_t; using SchemaId = uint32_t;


@ -1030,7 +1030,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
commands_for_part.emplace_back(command); commands_for_part.emplace_back(command);
} }
if (source_part->isStoredOnDisk() && !isStorageTouchedByMutations(storage_from_source_part, metadata_snapshot, commands_for_part, context_for_reading)) if (source_part->isStoredOnDisk() && !isStorageTouchedByMutations(
storage_from_source_part, metadata_snapshot, commands_for_part, context_for_reading))
{ {
LOG_TRACE(log, "Part {} doesn't change up to mutation version {}", source_part->name, future_part.part_info.mutation); LOG_TRACE(log, "Part {} doesn't change up to mutation version {}", source_part->name, future_part.part_info.mutation);
return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info, metadata_snapshot); return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info, metadata_snapshot);
@ -1042,7 +1043,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
BlockInputStreamPtr in = nullptr; BlockInputStreamPtr in = nullptr;
Block updated_header; Block updated_header;
std::optional<MutationsInterpreter> interpreter; std::unique_ptr<MutationsInterpreter> interpreter;
const auto data_settings = data.getSettings(); const auto data_settings = data.getSettings();
MutationCommands for_interpreter; MutationCommands for_interpreter;
@ -1057,7 +1058,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
if (!for_interpreter.empty()) if (!for_interpreter.empty())
{ {
interpreter.emplace(storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true); interpreter = std::make_unique<MutationsInterpreter>(
storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true);
in = interpreter->execute(); in = interpreter->execute();
updated_header = interpreter->getUpdatedHeader(); updated_header = interpreter->getUpdatedHeader();
in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress)); in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress));


@ -11,6 +11,7 @@ namespace ErrorCodes
{ {
extern const int CANNOT_READ_ALL_DATA; extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int MEMORY_LIMIT_EXCEEDED;
} }
@ -43,66 +44,74 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
settings.save_marks_in_cache, settings.save_marks_in_cache,
data_part->getColumns().size()) data_part->getColumns().size())
{ {
size_t columns_num = columns.size(); try
column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{ {
const auto & [name, type] = getColumnFromPart(*name_and_type); size_t columns_num = columns.size();
auto position = data_part->getColumnPosition(name);
if (!position && typeid_cast<const DataTypeArray *>(type.get())) column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{ {
/// If array of Nested column is missing in part, const auto & [name, type] = getColumnFromPart(*name_and_type);
/// we have to read its offsets if they exist. auto position = data_part->getColumnPosition(name);
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt); if (!position && typeid_cast<const DataTypeArray *>(type.get()))
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt);
}
column_positions[i] = std::move(position);
} }
column_positions[i] = std::move(position); /// Do not use max_read_buffer_size, but try to lower buffer size with maximal size of granule to avoid reading much data.
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
if (uncompressed_cache)
{
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
fullPath(data_part->volume->getDisk(), full_data_path),
[this, full_data_path, buffer_size]()
{
return data_part->volume->getDisk()->readFile(
full_data_path,
buffer_size,
0,
settings.min_bytes_to_use_direct_io,
settings.min_bytes_to_use_mmap_io);
},
uncompressed_cache);
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer =
std::make_unique<CompressedReadBufferFromFile>(
data_part->volume->getDisk()->readFile(
full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io));
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
}
} }
catch (...)
/// Do not use max_read_buffer_size, but try to lower buffer size with maximal size of granule to avoid reading much data.
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
if (uncompressed_cache)
{ {
auto buffer = std::make_unique<CachedCompressedReadBuffer>( storage.reportBrokenPart(data_part->name);
fullPath(data_part->volume->getDisk(), full_data_path), throw;
[this, full_data_path, buffer_size]()
{
return data_part->volume->getDisk()->readFile(
full_data_path,
buffer_size,
0,
settings.min_bytes_to_use_direct_io,
settings.min_bytes_to_use_mmap_io);
},
uncompressed_cache);
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer =
std::make_unique<CompressedReadBufferFromFile>(
data_part->volume->getDisk()->readFile(
full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io));
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
} }
} }
@ -155,10 +164,18 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
} }
catch (Exception & e) catch (Exception & e)
{ {
if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
storage.reportBrokenPart(data_part->name);
/// Better diagnostics. /// Better diagnostics.
e.addMessage("(while reading column " + name + ")"); e.addMessage("(while reading column " + name + ")");
throw; throw;
} }
catch (...)
{
storage.reportBrokenPart(data_part->name);
throw;
}
} }
++from_mark; ++from_mark;


@ -3,6 +3,7 @@ import random
import threading import threading
import time import time
import pytest import pytest
import io
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV from helpers.test_tools import TSV
@ -16,6 +17,11 @@ from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnecti
from kafka.admin import NewTopic from kafka.admin import NewTopic
from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsRequest_v1 from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsRequest_v1
from kafka.protocol.group import MemberAssignment from kafka.protocol.group import MemberAssignment
import avro.schema
from confluent.schemaregistry.client import CachedSchemaRegistryClient
from confluent.schemaregistry.serializers.MessageSerializer import MessageSerializer
import socket import socket
from google.protobuf.internal.encoder import _VarintBytes from google.protobuf.internal.encoder import _VarintBytes
@ -102,6 +108,22 @@ def kafka_produce_protobuf_messages(topic, start_index, num_messages):
producer.flush() producer.flush()
print("Produced {} messages for topic {}".format(num_messages, topic)) print("Produced {} messages for topic {}".format(num_messages, topic))
def avro_confluent_message(schema_registry_client, value):
# type: (CachedSchemaRegistryClient, dict) -> str
serializer = MessageSerializer(schema_registry_client)
schema = avro.schema.make_avsc_object({
'name': 'row',
'type': 'record',
'fields': [
{'name': 'id', 'type': 'long'},
{'name': 'blockNo', 'type': 'int'},
{'name': 'val1', 'type': 'string'},
{'name': 'val2', 'type': 'float'},
{'name': 'val3', 'type': 'int'}
]
})
return serializer.encode_record_with_schema('test_subject', schema, value)
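The helper above is what builds the AvroConfluent samples further down; a standalone call would look roughly like this (cluster.schema_registry_client and kafka_produce come from the test's cluster fixtures):

# Illustrative standalone use of avro_confluent_message with the cluster fixtures.
message = avro_confluent_message(cluster.schema_registry_client,
                                 {'id': 0, 'blockNo': 0, 'val1': u'AM', 'val2': 0.5, 'val3': 1})
kafka_produce('format_tests_AvroConfluent', [message])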
@pytest.mark.timeout(180) @pytest.mark.timeout(180)
def test_kafka_json_as_string(kafka_cluster): def test_kafka_json_as_string(kafka_cluster):
@ -139,8 +161,8 @@ def test_kafka_formats(kafka_cluster):
'{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
# JSONAsString doesn't fit to that test, and tested separately # JSONAsString doesn't fit to that test, and tested separately
'JSONCompactEachRow' : { 'JSONCompactEachRow' : {
@ -148,8 +170,8 @@ def test_kafka_formats(kafka_cluster):
'["0", 0, "AM", 0.5, 1]\n', '["0", 0, "AM", 0.5, 1]\n',
'["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', '["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n',
'["0", 0, "AM", 0.5, 1]\n', '["0", 0, "AM", 0.5, 1]\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'JSONCompactEachRowWithNamesAndTypes' : { 'JSONCompactEachRowWithNamesAndTypes' : {
'data_sample' : [ 'data_sample' : [
@ -180,16 +202,16 @@ def test_kafka_formats(kafka_cluster):
'0,0,"AM",0.5,1\n', '0,0,"AM",0.5,1\n',
'1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', '1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n',
'0,0,"AM",0.5,1\n', '0,0,"AM",0.5,1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSV' : { 'TSV' : {
'data_sample' : [ 'data_sample' : [
'0\t0\tAM\t0.5\t1\n', '0\t0\tAM\t0.5\t1\n',
'1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n', '1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n',
'0\t0\tAM\t0.5\t1\n', '0\t0\tAM\t0.5\t1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'CSVWithNames' : { 'CSVWithNames' : {
'data_sample' : [ 'data_sample' : [
@ -211,16 +233,16 @@ def test_kafka_formats(kafka_cluster):
"(0,0,'AM',0.5,1)", "(0,0,'AM',0.5,1)",
"(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)", "(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)",
"(0,0,'AM',0.5,1)", "(0,0,'AM',0.5,1)",
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSVWithNames' : { 'TSVWithNames' : {
'data_sample' : [ 'data_sample' : [
'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n',
'id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n',
'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSVWithNamesAndTypes' : { 'TSVWithNamesAndTypes' : {
'data_sample' : [ 'data_sample' : [
@ -389,25 +411,26 @@ def test_kafka_formats(kafka_cluster):
# # ], # # ],
# }, # },
# 'Avro' : { # 'Avro' : {
# # TODO: Not working at all: avro::Exception, e.what() = EOF reached
# #./contrib/libcxx/src/support/runtime/stdexcept_default.ipp:33: std::runtime_error::runtime_error(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) @ 0x22ce2080 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Exception.hh:36: avro::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) @ 0x1de48a6e in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Stream.hh:336: avro::StreamReader::more() @ 0x22717f56 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Stream.hh:0: avro::StreamReader::readBytes(unsigned char*, unsigned long) @ 0x22717d22 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/BinaryDecoder.cc:170: avro::BinaryDecoder::decodeFixed(unsigned long, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&) @ 0x227177cb in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Specific.hh:216: avro::codec_traits<std::__1::array<unsigned char, 4ul> >::decode(avro::Decoder&, std::__1::array<unsigned char, 4ul>&) @ 0x22743624 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Specific.hh:342: void avro::decode<std::__1::array<unsigned char, 4ul> >(avro::Decoder&, std::__1::array<unsigned char, 4ul>&) @ 0x2272970d in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/DataFile.cc:487: avro::DataFileReaderBase::readHeader() @ 0x2272608d in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/DataFile.cc:280: avro::DataFileReaderBase::DataFileReaderBase(std::__1::unique_ptr<avro::InputStream, std::__1::default_delete<avro::InputStream> >) @ 0x22726923 in /usr/bin/clickhouse
# #./src/Processors/Formats/Impl/AvroRowInputFormat.cpp:571: DB::AvroRowInputFormat::AvroRowInputFormat(DB::Block const&, DB::ReadBuffer&, DB::RowInputFormatParams) @ 0x1de19c9b in /usr/bin/clickhouse
# 'data_sample' : [ # 'data_sample' : [
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x02\x20\x0a\x24\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x80\xaa\x4a\xe3\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\x8d\x1f\xf2\x17\x71\xa4\x2e\xe4\xc9\x0a\x23\x67\x12\xaa\xc6\xc0\x02\x14\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x8d\x1f\xf2\x17\x71\xa4\x2e\xe4\xc9\x0a\x23\x67\x12\xaa\xc6\xc0',
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x1e\x9e\x01\x96\x01\x28\x02\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x04\x15\x0a\x00\x06\x15\x0a\x00\x08\x15\x0a\x00\x0a\x15\x0a\x00\x0c\x15\x0a\x00\x0e\x15\x0a\x00\x10\x15\x0a\x00\x12\x15\x0a\x00\x14\x15\x0a\x00\x16\x15\x0a\x00\x18\x15\x0a\x00\x1a\x15\x0a\x00\x1c\x15\x0a\x24\x1e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x49\x73\x4d\xca\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\xeb\x9d\x51\x82\xf2\x11\x3d\x0b\xc5\x92\x97\xb2\x07\x6d\x72\x5a\x1e\xac\x02\x02\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x04\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x06\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x08\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0a\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0c\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x10\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x12\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x14\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x16\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x18\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1a\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1c\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\xeb\x9d\x51\x82\xf2\x11\x3d\x0b\xc5\x92\x97\xb2\x07\x6d\x72\x5a',
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x02\x20\x0a\x24\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x80\xaa\x4a\xe3\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\x73\x65\x4f\x7c\xd9\x33\xe1\x18\xdd\x30\xe8\x22\x2a\x58\x20\x6f\x02\x14\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x73\x65\x4f\x7c\xd9\x33\xe1\x18\xdd\x30\xe8\x22\x2a\x58\x20\x6f',
# # ''
# ], # ],
# }, # },
# TODO: test for AvroConfluence 'AvroConfluent' : {
'data_sample': [
avro_confluent_message(cluster.schema_registry_client, {'id':0L,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}),
''.join(map(lambda id: avro_confluent_message(cluster.schema_registry_client, {'id':id,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}), range(1,16))),
avro_confluent_message(cluster.schema_registry_client, {'id':0L,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}),
],
'extra_settings': ", format_avro_schema_registry_url='http://{}:{}'".format(
cluster.schema_registry_host,
cluster.schema_registry_port
),
'supports_empty_value': True,
}
# 'Arrow' : { # 'Arrow' : {
# # Not working at all: DB::Exception: Error while opening a table: Invalid: File is too small: 0, Stack trace (when copying this message, always include the lines below): # # Not working at all: DB::Exception: Error while opening a table: Invalid: File is too small: 0, Stack trace (when copying this message, always include the lines below):
# # /src/Common/Exception.cpp:37: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15c2d2a3 in /usr/bin/clickhouse # # /src/Common/Exception.cpp:37: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15c2d2a3 in /usr/bin/clickhouse
@ -452,10 +475,15 @@ def test_kafka_formats(kafka_cluster):
# }, # },
} }
for format_name in all_formats: for format_name, format_opts in all_formats.items():
print('Set up {}'.format(format_name)) print('Set up {}'.format(format_name))
topic_name='format_tests_{}'.format(format_name) topic_name='format_tests_{}'.format(format_name)
kafka_produce(topic_name, all_formats[format_name]['data_sample']) data_sample = format_opts['data_sample']
data_prefix = []
# prepend empty value when supported
if format_opts.get('supports_empty_value', False):
data_prefix = data_prefix + ['']
kafka_produce(topic_name, data_prefix + data_sample)
instance.query(''' instance.query('''
DROP TABLE IF EXISTS test.kafka_{format_name}; DROP TABLE IF EXISTS test.kafka_{format_name};
@ -476,34 +504,35 @@ def test_kafka_formats(kafka_cluster):
CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS
SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name};
'''.format(topic_name=topic_name, format_name=format_name, extra_settings=all_formats[format_name].get('extra_settings') or '')) '''.format(topic_name=topic_name, format_name=format_name, extra_settings=format_opts.get('extra_settings') or ''))
time.sleep(12) time.sleep(12)
for format_name in all_formats: for format_name, format_opts in all_formats.items():
print('Checking {}'.format(format_name)) print('Checking {}'.format(format_name))
topic_name='format_tests_{}'.format(format_name) topic_name='format_tests_{}'.format(format_name)
# shift offsets by 1 if format supports empty value
offsets = [1,2,3] if format_opts.get('supports_empty_value', False) else [0,1,2]
result = instance.query('SELECT * FROM test.kafka_{format_name}_mv;'.format(format_name=format_name)) result = instance.query('SELECT * FROM test.kafka_{format_name}_mv;'.format(format_name=format_name))
expected = '''\ expected = '''\
0 0 AM 0.5 1 {topic_name} 0 0 0 0 AM 0.5 1 {topic_name} 0 {offset_0}
1 0 AM 0.5 1 {topic_name} 0 1 1 0 AM 0.5 1 {topic_name} 0 {offset_1}
2 0 AM 0.5 1 {topic_name} 0 1 2 0 AM 0.5 1 {topic_name} 0 {offset_1}
3 0 AM 0.5 1 {topic_name} 0 1 3 0 AM 0.5 1 {topic_name} 0 {offset_1}
4 0 AM 0.5 1 {topic_name} 0 1 4 0 AM 0.5 1 {topic_name} 0 {offset_1}
5 0 AM 0.5 1 {topic_name} 0 1 5 0 AM 0.5 1 {topic_name} 0 {offset_1}
6 0 AM 0.5 1 {topic_name} 0 1 6 0 AM 0.5 1 {topic_name} 0 {offset_1}
7 0 AM 0.5 1 {topic_name} 0 1 7 0 AM 0.5 1 {topic_name} 0 {offset_1}
8 0 AM 0.5 1 {topic_name} 0 1 8 0 AM 0.5 1 {topic_name} 0 {offset_1}
9 0 AM 0.5 1 {topic_name} 0 1 9 0 AM 0.5 1 {topic_name} 0 {offset_1}
10 0 AM 0.5 1 {topic_name} 0 1 10 0 AM 0.5 1 {topic_name} 0 {offset_1}
11 0 AM 0.5 1 {topic_name} 0 1 11 0 AM 0.5 1 {topic_name} 0 {offset_1}
12 0 AM 0.5 1 {topic_name} 0 1 12 0 AM 0.5 1 {topic_name} 0 {offset_1}
13 0 AM 0.5 1 {topic_name} 0 1 13 0 AM 0.5 1 {topic_name} 0 {offset_1}
14 0 AM 0.5 1 {topic_name} 0 1 14 0 AM 0.5 1 {topic_name} 0 {offset_1}
15 0 AM 0.5 1 {topic_name} 0 1 15 0 AM 0.5 1 {topic_name} 0 {offset_1}
0 0 AM 0.5 1 {topic_name} 0 2 0 0 AM 0.5 1 {topic_name} 0 {offset_2}
'''.format(topic_name=topic_name) '''.format(topic_name=topic_name, offset_0 = offsets[0], offset_1 = offsets[1], offset_2 = offsets[2])
assert TSV(result) == TSV(expected), 'Proper result for format: {}'.format(format_name) assert TSV(result) == TSV(expected), 'Proper result for format: {}'.format(format_name)


@ -7,24 +7,24 @@ insert into array_intersect values ('2019-01-01', [1,2]);
insert into array_intersect values ('2019-01-01', [1]); insert into array_intersect values ('2019-01-01', [1]);
insert into array_intersect values ('2019-01-01', []); insert into array_intersect values ('2019-01-01', []);
select arrayIntersect(arr, [1,2]) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [1,2])) from array_intersect order by arr;
select arrayIntersect(arr, []) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [])) from array_intersect order by arr;
select arrayIntersect([], arr) from array_intersect order by arr; select arraySort(arrayIntersect([], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], arr) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], [1,2,3,4]) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], [1,2,3,4])) from array_intersect order by arr;
select arrayIntersect([], []) from array_intersect order by arr; select arraySort(arrayIntersect([], [])) from array_intersect order by arr;
optimize table array_intersect; optimize table array_intersect;
select arrayIntersect(arr, [1,2]) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [1,2])) from array_intersect order by arr;
select arrayIntersect(arr, []) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [])) from array_intersect order by arr;
select arrayIntersect([], arr) from array_intersect order by arr; select arraySort(arrayIntersect([], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], arr) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], [1,2,3,4]) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], [1,2,3,4])) from array_intersect order by arr;
select arrayIntersect([], []) from array_intersect order by arr; select arraySort(arrayIntersect([], [])) from array_intersect order by arr;
drop table if exists array_intersect; drop table if exists array_intersect;
select '-'; select '-';
select arrayIntersect([-100], [156]); select arraySort(arrayIntersect([-100], [156]));
select arrayIntersect([1], [257]); select arraySort(arrayIntersect([1], [257]));


@ -5,5 +5,5 @@
[2] [2]
[] []
[] []
[3,1,2] [1,2,3]
[] []
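Wrapping arrayIntersect in arraySort makes these tests deterministic: the order of elements returned by arrayIntersect is not guaranteed, which is exactly why the reference line above changes from [3,1,2] to [1,2,3]. In Python terms the comparison the tests now rely on is roughly:

# arraySort(arrayIntersect(a, b)) compared against a sorted reference (sketch).
def sorted_intersection(a, b):
    return sorted(set(a) & set(b))

assert sorted_intersection([3, 1, 2], [0, 1, 2, 3]) == [1, 2, 3]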


@ -1,9 +1,9 @@
SELECT arrayIntersect(['a', 'b', 'c'], ['a', 'a']); SELECT arraySort(arrayIntersect(['a', 'b', 'c'], ['a', 'a']));
SELECT arrayIntersect([1, 1], [2, 2]); SELECT arraySort(arrayIntersect([1, 1], [2, 2]));
SELECT arrayIntersect([1, 1], [1, 2]); SELECT arraySort(arrayIntersect([1, 1], [1, 2]));
SELECT arrayIntersect([1, 1, 1], [3], [2, 2, 2]); SELECT arraySort(arrayIntersect([1, 1, 1], [3], [2, 2, 2]));
SELECT arrayIntersect([1, 2], [1, 2], [2]); SELECT arraySort(arrayIntersect([1, 2], [1, 2], [2]));
SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [1]); SELECT arraySort(arrayIntersect([1, 1], [2, 1], [2, 2], [1]));
SELECT arrayIntersect([]); SELECT arraySort(arrayIntersect([]));
SELECT arrayIntersect([1, 2, 3]); SELECT arraySort(arrayIntersect([1, 2, 3]));
SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [2, 2, 2]); SELECT arraySort(arrayIntersect([1, 1], [2, 1], [2, 2], [2, 2, 2]));


@ -1,2 +1,2 @@
[0,3,2] id2 [0,2,3] id2
[3,1,2] id1 [1,2,3] id1


@ -11,7 +11,7 @@ INSERT INTO tags(id, seqs) VALUES ('id1', [1,2,3]), ('id2', [0,2,3]), ('id1', [1
WITH WITH
(SELECT [0, 1, 2, 3]) AS arr1 (SELECT [0, 1, 2, 3]) AS arr1
SELECT arrayIntersect(argMax(seqs, create_time), arr1) AS common, id SELECT arraySort(arrayIntersect(argMax(seqs, create_time), arr1)) AS common, id
FROM tags FROM tags
WHERE id LIKE 'id%' WHERE id LIKE 'id%'
GROUP BY id; GROUP BY id;


@ -0,0 +1,11 @@
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (arrayJoin([1]) = B.b); -- { serverError 403 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (A.a = arrayJoin([1])); -- { serverError 403 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON equals(a); -- { serverError 62 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON less(a); -- { serverError 62 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b OR a = b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a > b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a < b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a >= b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a <= b; -- { serverError 48 }


@ -0,0 +1 @@
[]


@ -0,0 +1 @@
SELECT groupArrayMovingSum(10)(0) FROM remote('127.0.0.{1,2}', numbers(0))

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,7 +0,0 @@
# Copyright 2020, Altinity LTD. All Rights Reserved.
#
# All information contained herein is, and remains the property
# of Altinity LTD. Any dissemination of this information or
# reproduction of this material is strictly forbidden unless
# prior written permission is obtained from Altinity LTD.
#


@ -3,4 +3,5 @@ from testflows.core import *
@TestFeature @TestFeature
@Name("privileges") @Name("privileges")
def feature(self): def feature(self):
Feature(run=load("rbac.tests.privileges.insert", "feature"), flags=TE)
Feature(run=load("rbac.tests.privileges.select", "feature"), flags=TE) Feature(run=load("rbac.tests.privileges.select", "feature"), flags=TE)


@ -0,0 +1,538 @@
from contextlib import contextmanager
import json
from testflows.core import *
from testflows.asserts import error
from rbac.requirements import *
import rbac.tests.errors as errors
table_types = {
"MergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111)",
"ReplacingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplacingMergeTree(d, (a, b), 111)",
"SummingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111)",
"AggregatingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111)",
"CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);",
"VersionedCollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32, version UInt64, sign Int8, INDEX a (b * y, d) TYPE minmax GRANULARITY 3) ENGINE = VersionedCollapsingMergeTree(sign, version) ORDER BY tuple()",
"GraphiteMergeTree": "CREATE TABLE {name} (key UInt32, Path String, Time DateTime, d Date, a String, b UInt8, x String, y Int8, z UInt32, Value Float64, Version UInt32, col UInt64, INDEX a (key * Value, Time) TYPE minmax GRANULARITY 3) ENGINE = GraphiteMergeTree('graphite_rollup_example') ORDER BY tuple()"
}
table_requirements ={
"MergeTree": RQ_SRS_006_RBAC_Privileges_Insert_MergeTree("1.0"),
"ReplacingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_ReplacingMergeTree("1.0"),
"SummingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_SummingMergeTree("1.0"),
"AggregatingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_AggregatingMergeTree("1.0"),
"CollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_CollapsingMergeTree("1.0"),
"VersionedCollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_VersionedCollapsingMergeTree("1.0"),
"GraphiteMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_GraphiteMergeTree("1.0"),
}
@contextmanager
def table(node, name, table_type="MergeTree"):
try:
with Given(f"I have a {table_type} table"):
node.query(table_types[table_type].format(name=name))
yield
finally:
with Finally("I drop the table"):
node.query(f"DROP TABLE IF EXISTS {name}")
@contextmanager
def user(node, name):
try:
names = name.split(",")
for i in names:
with Given("I have a user"):
node.query(f"CREATE USER OR REPLACE {i}")
yield
finally:
for i in names:
with Finally("I drop the user"):
node.query(f"DROP USER IF EXISTS {name}")
@contextmanager
def role(node, role):
try:
roles = role.split(",")
for j in roles:
with Given("I have a role"):
node.query(f"CREATE ROLE OR REPLACE {j}")
yield
finally:
for j in roles:
with Finally("I drop the role"):
node.query(f"DROP ROLE IF EXISTS {role}")
def input_output_equality_check(node, input_columns, input_data):
data_list = [x.strip("'") for x in input_data.split(",")]
input_dict = dict(zip(input_columns.split(","), data_list))
output_dict = json.loads(node.query(f"select {input_columns} from merge_tree format JSONEachRow").output)
output_dict = {k:str(v) for (k,v) in output_dict.items()}
return input_dict == output_dict
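input_output_equality_check zips the inserted column list with the literal values, reads the same columns back with FORMAT JSONEachRow, stringifies the values and compares the two dicts. For one of the column/data pairs used in the examples below ("d,b" with "'2020-01-01',9") the comparison works out to:

# What input_output_equality_check compares for insert_columns_pass="d,b", data_pass="'2020-01-01',9" (sketch).
input_dict = dict(zip("d,b".split(","), ["2020-01-01", "9"]))      # {'d': '2020-01-01', 'b': '9'}
# output_dict is parsed from `SELECT d,b FROM merge_tree FORMAT JSONEachRow`, e.g. {"d":"2020-01-01","b":9},
# and every value is stringified before the comparison:
output_dict = {k: str(v) for (k, v) in {"d": "2020-01-01", "b": 9}.items()}
assert input_dict == output_dict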
@TestScenario
def without_privilege(self, table_type, node=None):
"""Check that user without insert privilege on a table is not able to insert on that table.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I run INSERT without privilege"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings = [("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Grant("1.0"),
)
def user_with_privilege(self, table_type, node=None):
"""Check that user can insert into a table on which they have insert privilege and the inserted data is correct.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant privilege"):
node.query("GRANT INSERT ON merge_tree TO user0")
with And("I use INSERT"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check the insert functioned"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Revoke("1.0"),
)
def user_with_revoked_privilege(self, table_type, node=None):
"""Check that user is unable to insert into a table after insert privilege on that table has been revoked from user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant privilege"):
node.query("GRANT INSERT ON merge_tree TO user0")
with And("I revoke privilege"):
node.query("REVOKE INSERT ON merge_tree FROM user0")
with And("I use INSERT"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def user_with_privilege_on_columns(self, table_type):
Scenario(run=user_column_privileges,
examples=Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass table_type",
[tuple(list(row)+[table_type]) for row in user_column_privileges.examples]))
@TestOutline(Scenario)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Column("1.0"),
)
@Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass", [
("d", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a,b", "d,a,b", "x", "d,b", '\'woo\'', '\'2020-01-01\',9'),
("d,a,b", "b", "y", "d,a,b", '9', '\'2020-01-01\',\'woo\',9')
])
def user_column_privileges(self, grant_columns, insert_columns_pass, data_fail, data_pass, table_type,
revoke_columns=None, insert_columns_fail=None, node=None):
"""Check that user is able to insert on granted columns
and unable to insert on not granted or revoked columns.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant insert privilege"):
node.query(f"GRANT INSERT({grant_columns}) ON merge_tree TO user0")
if insert_columns_fail is not None:
with And("I insert into not granted column"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_fail}) VALUES ({data_fail})",
settings=[("user","user0")], exitcode=exitcode, message=message)
with And("I insert into granted column"):
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")])
with Then("I check the insert functioned"):
input_equals_output = input_output_equality_check(node, insert_columns_pass, data_pass)
assert input_equals_output, error()
if revoke_columns is not None:
with When("I revoke insert privilege from columns"):
node.query(f"REVOKE INSERT({revoke_columns}) ON merge_tree FROM user0")
with And("I insert into revoked columns"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Grant("1.0"),
)
def role_with_privilege(self, table_type, node=None):
"""Check that user can insert into a table after it is granted a role that
has the insert privilege for that table.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant insert privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant role to the user"):
node.query("GRANT role0 TO user0")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Revoke("1.0"),
)
def role_with_revoked_privilege(self, table_type, node=None):
"""Check that user with a role that has insert privilege on a table
is unable to insert into that table after insert privilege
has been revoked from the role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I revoke privilege from the role"):
node.query("REVOKE INSERT ON merge_tree FROM role0")
with And("I insert into the table"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def user_with_revoked_role(self, table_type, node=None):
"""Check that user with a role that has insert privilege on a table
is unable to insert into that table after the role with insert
privilege has been revoked from the user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I revoke the role from the user"):
node.query("REVOKE role0 FROM user0")
with And("I insert into the table"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def role_with_privilege_on_columns(self, table_type):
Scenario(run=role_column_privileges,
examples=Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass table_type",
[tuple(list(row)+[table_type]) for row in role_column_privileges.examples]))
@TestOutline(Scenario)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Column("1.0"),
)
@Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass", [
("d", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a,b", "d,a,b", "x", "d,b", '\'woo\'', '\'2020-01-01\',9'),
("d,a,b", "b", "y", "d,a,b", '9', '\'2020-01-01\',\'woo\',9')
])
def role_column_privileges(self, grant_columns, insert_columns_pass, data_fail, data_pass,
table_type, revoke_columns=None, insert_columns_fail=None, node=None):
"""Check that user with a role is able to insert on granted columns and unable
to insert on not granted or revoked columns.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant insert privilege"):
node.query(f"GRANT INSERT({grant_columns}) ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
if insert_columns_fail is not None:
with And("I insert into not granted column"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_fail}) VALUES ({data_fail})",
settings=[("user","user0")], exitcode=exitcode, message=message)
with And("I insert into granted column"):
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")])
with Then("I check the insert succeeded"):
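# input_output_equality_check is a shared helper defined elsewhere in this suite; it is expected to read the inserted columns back via SELECT and compare them with data_pass.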
input_equals_output = input_output_equality_check(node, insert_columns_pass, data_pass)
assert input_equals_output, error()
if revoke_columns is not None:
with When("I revoke insert privilege from columns"):
node.query(f"REVOKE INSERT({revoke_columns}) ON merge_tree FROM role0")
with And("I insert into revoked columns"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Cluster("1.0"),
)
def user_with_privilege_on_cluster(self, table_type, node=None):
"""Check that user is able to insert on a table with
privilege granted on a cluster.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
try:
with Given("I have a user on a cluster"):
node.query("CREATE USER OR REPLACE user0 ON CLUSTER sharded_cluster")
with When("I grant insert privilege on a cluster without the node with the table"):
node.query("GRANT ON CLUSTER cluster23 INSERT ON merge_tree TO user0")
with And("I insert into the table expecting a fail"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant insert privilege on cluster including all nodes"):
node.query("GRANT ON CLUSTER sharded_cluster INSERT ON merge_tree TO user0")
with And("I revoke insert privilege on cluster without the table node"):
node.query("REVOKE ON CLUSTER cluster23 INSERT ON merge_tree FROM user0")
with And("I insert into the table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
finally:
with Finally("I drop the user"):
node.query("DROP USER user0 ON CLUSTER sharded_cluster")
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def user_with_privilege_from_user_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted privilege
from another user with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to another user via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO user1", settings=[("user","user0")])
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def role_with_privilege_from_user_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted a role with
insert privilege that was granted by another user with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO role0", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to a role via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO role0", settings=[("user","user0")])
with And("I grant the role to another user"):
node.query("GRANT role0 TO user1")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def user_with_privilege_from_role_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted privilege from a role with grant option
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with When("I grant role to a user"):
node.query("GRANT role0 TO user0")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to a user via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO user1", settings=[("user","user0")])
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def role_with_privilege_from_role_with_grant_option(self, table_type, node=None):
"""Check that a user is able to insert on a table with a role that was granted privilege
by another role with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0,role1"):
with When("I grant privilege with grant option to role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO role1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to another role via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO role1", settings=[("user","user0")])
with And("I grant the second role to another user"):
node.query("GRANT role1 TO user1")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_user_via_user_with_grant_option(self, table_type, node=None):
"""Check that user is unable to revoke a column they don't have access to from a user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_role_via_user_with_grant_option(self, table_type, node=None):
"""Check that user is unable to revoke a column they dont have acces to from a role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM role0", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_user_via_role_with_grant_option(self, table_type, node=None):
"""Check that user with a role is unable to revoke a column they dont have acces to from a user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_role_via_role_with_grant_option(self, table_type, node=None):
"""Check that user with a role is unable to revoke a column they dont have acces to from a role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0,role1"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM role1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestOutline(Feature)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert("1.0"),
)
@Examples("table_type", [
(table_type, Requirements(requirement)) for table_type, requirement in table_requirements.items()
])
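# One example per table engine; the paired Requirements() entry annotates the scenario generated for that engine.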
@Name("insert")
def feature(self, table_type, node="clickhouse1"):
self.context.node = self.context.cluster.node(node)
self.context.node1 = self.context.cluster.node("clickhouse1")
self.context.node2 = self.context.cluster.node("clickhouse2")
self.context.node3 = self.context.cluster.node("clickhouse3")
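# Run every insert-privilege scenario against the current table engine.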
Scenario(test=without_privilege)(table_type=table_type)
Scenario(test=user_with_privilege)(table_type=table_type)
Scenario(test=user_with_revoked_privilege)(table_type=table_type)
Scenario(test=user_with_privilege_on_columns)(table_type=table_type)
Scenario(test=role_with_privilege)(table_type=table_type)
Scenario(test=role_with_revoked_privilege)(table_type=table_type)
Scenario(test=user_with_revoked_role)(table_type=table_type)
Scenario(test=role_with_privilege_on_columns)(table_type=table_type)
Scenario(test=user_with_privilege_on_cluster)(table_type=table_type)
Scenario(test=user_with_privilege_from_user_with_grant_option)(table_type=table_type)
Scenario(test=role_with_privilege_from_user_with_grant_option)(table_type=table_type)
Scenario(test=user_with_privilege_from_role_with_grant_option)(table_type=table_type)
Scenario(test=role_with_privilege_from_role_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_user_via_user_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_role_via_user_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_user_via_role_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_role_via_role_with_grant_option)(table_type=table_type)

View File

@@ -9,7 +9,22 @@ import rbac.tests.errors as errors
 table_types = {
     "MergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111)",
-    "CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);"
+    "ReplacingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplacingMergeTree(d, (a, b), 111)",
+    "SummingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111)",
+    "AggregatingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111)",
+    "CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);",
+    "VersionedCollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32, version UInt64, sign Int8, INDEX a (b * y, d) TYPE minmax GRANULARITY 3) ENGINE = VersionedCollapsingMergeTree(sign, version) ORDER BY tuple()",
+    "GraphiteMergeTree": "CREATE TABLE {name} (key UInt32, Path String, Time DateTime, d Date, a String, b UInt8, x String, y Int8, z UInt32, Value Float64, Version UInt32, col UInt64, INDEX a (key * Value, Time) TYPE minmax GRANULARITY 3) ENGINE = GraphiteMergeTree('graphite_rollup_example') ORDER BY tuple()"
 }
 
+table_requirements ={
+    "MergeTree": RQ_SRS_006_RBAC_Privileges_Select_MergeTree("1.0"),
+    "ReplacingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_ReplacingMergeTree("1.0"),
+    "SummingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_SummingMergeTree("1.0"),
+    "AggregatingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_AggregatingMergeTree("1.0"),
+    "CollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_CollapsingMergeTree("1.0"),
+    "VersionedCollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_VersionedCollapsingMergeTree("1.0"),
+    "GraphiteMergeTree": RQ_SRS_006_RBAC_Privileges_Select_GraphiteMergeTree("1.0"),
+}
+
 
 @contextmanager
@@ -462,7 +477,7 @@ def revoke_privilege_from_role_via_role_with_grant_option(self, table_type, node
     RQ_SRS_006_RBAC_Privileges_Select("1.0"),
 )
 @Examples("table_type", [
-    (key,) for key in table_types.keys()
+    (table_type, Requirements(requirement)) for table_type, requirement in table_requirements.items()
 ])
 @Name("select")
 def feature(self, table_type, node="clickhouse1"):