Merge branch 'master' into crash-log

Alexey Milovidov 2020-07-31 16:12:53 +03:00
commit c3ad710b84
1539 changed files with 50873 additions and 12323 deletions

10
.gitmodules vendored

@ -49,7 +49,7 @@
url = https://github.com/ClickHouse-Extras/boost.git
[submodule "contrib/base64"]
path = contrib/base64
url = https://github.com/powturbo/Turbo-Base64.git
url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/apache/arrow
@ -76,7 +76,7 @@
url = https://github.com/google/snappy
[submodule "contrib/cppkafka"]
path = contrib/cppkafka
url = https://github.com/ClickHouse-Extras/cppkafka.git
url = https://github.com/mfontanini/cppkafka.git
[submodule "contrib/brotli"]
path = contrib/brotli
url = https://github.com/google/brotli.git
@ -174,3 +174,9 @@
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/getsentry/sentry-native.git
[submodule "contrib/gcem"]
path = contrib/gcem
url = https://github.com/kthohr/gcem.git
[submodule "contrib/stats"]
path = contrib/stats
url = https://github.com/kthohr/stats.git

View File

@ -12,6 +12,20 @@ foreach(policy
endif()
endforeach()
# set default policy
foreach(default_policy_var_name
# make option() honor normal variables for BUILD_SHARED_LIBS:
# - re2
# - snappy
CMAKE_POLICY_DEFAULT_CMP0077
# Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
# set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
# INTERFACE_LINK_LIBRARIES.
CMAKE_POLICY_DEFAULT_CMP0022
)
set(${default_policy_var_name} NEW)
endforeach()
project(ClickHouse)
include (cmake/arch.cmake)
@ -273,7 +287,7 @@ endif ()
include(cmake/dbms_glob_sources.cmake)
if (OS_LINUX)
if (OS_LINUX OR OS_ANDROID)
include(cmake/linux/default_libs.cmake)
elseif (OS_DARWIN)
include(cmake/darwin/default_libs.cmake)
@ -364,6 +378,7 @@ include (cmake/find/avro.cmake)
include (cmake/find/msgpack.cmake)
include (cmake/find/cassandra.cmake)
include (cmake/find/sentry.cmake)
include (cmake/find/stats.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
@ -378,10 +393,6 @@ include (cmake/find/mysqlclient.cmake)
# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
if (OS_LINUX AND NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
endif ()
if (USE_OPENCL)
if (OS_DARWIN)
set(OPENCL_LINKER_FLAGS "-framework OpenCL")
@ -397,6 +408,10 @@ endif ()
add_subdirectory (contrib EXCLUDE_FROM_ALL)
if (NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
endif ()
macro (add_executable target)
# invoke built-in add_executable
# explicitly acquire and interpose malloc symbols by clickhouse_malloc

View File

@ -9,12 +9,11 @@ ClickHouse is an open-source column-oriented database management system that all
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020.
* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-july-virtual-meetup-tickets-111199787558) on July 15, 2020.
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on July 31, 2020.

View File

@ -18,5 +18,4 @@ currently being supported with security updates:
## Reporting a Vulnerability
To report a potential vulnerability in ClickHouse please use the security advisory feature of GitHub:
https://github.com/ClickHouse/ClickHouse/security/advisories
To report a potential vulnerability in ClickHouse please send the details about it to [clickhouse-feedback@yandex-team.com](mailto:clickhouse-feedback@yandex-team.com).

View File

@ -49,7 +49,7 @@ public:
struct Values
{
/// Least significant 32 bits from time_t at the beginning of the day.
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value is zero.
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value will overflow.
/// Change to time_t; change constants above; and recompile the sources if you need to support time after 2105 year.
UInt32 date;
@ -686,12 +686,17 @@ public:
inline time_t makeDateTime(UInt16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
{
size_t index = makeDayNum(year, month, day_of_month);
time_t time_offset = hour * 3600 + minute * 60 + second;
UInt32 time_offset = hour * 3600 + minute * 60 + second;
if (time_offset >= lut[index].time_at_offset_change)
time_offset -= lut[index].amount_of_offset_change;
return lut[index].date + time_offset;
UInt32 res = lut[index].date + time_offset;
if (unlikely(res > DATE_LUT_MAX))
return 0;
return res;
}
inline const Values & getValues(DayNum d) const { return lut[d]; }

View File

@ -16,6 +16,19 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
}
// Uses a separate replxx::Replxx instance to avoid loading the history again into
// the current context (replxx::Replxx::history_load() re-loads the history from
// the file), since it would then overlap with the history of the current session
// (this makes the behavior compatible with other interpreters, e.g. bash).
void history_save(const String & history_file_path, const String & line)
{
replxx::Replxx rx_no_overlap;
rx_no_overlap.history_load(history_file_path);
rx_no_overlap.history_add(line);
rx_no_overlap.history_save(history_file_path);
}
}
ReplxxLineReader::ReplxxLineReader(
@ -101,6 +114,10 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
void ReplxxLineReader::addToHistory(const String & line)
{
// Lock the history file to prevent inconsistent concurrent changes.
//
// replxx::Replxx::history_save() already has lockf(),
// but replxx::Replxx::history_load() does not,
// which is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", strerror(errno));
@ -110,7 +127,7 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.history_add(line);
// flush changes to the disk
rx.history_save(history_file_path);
history_save(history_file_path, line);
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));

View File

@ -30,7 +30,7 @@ struct StringRef
constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_) {}
StringRef(const std::string & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr explicit StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {}
constexpr StringRef() = default;

View File

@ -1,6 +1,9 @@
#include <common/getThreadId.h>
#if defined(OS_LINUX)
#if defined(OS_ANDROID)
#include <sys/types.h>
#include <unistd.h>
#elif defined(OS_LINUX)
#include <unistd.h>
#include <syscall.h>
#elif defined(OS_FREEBSD)
@ -16,7 +19,9 @@ uint64_t getThreadId()
{
if (!current_tid)
{
#if defined(OS_LINUX)
#if defined(OS_ANDROID)
current_tid = gettid();
#elif defined(OS_LINUX)
current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
#elif defined(OS_FREEBSD)
current_tid = pthread_getthreadid_np();

View File

@ -9,7 +9,6 @@
#include <string.h>
#include <signal.h>
#include <cxxabi.h>
#include <execinfo.h>
#include <unistd.h>
#include <typeinfo>

17
benchmark/omnisci/benchmark.sh Executable file

@ -0,0 +1,17 @@
#!/bin/bash
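# For every query in queries.sql: drop the OS page cache, restart the OmniSci
# server, wait until it answers a trivial query, then run the query three times.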
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
sudo systemctl restart omnisci_server
for i in {1..1000}; do
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "SELECT 1;" 2>&1 | grep -q '1 rows returned' && break;
sleep 0.1;
done
sleep 10;
echo "$query";
for i in {1..3}; do
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "$query" 2>&1 | grep -P 'Exception:|Execution time:';
done;
done;

View File

@ -0,0 +1,332 @@
# Instructions for running the OmniSci benchmark on the web-analytics dataset
OmniSci (formerly named "MapD") is an open-source (open-core) in-memory analytical DBMS with support for GPU processing.
It can also run on CPU without a GPU, and it shows competitive performance on simple queries (for example, a simple aggregation on a single column).
# How to install
https://docs.omnisci.com/installation-and-configuration/installation/installing-on-ubuntu
# Caveats
- The dataset (at least the needed columns) must fit in memory.
- It does not support data compression (only dictionary encoding for strings).
- The first query execution is very slow because uncompressed data is read from disk.
- It does not support indexes for quick range queries.
- It does not support NOT NULL for data types.
- It does not support BLOB.
- No support for the UNSIGNED data type (which is OK according to the SQL standard).
- Lack of string processing functions.
- Strings are limited to 32767 bytes.
- GROUP BY on a text data type is supported only if it has dictionary encoding (see the sketch below).
`Exception: Cannot group by string columns which are not dictionary encoded`
- Some aggregate functions are not supported for strings at all.
`Aggregate on TEXT is not supported yet.`
- Sometimes I hit a bug where a query runs in an infinite loop and does not finish (after a retry it finishes successfully).
- One query took hours to execute even with retries.
- Sorting is slow and disabled with default settings for large result sets.
`Exception: Sorting the result would be too slow`
`Cast from dictionary-encoded string to none-encoded would be slow`
- There is an approximate count-distinct function, but its precision is not documented.
To enable sorting of large result sets, see:
https://stackoverflow.com/questions/62977734/omnissci-sorting-the-result-would-be-too-slow
The list of known issues is here:
https://github.com/omnisci/omniscidb/issues?q=is%3Aissue+author%3Aalexey-milovidov
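To illustrate the GROUP BY caveat above, here is a minimal sketch run through the same `omnisql` client as the benchmark; the table and column names are hypothetical:
```
# Hypothetical illustration; enc_demo and its columns are made-up names.
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<'SQL'
CREATE TABLE enc_demo (s_dict TEXT ENCODING DICT, s_none TEXT ENCODING NONE);
SELECT s_dict, count(*) FROM enc_demo GROUP BY s_dict;
SELECT s_none, count(*) FROM enc_demo GROUP BY s_none;
SQL
# The second SELECT is expected to fail with:
# Exception: Cannot group by string columns which are not dictionary encoded
```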
# How to prepare data
Download the 100 million row dataset from here and insert it into ClickHouse:
https://clickhouse.tech/docs/en/getting-started/example-datasets/metrica/
Convert the CREATE TABLE query:
```
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
tr '`' '"' |
sed -r -e '
s/U?Int64/BIGINT/;
s/U?Int32/INTEGER/;
s/U?Int16/SMALLINT/;
s/U?Int8/TINYINT/;
s/DateTime/TIMESTAMP ENCODING FIXED(32)/;
s/ Date/ DATE ENCODING DAYS(16)/;
s/FixedString\(2\)/TEXT ENCODING DICT(16)/;
s/FixedString\(3\)/TEXT ENCODING DICT/;
s/FixedString\(\d+\)/TEXT ENCODING DICT/;
s/String/TEXT ENCODING DICT/;'
```
Then cut off the `ENGINE` part.
The resulting CREATE TABLE query:
```
CREATE TABLE hits
(
"WatchID" BIGINT,
"JavaEnable" TINYINT,
"Title" TEXT ENCODING DICT,
"GoodEvent" SMALLINT,
"EventTime" TIMESTAMP ENCODING FIXED(32),
"EventDate" DATE ENCODING DAYS(16),
"CounterID" INTEGER,
"ClientIP" INTEGER,
"RegionID" INTEGER,
"UserID" BIGINT,
"CounterClass" TINYINT,
"OS" TINYINT,
"UserAgent" TINYINT,
"URL" TEXT ENCODING DICT,
"Referer" TEXT ENCODING DICT,
"Refresh" TINYINT,
"RefererCategoryID" SMALLINT,
"RefererRegionID" INTEGER,
"URLCategoryID" SMALLINT,
"URLRegionID" INTEGER,
"ResolutionWidth" SMALLINT,
"ResolutionHeight" SMALLINT,
"ResolutionDepth" TINYINT,
"FlashMajor" TINYINT,
"FlashMinor" TINYINT,
"FlashMinor2" TEXT ENCODING DICT,
"NetMajor" TINYINT,
"NetMinor" TINYINT,
"UserAgentMajor" SMALLINT,
"UserAgentMinor" TEXT ENCODING DICT(16),
"CookieEnable" TINYINT,
"JavascriptEnable" TINYINT,
"IsMobile" TINYINT,
"MobilePhone" TINYINT,
"MobilePhoneModel" TEXT ENCODING DICT,
"Params" TEXT ENCODING DICT,
"IPNetworkID" INTEGER,
"TraficSourceID" TINYINT,
"SearchEngineID" SMALLINT,
"SearchPhrase" TEXT ENCODING DICT,
"AdvEngineID" TINYINT,
"IsArtifical" TINYINT,
"WindowClientWidth" SMALLINT,
"WindowClientHeight" SMALLINT,
"ClientTimeZone" SMALLINT,
"ClientEventTime" TIMESTAMP ENCODING FIXED(32),
"SilverlightVersion1" TINYINT,
"SilverlightVersion2" TINYINT,
"SilverlightVersion3" INTEGER,
"SilverlightVersion4" SMALLINT,
"PageCharset" TEXT ENCODING DICT,
"CodeVersion" INTEGER,
"IsLink" TINYINT,
"IsDownload" TINYINT,
"IsNotBounce" TINYINT,
"FUniqID" BIGINT,
"OriginalURL" TEXT ENCODING DICT,
"HID" INTEGER,
"IsOldCounter" TINYINT,
"IsEvent" TINYINT,
"IsParameter" TINYINT,
"DontCountHits" TINYINT,
"WithHash" TINYINT,
"HitColor" TEXT ENCODING DICT(8),
"LocalEventTime" TIMESTAMP ENCODING FIXED(32),
"Age" TINYINT,
"Sex" TINYINT,
"Income" TINYINT,
"Interests" SMALLINT,
"Robotness" TINYINT,
"RemoteIP" INTEGER,
"WindowName" INTEGER,
"OpenerName" INTEGER,
"HistoryLength" SMALLINT,
"BrowserLanguage" TEXT ENCODING DICT(16),
"BrowserCountry" TEXT ENCODING DICT(16),
"SocialNetwork" TEXT ENCODING DICT,
"SocialAction" TEXT ENCODING DICT,
"HTTPError" SMALLINT,
"SendTiming" INTEGER,
"DNSTiming" INTEGER,
"ConnectTiming" INTEGER,
"ResponseStartTiming" INTEGER,
"ResponseEndTiming" INTEGER,
"FetchTiming" INTEGER,
"SocialSourceNetworkID" TINYINT,
"SocialSourcePage" TEXT ENCODING DICT,
"ParamPrice" BIGINT,
"ParamOrderID" TEXT ENCODING DICT,
"ParamCurrency" TEXT ENCODING DICT,
"ParamCurrencyID" SMALLINT,
"OpenstatServiceName" TEXT ENCODING DICT,
"OpenstatCampaignID" TEXT ENCODING DICT,
"OpenstatAdID" TEXT ENCODING DICT,
"OpenstatSourceID" TEXT ENCODING DICT,
"UTMSource" TEXT ENCODING DICT,
"UTMMedium" TEXT ENCODING DICT,
"UTMCampaign" TEXT ENCODING DICT,
"UTMContent" TEXT ENCODING DICT,
"UTMTerm" TEXT ENCODING DICT,
"FromTag" TEXT ENCODING DICT,
"HasGCLID" TINYINT,
"RefererHash" BIGINT,
"URLHash" BIGINT,
"CLID" INTEGER
);
```
Convert the dataset, prepare the list of fields for SELECT:
```
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
tr '`' '"' |
sed -r -e '
s/"(\w+)" U?Int([0-9]+)/toInt\2(\1)/;
s/"(\w+)" (Fixed)?String(\([0-9]+\))?/toValidUTF8(toString(\1))/;
s/"(\w+)" \w+/\1/'
```
The resulting SELECT query for data preparation:
```
SELECT
toInt64(WatchID),
toInt8(JavaEnable),
toValidUTF8(toString(Title)),
toInt16(GoodEvent),
EventTime,
EventDate,
toInt32(CounterID),
toInt32(ClientIP),
toInt32(RegionID),
toInt64(UserID),
toInt8(CounterClass),
toInt8(OS),
toInt8(UserAgent),
toValidUTF8(toString(URL)),
toValidUTF8(toString(Referer)),
toInt8(Refresh),
toInt16(RefererCategoryID),
toInt32(RefererRegionID),
toInt16(URLCategoryID),
toInt32(URLRegionID),
toInt16(ResolutionWidth),
toInt16(ResolutionHeight),
toInt8(ResolutionDepth),
toInt8(FlashMajor),
toInt8(FlashMinor),
toValidUTF8(toString(FlashMinor2)),
toInt8(NetMajor),
toInt8(NetMinor),
toInt16(UserAgentMajor),
toValidUTF8(toString(UserAgentMinor)),
toInt8(CookieEnable),
toInt8(JavascriptEnable),
toInt8(IsMobile),
toInt8(MobilePhone),
toValidUTF8(toString(MobilePhoneModel)),
toValidUTF8(toString(Params)),
toInt32(IPNetworkID),
toInt8(TraficSourceID),
toInt16(SearchEngineID),
toValidUTF8(toString(SearchPhrase)),
toInt8(AdvEngineID),
toInt8(IsArtifical),
toInt16(WindowClientWidth),
toInt16(WindowClientHeight),
toInt16(ClientTimeZone),
ClientEventTime,
toInt8(SilverlightVersion1),
toInt8(SilverlightVersion2),
toInt32(SilverlightVersion3),
toInt16(SilverlightVersion4),
toValidUTF8(toString(PageCharset)),
toInt32(CodeVersion),
toInt8(IsLink),
toInt8(IsDownload),
toInt8(IsNotBounce),
toInt64(FUniqID),
toValidUTF8(toString(OriginalURL)),
toInt32(HID),
toInt8(IsOldCounter),
toInt8(IsEvent),
toInt8(IsParameter),
toInt8(DontCountHits),
toInt8(WithHash),
toValidUTF8(toString(HitColor)),
LocalEventTime,
toInt8(Age),
toInt8(Sex),
toInt8(Income),
toInt16(Interests),
toInt8(Robotness),
toInt32(RemoteIP),
toInt32(WindowName),
toInt32(OpenerName),
toInt16(HistoryLength),
toValidUTF8(toString(BrowserLanguage)),
toValidUTF8(toString(BrowserCountry)),
toValidUTF8(toString(SocialNetwork)),
toValidUTF8(toString(SocialAction)),
toInt16(HTTPError),
toInt32(SendTiming),
toInt32(DNSTiming),
toInt32(ConnectTiming),
toInt32(ResponseStartTiming),
toInt32(ResponseEndTiming),
toInt32(FetchTiming),
toInt8(SocialSourceNetworkID),
toValidUTF8(toString(SocialSourcePage)),
toInt64(ParamPrice),
toValidUTF8(toString(ParamOrderID)),
toValidUTF8(toString(ParamCurrency)),
toInt16(ParamCurrencyID),
toValidUTF8(toString(OpenstatServiceName)),
toValidUTF8(toString(OpenstatCampaignID)),
toValidUTF8(toString(OpenstatAdID)),
toValidUTF8(toString(OpenstatSourceID)),
toValidUTF8(toString(UTMSource)),
toValidUTF8(toString(UTMMedium)),
toValidUTF8(toString(UTMCampaign)),
toValidUTF8(toString(UTMContent)),
toValidUTF8(toString(UTMTerm)),
toValidUTF8(toString(FromTag)),
toInt8(HasGCLID),
toInt64(RefererHash),
toInt64(URLHash),
toInt32(CLID)
FROM hits_100m_obfuscated
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated.csv'
FORMAT CSV;
```
Upload data to OmniSci:
```
/opt/omnisci/bin/omnisql -t -p HyperInteractive
```
Run the CREATE TABLE statement, then run:
```
COPY hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated.csv' WITH (HEADER = 'false');
```
Data loading took
```
336639 ms
```
on a server (Linux Ubuntu, Xeon E5-2560v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
Run benchmark:
```
./benchmark.sh
```
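The next step reads `log.txt`; capturing the benchmark output into that file is assumed to look something like this:
```
./benchmark.sh 2>&1 | tee log.txt
```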
Prepare the result to paste into JSON:
```
grep -oP 'Total time: \d+' log.txt |
grep -oP '\d+' |
awk '{
if (i % 3 == 0) { a = $1 }
else if (i % 3 == 1) { b = $1 }
else if (i % 3 == 2) { c = $1; print "[" a / 1000 ", " b / 1000 ", " c / 1000 "]," };
++i; }'
```
And fill in `[null, null, null]` for missing runs.
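For example, feeding the three "Total time" values of the first query in `log.txt` (shown below) through the same awk program yields one row of the resulting JSON:
```
printf 'Total time: 23471\nTotal time: 43\nTotal time: 35\n' |
grep -oP '\d+' |
awk '{
    if (i % 3 == 0) { a = $1 }
    else if (i % 3 == 1) { b = $1 }
    else if (i % 3 == 2) { c = $1; print "[" a / 1000 ", " b / 1000 ", " c / 1000 "]," };
    ++i; }'
# prints: [23.471, 0.043, 0.035],
```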

210
benchmark/omnisci/log.txt Normal file

@ -0,0 +1,210 @@
3
SELECT count(*) FROM hits;
Execution time: 23471 ms, Total time: 23471 ms
Execution time: 42 ms, Total time: 43 ms
Execution time: 35 ms, Total time: 35 ms
3
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
Execution time: 17328 ms, Total time: 17329 ms
Execution time: 58 ms, Total time: 59 ms
Execution time: 57 ms, Total time: 59 ms
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
Execution time: 17309 ms, Total time: 17310 ms
Execution time: 115 ms, Total time: 115 ms
Execution time: 129 ms, Total time: 130 ms
3
SELECT sum(UserID) FROM hits;
Execution time: 26091 ms, Total time: 26091 ms
Execution time: 88 ms, Total time: 89 ms
Execution time: 71 ms, Total time: 72 ms
3
SELECT APPROX_COUNT_DISTINCT(UserID) FROM hits;
Execution time: 21720 ms, Total time: 21720 ms
Execution time: 364 ms, Total time: 364 ms
Execution time: 344 ms, Total time: 345 ms
3
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM hits;
Execution time: 19314 ms, Total time: 19315 ms
Execution time: 385 ms, Total time: 386 ms
Execution time: 382 ms, Total time: 382 ms
3
SELECT min(EventDate), max(EventDate) FROM hits;
Execution time: 19431 ms, Total time: 19432 ms
Execution time: 130 ms, Total time: 131 ms
Execution time: 147 ms, Total time: 148 ms
3
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Execution time: 20660 ms, Total time: 20661 ms
Execution time: 63 ms, Total time: 64 ms
Execution time: 88 ms, Total time: 89 ms
3
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Execution time: 21364 ms, Total time: 21472 ms
Execution time: 1387 ms, Total time: 1504 ms
Execution time: 1443 ms, Total time: 1505 ms
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Execution time: 22205 ms, Total time: 22285 ms
Execution time: 1590 ms, Total time: 1655 ms
Execution time: 1591 ms, Total time: 1658 ms
3
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Execution time: 22343 ms, Total time: 22344 ms
Execution time: 122 ms, Total time: 123 ms
Execution time: 117 ms, Total time: 118 ms
3
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Execution time: 21681 ms, Total time: 21695 ms
Execution time: 299 ms, Total time: 310 ms
Execution time: 275 ms, Total time: 292 ms
3
SELECT SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Execution time: 23346 ms, Total time: 23360 ms
Execution time: 613 ms, Total time: 631 ms
Execution time: 606 ms, Total time: 624 ms
3
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Execution time: 66014 ms, Total time: 68618 ms
Execution time: 44309 ms, Total time: 47296 ms
Execution time: 44019 ms, Total time: 46866 ms
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Execution time: 25853 ms, Total time: 25984 ms
Execution time: 2590 ms, Total time: 2728 ms
Execution time: 2652 ms, Total time: 2789 ms
3
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Execution time: 26581 ms, Total time: 26953 ms
Execution time: 5843 ms, Total time: 6158 ms
Execution time: 5970 ms, Total time: 6286 ms
3
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Execution time: 33007 ms, Total time: 33581 ms
Execution time: 9943 ms, Total time: 10509 ms
Execution time: 9470 ms, Total time: 10047 ms
3
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
Execution time: 39009 ms, Total time: 39575 ms
Execution time: 8151 ms, Total time: 8785 ms
Execution time: 8037 ms, Total time: 8665 ms
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Execution time: 56207 ms, Total time: 57764 ms
Execution time: 26653 ms, Total time: 28199 ms
Execution time: 25614 ms, Total time: 27336 ms
3
SELECT UserID FROM hits WHERE UserID = -6101065172474983726;
Execution time: 18975 ms, Total time: 18976 ms
Execution time: 136 ms, Total time: 136 ms
Execution time: 136 ms, Total time: 136 ms
3
SELECT count(*) FROM hits WHERE URL LIKE '%metrika%';
Execution time: 32444 ms, Total time: 32445 ms
Execution time: 125 ms, Total time: 126 ms
Execution time: 134 ms, Total time: 136 ms
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM hits WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
3
SELECT * FROM hits WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Execution time: 96163 ms, Total time: 96166 ms
Execution time: 312 ms, Total time: 314 ms
Execution time: 303 ms, Total time: 305 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Execution time: 27493 ms, Total time: 27494 ms
Execution time: 216 ms, Total time: 216 ms
Execution time: 221 ms, Total time: 222 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Execution time: 38230 ms, Total time: 38308 ms
Execution time: 17175 ms, Total time: 17256 ms
Execution time: 17225 ms, Total time: 17310 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Execution time: 115614 ms, Total time: 115714 ms
Execution time: 95944 ms, Total time: 96041 ms
Execution time: 94274 ms, Total time: 94383 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
Execution time: 31775 ms, Total time: 31779 ms
Execution time: 2643 ms, Total time: 2647 ms
Execution time: 2933 ms, Total time: 2937 ms
3
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
Execution time: 28853 ms, Total time: 28854 ms
Execution time: 5654 ms, Total time: 5655 ms
Execution time: 5579 ms, Total time: 5581 ms
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Execution time: 31694 ms, Total time: 31925 ms
Execution time: 3872 ms, Total time: 4142 ms
Execution time: 3928 ms, Total time: 4162 ms
3
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Execution time: 43690 ms, Total time: 44297 ms
Execution time: 8221 ms, Total time: 8825 ms
Execution time: 8115 ms, Total time: 8711 ms
3
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
Execution time: 29669 ms, Total time: 29715 ms
Execution time: 1623 ms, Total time: 1669 ms
Execution time: 1534 ms, Total time: 1586 ms
3
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Execution time: 34860 ms, Total time: 35201 ms
Execution time: 7075 ms, Total time: 7414 ms
Execution time: 7164 ms, Total time: 7567 ms
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Execution time: 26467 ms, Total time: 26724 ms
Execution time: 5740 ms, Total time: 6026 ms
Execution time: 5667 ms, Total time: 5920 ms
3
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Execution time: 31899 ms, Total time: 31908 ms
Execution time: 1141 ms, Total time: 1154 ms
Execution time: 1155 ms, Total time: 1168 ms
3
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Execution time: 27991 ms, Total time: 27997 ms
Execution time: 719 ms, Total time: 724 ms
Execution time: 737 ms, Total time: 744 ms
3
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Execution time: 34651 ms, Total time: 34661 ms
Execution time: 1182 ms, Total time: 1200 ms
Execution time: 1142 ms, Total time: 1159 ms
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Execution time: 30130 ms, Total time: 30136 ms
Execution time: 461 ms, Total time: 467 ms
Execution time: 445 ms, Total time: 451 ms
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Execution time: 19989 ms, Total time: 19991 ms
Execution time: 326 ms, Total time: 327 ms
Execution time: 325 ms, Total time: 326 ms
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Execution time: 18658 ms, Total time: 18660 ms
Execution time: 265 ms, Total time: 266 ms
Execution time: 254 ms, Total time: 255 ms
3
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);
Execution time: 25225 ms, Total time: 25227 ms
Execution time: 210 ms, Total time: 212 ms
Execution time: 199 ms, Total time: 200 ms

View File

@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT APPROX_COUNT_DISTINCT(UserID) FROM {table};
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
#SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);

View File

@ -7,7 +7,7 @@
#
# Sets values of:
# OPENLDAP_FOUND - TRUE if found
# OPENLDAP_INCLUDE_DIR - path to the include directory
# OPENLDAP_INCLUDE_DIRS - paths to the include directories
# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries
# OPENLDAP_LDAP_LIBRARY - paths to the libldap library
# OPENLDAP_LBER_LIBRARY - paths to the liblber library
@ -28,11 +28,11 @@ if(OPENLDAP_USE_REENTRANT_LIBS)
endif()
if(OPENLDAP_ROOT_DIR)
find_path(OPENLDAP_INCLUDE_DIR NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH)
find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH)
find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH)
find_library(OPENLDAP_LBER_LIBRARY NAMES "lber" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH)
else()
find_path(OPENLDAP_INCLUDE_DIR NAMES "ldap.h" "lber.h")
find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h")
find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}")
find_library(OPENLDAP_LBER_LIBRARY NAMES "lber")
endif()
@ -44,10 +44,10 @@ set(OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
OpenLDAP DEFAULT_MSG
OPENLDAP_INCLUDE_DIR OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY
OPENLDAP_INCLUDE_DIRS OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY
)
mark_as_advanced(OPENLDAP_INCLUDE_DIR OPENLDAP_LIBRARIES OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY)
mark_as_advanced(OPENLDAP_INCLUDE_DIRS OPENLDAP_LIBRARIES OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY)
if(OPENLDAP_USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_orig_CMAKE_FIND_LIBRARY_SUFFIXES})

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54436)
SET(VERSION_REVISION 54437)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 6)
SET(VERSION_MINOR 7)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e)
SET(VERSION_DESCRIBE v20.6.1.1-prestable)
SET(VERSION_STRING 20.6.1.1)
SET(VERSION_GITHASH d64e51d1a78c1b53c33915ca0f75c97b2333844f)
SET(VERSION_DESCRIBE v20.7.1.1-prestable)
SET(VERSION_STRING 20.7.1.1)
# end of autochange

View File

@ -1,4 +1,5 @@
SET(ENABLE_AMQPCPP ${ENABLE_LIBRARIES})
option(ENABLE_AMQPCPP "Enable AMQP-CPP" ${ENABLE_LIBRARIES})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt")
message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive")
set (ENABLE_AMQPCPP 0)

View File

@ -1,3 +1,7 @@
option (ENABLE_GTEST_LIBRARY "Enable gtest library" ${ENABLE_LIBRARIES})
if (ENABLE_GTEST_LIBRARY)
option (USE_INTERNAL_GTEST_LIBRARY "Set to FALSE to use system Google Test instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeLists.txt")
@ -28,4 +32,6 @@ if((GTEST_INCLUDE_DIRS AND GTEST_BOTH_LIBRARIES) OR GTEST_SRC_DIR)
set(USE_GTEST 1)
endif()
endif()
message (STATUS "Using gtest=${USE_GTEST}: ${GTEST_INCLUDE_DIRS} : ${GTEST_BOTH_LIBRARIES} : ${GTEST_SRC_DIR}")

View File

@ -16,11 +16,16 @@ if (ENABLE_LDAP)
set (OPENLDAP_USE_REENTRANT_LIBS 1)
if (NOT USE_INTERNAL_LDAP_LIBRARY)
if (APPLE AND NOT OPENLDAP_ROOT_DIR)
set (OPENLDAP_ROOT_DIR "/usr/local/opt/openldap")
endif ()
if (OPENLDAP_USE_STATIC_LIBS)
message (WARNING "Unable to use external static OpenLDAP libraries, falling back to the bundled version.")
set (USE_INTERNAL_LDAP_LIBRARY 1)
else ()
if (APPLE AND NOT OPENLDAP_ROOT_DIR)
set (OPENLDAP_ROOT_DIR "/usr/local/opt/openldap")
endif ()
find_package (OpenLDAP)
find_package (OpenLDAP)
endif ()
endif ()
if (NOT OPENLDAP_FOUND AND NOT MISSING_INTERNAL_LDAP_LIBRARY)
@ -54,7 +59,10 @@ if (ENABLE_LDAP)
else ()
set (USE_INTERNAL_LDAP_LIBRARY 1)
set (OPENLDAP_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap")
set (OPENLDAP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap/include")
set (OPENLDAP_INCLUDE_DIRS
"${ClickHouse_SOURCE_DIR}/contrib/openldap-cmake/${_system_name}_${_system_processor}/include"
"${ClickHouse_SOURCE_DIR}/contrib/openldap/include"
)
# Below, 'ldap'/'ldap_r' and 'lber' will be resolved to
# the targets defined in contrib/openldap-cmake/CMakeLists.txt
if (OPENLDAP_USE_REENTRANT_LIBS)
@ -73,4 +81,4 @@ if (ENABLE_LDAP)
endif ()
endif ()
message (STATUS "Using ldap=${USE_LDAP}: ${OPENLDAP_INCLUDE_DIR} : ${OPENLDAP_LIBRARIES}")
message (STATUS "Using ldap=${USE_LDAP}: ${OPENLDAP_INCLUDE_DIRS} : ${OPENLDAP_LIBRARIES}")

View File

@ -1,3 +1,7 @@
option(ENABLE_GSASL_LIBRARY "Enable gsasl library" ${ENABLE_LIBRARIES})
if (ENABLE_GSASL_LIBRARY)
option (USE_INTERNAL_LIBGSASL_LIBRARY "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src/gsasl.h")
@ -24,4 +28,6 @@ if(LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR)
set (USE_LIBGSASL 1)
endif()
endif()
message (STATUS "Using libgsasl=${USE_LIBGSASL}: ${LIBGSASL_INCLUDE_DIR} : ${LIBGSASL_LIBRARY}")

View File

@ -1,3 +1,7 @@
option (ENABLE_MSGPACK "Enable msgpack library" ${ENABLE_LIBRARIES})
if (ENABLE_MSGPACK)
option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library instead of bundled" ${NOT_UNBUNDLED})
if (USE_INTERNAL_MSGPACK_LIBRARY)
@ -14,4 +18,10 @@ else()
find_path(MSGPACK_INCLUDE_DIR NAMES msgpack.hpp PATHS ${MSGPACK_INCLUDE_PATHS})
endif()
message(STATUS "Using msgpack: ${MSGPACK_INCLUDE_DIR}")
if (MSGPACK_INCLUDE_DIR)
set(USE_MSGPACK 1)
endif()
endif()
message(STATUS "Using msgpack=${USE_MSGPACK}: ${MSGPACK_INCLUDE_DIR}")

View File

@ -1,17 +1,8 @@
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h")
message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT HAVE_SSE42)
message (WARNING "submodule contrib/simdjson requires support of SSE4.2 instructions")
return()
elseif (NOT HAVE_PCLMULQDQ)
message (WARNING "submodule contrib/simdjson requires support of PCLMULQDQ instructions")
return()
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_LIBRARY "simdjson")
message(STATUS "Using simdjson=${USE_SIMDJSON}: ${SIMDJSON_LIBRARY}")
message(STATUS "Using simdjson=${USE_SIMDJSON}")

20
cmake/find/stats.cmake Normal file

@ -0,0 +1,20 @@
option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES})
if (ENABLE_STATS)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats")
message (WARNING "submodule contrib/stats is missing. to fix try run: \n git submodule update --init --recursive")
set (ENABLE_STATS 0)
set (USE_STATS 0)
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/gcem")
message (WARNING "submodule contrib/gcem is missing. to fix try run: \n git submodule update --init --recursive")
set (ENABLE_STATS 0)
set (USE_STATS 0)
else()
set(STATS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/stats/include)
set(GCEM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/gcem/include)
set (USE_STATS 1)
endif()
endif()
message (STATUS "Using stats=${USE_STATS} : ${STATS_INCLUDE_DIR}")
message (STATUS "Using gcem=${USE_STATS}: ${GCEM_INCLUDE_DIR}")

View File

@ -11,7 +11,12 @@ else ()
set (BUILTINS_LIBRARY "-lgcc")
endif ()
if (OS_ANDROID)
# pthread and rt are included in libc
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
else ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
endif ()
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
@ -35,7 +40,11 @@ add_library(global-libs INTERFACE)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
add_subdirectory(base/glibc-compatibility)
if (NOT OS_ANDROID)
# Our compatibility layer doesn't build under Android, many errors in musl.
add_subdirectory(base/glibc-compatibility)
endif ()
include (cmake/find/unwind.cmake)
include (cmake/find/cxx.cmake)

View File

@ -1,6 +1,11 @@
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
set (OS_LINUX 1)
add_definitions(-D OS_LINUX)
elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
# This is a toy configuration and not in CI, so expect it to be broken.
# Use cmake flags such as: -DCMAKE_TOOLCHAIN_FILE=~/ch2/android-ndk-r21d/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=28
set (OS_ANDROID 1)
add_definitions(-D OS_ANDROID)
elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set (OS_FREEBSD 1)
add_definitions(-D OS_FREEBSD)
@ -17,7 +22,7 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_PARQUET OFF CACHE INTERNAL "")
set (ENABLE_ICU OFF CACHE INTERNAL "")
set (ENABLE_FASTOPS OFF CACHE INTERNAL "")
elseif (OS_LINUX)
elseif (OS_LINUX OR OS_ANDROID)
if (ARCH_AARCH64)
# FIXME: broken dependencies
set (ENABLE_PROTOBUF OFF CACHE INTERNAL "")

View File

@ -22,7 +22,7 @@ elseif (COMPILER_CLANG)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION})
message (FATAL_ERROR "AppleClang compiler version must be at least ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).")
elseif (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.0.0)
# char8_t is available staring (upstream vanilla) Clang 7, but prior to Clang 8,
# char8_t is available starting (upstream vanilla) Clang 7, but prior to Clang 8,
# it is not enabled by -std=c++20 and can be enabled with an explicit -fchar8_t.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fchar8_t")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fchar8_t")

View File

@ -44,13 +44,8 @@ endif ()
if (USE_INTERNAL_RE2_LIBRARY)
set(RE2_BUILD_TESTING 0 CACHE INTERNAL "")
function(re2_support)
# make option() honor normal variables for BUILD_SHARED_LIBS
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
add_subdirectory (re2)
add_subdirectory (re2_st)
endfunction()
re2_support()
add_subdirectory (re2)
add_subdirectory (re2_st)
endif ()
if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
@ -107,7 +102,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
endif ()
if (ENABLE_LDAP AND USE_INTERNAL_LDAP_LIBRARY)
if (USE_INTERNAL_LDAP_LIBRARY)
add_subdirectory (openldap-cmake)
endif ()
@ -227,19 +222,11 @@ if (USE_INTERNAL_AVRO_LIBRARY)
endif()
if(USE_INTERNAL_GTEST_LIBRARY)
# Wrap into function because of CMAKE_POLICY_DEFAULT_CMP0022
function(googletest_support)
set(GOOGLETEST_VERSION 1.10.0) # master
# Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
# set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
# INTERFACE_LINK_LIBRARIES.
set(CMAKE_POLICY_DEFAULT_CMP0022 NEW)
# Google Test from sources
add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# avoid problems with <regexp.h>
target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
endfunction()
googletest_support()
set(GOOGLETEST_VERSION 1.10.0) # master
# Google Test from sources
add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# avoid problems with <regexp.h>
target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
elseif(GTEST_SRC_DIR)
add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0)
@ -320,3 +307,7 @@ endif()
add_subdirectory (fmtlib-cmake)
if (USE_STATS)
add_subdirectory (stats-cmake)
add_subdirectory (gcem)
endif()

View File

@ -24,7 +24,7 @@ set (SRCS
add_library(amqp-cpp ${SRCS})
target_compile_options (amqp-cpp
PUBLIC
PRIVATE
-Wno-old-style-cast
-Wno-inconsistent-missing-destructor-override
-Wno-deprecated
@ -38,7 +38,7 @@ target_compile_options (amqp-cpp
-w
)
target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include)
target_include_directories (amqp-cpp SYSTEM PUBLIC ${LIBRARY_DIR}/include)
target_link_libraries (amqp-cpp PUBLIC ssl)

View File

@ -20,5 +20,7 @@
#define ARROW_VERSION_PATCH
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
#define ARROW_SO_VERSION ""
#define ARROW_FULL_SO_VERSION ""
/* #undef GRPCPP_PP_INCLUDE */

2
contrib/base64 vendored

@ -1 +1 @@
Subproject commit 95ba56a9b041f9933f5cd2bbb2ee4e083468c20a
Subproject commit af9b331f2b4f30b41c70f3a571ff904a8251c1d3

2
contrib/cppkafka vendored

@ -1 +1 @@
Subproject commit f555ee36aaa74d17ca0dab3ce472070a610b2966
Subproject commit b06e64ef5bffd636d918a742c689f69130c1dbab

2
contrib/fmtlib vendored

@ -1 +1 @@
Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9
Subproject commit c108ee1d590089ccf642fc85652b845924067af2

1
contrib/gcem vendored Submodule

@ -0,0 +1 @@
Subproject commit 8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit e2131aa752d7e95441e08f9a18304c1445f2576a
Subproject commit 1b666578c85094306b061352078022f6350bfab8

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 560f0742cc0895d00d78359dbdeb82064a24adb8
Subproject commit 1e4aa116e5a39e4ba23b9a93e6c7f048c5105b20

View File

@ -1,14 +1,6 @@
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/document.cpp
${SIMDJSON_SRC_DIR}/error.cpp
${SIMDJSON_SRC_DIR}/implementation.cpp
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
)
set(SIMDJSON_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/src")
set(SIMDJSON_SRC ${SIMDJSON_SRC_DIR}/simdjson.cpp)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")

1
contrib/stats vendored Submodule

@ -0,0 +1 @@
Subproject commit b6dd459c10a88c7ea04693c007e9e35820c5d9ad

View File

@ -0,0 +1,9 @@
# stats is a header-only library of probability density functions,
# cumulative distribution functions, quantile functions, and random sampling methods.
set(STATS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/stats/include)
set(GCEM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/gcem/include)
add_library(stats INTERFACE)
target_include_directories(stats SYSTEM INTERFACE ${STATS_INCLUDE_DIR})
target_include_directories(stats SYSTEM INTERFACE ${GCEM_INCLUDE_DIR})

4
debian/changelog vendored

@ -1,5 +1,5 @@
clickhouse (20.6.1.1) unstable; urgency=low
clickhouse (20.7.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 22 Jun 2020 20:40:23 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 13 Jul 2020 18:25:58 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -31,6 +31,10 @@
"name": "yandex/clickhouse-integration-test",
"dependent": []
},
"docker/test/fuzzer": {
"name": "yandex/clickhouse-fuzzer",
"dependent": []
},
"docker/test/performance-comparison": {
"name": "yandex/clickhouse-performance-comparison",
"dependent": []
@ -83,5 +87,21 @@
"docker/test/testflows/runner": {
"name": "yandex/clickhouse-testflows-runner",
"dependent": []
},
"docker/test/fasttest": {
"name": "yandex/clickhouse-fasttest",
"dependent": []
},
"docker/test/integration/s3_proxy": {
"name": "yandex/clickhouse-s3-proxy",
"dependent": []
},
"docker/test/integration/resolver": {
"name": "yandex/clickhouse-python-bottle",
"dependent": []
},
"docker/test/integration/helper_container": {
"name": "yandex/clickhouse-integration-helper",
"dependent": []
}
}

View File

@ -33,6 +33,25 @@ then
rm /output/clickhouse-odbc-bridge ||:
cp -r ../docker/test/performance-comparison /output/scripts ||:
# We have to know the revision that corresponds to this binary build.
# It is not the nominal SHA from pull/*/head, but the pull/*/merge, which is
# head merged to master by github, at some point after the PR is updated.
# There are some quirks to consider:
# - apparently the real SHA is not recorded in system.build_options;
# - it can change at any time as github pleases, so we can't just record
# the SHA and use it later, it might become inaccessible;
# - CI has an immutable snapshot of the repository that it uses for all checks
# for a given nominal SHA, but it is not accessible outside Yandex.
# This is why we add this repository snapshot from CI to the performance test
# package.
mkdir /output/ch
git -C /output/ch init --bare
git -C /output/ch remote add origin /build
git -C /output/ch fetch --no-tags --depth 50 origin HEAD:pr
git -C /output/ch fetch --no-tags --depth 50 origin master:master
git -C /output/ch reset --soft pr
git -C /output/ch log -5
fi
# May be set for split build or for performance test.

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -0,0 +1,65 @@
# docker build -t yandex/clickhouse-fasttest .
FROM ubuntu:19.10
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
ENV COMMIT_SHA=''
ENV PULL_REQUEST_NUMBER=''
RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnupg
RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
bash \
fakeroot \
ccache \
software-properties-common \
apt-transport-https \
ca-certificates \
wget \
bash \
fakeroot \
cmake \
ccache \
llvm-10 \
clang-10 \
lld-10 \
clang-tidy-10 \
ninja-build \
gperf \
git \
tzdata \
gperf \
rename \
build-essential \
expect \
python \
python-lxml \
python-termcolor \
python-requests \
unixodbc \
qemu-user-static \
sudo \
moreutils \
curl \
brotli
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget --quiet -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

134
docker/test/fasttest/run.sh Executable file
View File

@ -0,0 +1,134 @@
#!/bin/bash
set -x -e
ls -la
git clone https://github.com/ClickHouse/ClickHouse.git | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/clone_log.txt
cd ClickHouse
CLICKHOUSE_DIR=`pwd`
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
git checkout FETCH_HEAD
echo 'Cloned merge head'
else
git fetch
git checkout $COMMIT_SHA
echo 'Checked out the commit'
fi
else
if [ "$COMMIT_SHA" != "" ]; then
git checkout $COMMIT_SHA
fi
fi
SUBMODULES_TO_UPDATE="contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11"
git submodule update --init --recursive $SUBMODULES_TO_UPDATE | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/submodule_log.txt
export CMAKE_LIBS_CONFIG="-DENABLE_LIBRARIES=0 -DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_THINLTO=0 -DUSE_UNWIND=1"
export CCACHE_DIR=/ccache
export CCACHE_BASEDIR=/ClickHouse
export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
export CCACHE_MAXSIZE=15G
ccache --show-stats ||:
ccache --zero-stats ||:
mkdir build
cd build
CLICKHOUSE_BUILD_DIR=`pwd`
cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 $CMAKE_LIBS_CONFIG | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/cmake_log.txt
ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/build_log.txt
ninja install | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/install_log.txt
ccache --show-stats ||:
mkdir -p /etc/clickhouse-server
mkdir -p /etc/clickhouse-client
mkdir -p /etc/clickhouse-server/config.d
mkdir -p /etc/clickhouse-server/users.d
mkdir -p /var/log/clickhouse-server
cp $CLICKHOUSE_DIR/programs/server/config.xml /etc/clickhouse-server/
cp $CLICKHOUSE_DIR/programs/server/users.xml /etc/clickhouse-server/
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
#ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
until clickhouse-client --query "SELECT 1"
do
sleep 0.1
done
TESTS_TO_SKIP="parquet avro h3 odbc mysql sha256 _orc_ arrow 01098_temporary_and_external_tables 01083_expressions_in_engine_arguments hdfs 00911_tautological_compare protobuf capnproto java_hash hashing secure 00490_special_line_separators_and_characters_outside_of_bmp 00436_convert_charset 00105_shard_collations 01354_order_by_tuple_collate_const 01292_create_user 01098_msgpack_format 00929_multi_match_edit_distance 00926_multimatch 00834_cancel_http_readonly_queries_on_client_close brotli parallel_alter 00302_http_compression 00417_kill_query 01294_lazy_database_concurrent 01193_metadata_loading base64 01031_mutations_interpreter_and_context json client 01305_replica_create_drop_zookeeper 01092_memory_profiler 01355_ilike 01281_unsucceeded_insert_select_queries_counter live_view limit_memory memory_limit memory_leak 00110_external_sort 00682_empty_parts_merge 00701_rollup 00109_shard_totals_after_having ddl_dictionaries 01251_dict_is_in_infinite_loop 01259_dictionary_custom_settings_ddl 01268_dictionary_direct_layout 01280_ssd_complex_key_dictionary 00652_replicated_mutations_zookeeper 01411_bayesian_ab_testing"
clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip $TESTS_TO_SKIP 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt
kill_clickhouse () {
kill `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` 2>/dev/null
for i in {1..10}
do
if ! kill -0 `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'`; then
echo "No clickhouse process"
break
else
echo "Process" `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` "still alive"
sleep 10
fi
done
}
FAILED_TESTS=`grep 'FAIL\|TIMEOUT\|ERROR' /test_output/test_log.txt | awk 'BEGIN { ORS=" " }; { print substr($3, 1, length($3)-1) }'`
if [[ ! -z "$FAILED_TESTS" ]]; then
kill_clickhouse
clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
until clickhouse-client --query "SELECT 1"
do
sleep 0.1
done
echo "Going to run again: $FAILED_TESTS"
clickhouse-test --no-long --testname --shard --zookeeper $FAILED_TESTS 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_output/test_log.txt
else
echo "No failed tests"
fi
mv /var/log/clickhouse-server/* /test_output

View File

@ -0,0 +1,38 @@
# docker build -t yandex/clickhouse-fuzzer .
FROM ubuntu:18.04
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
ca-certificates \
curl \
gdb \
git \
libc6-dbg \
moreutils \
ncdu \
p7zip-full \
parallel \
psmisc \
rsync \
tree \
tzdata \
vim \
wget \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
COPY * /
SHELL ["/bin/bash", "-c"]
CMD set -o pipefail \
&& cd /workspace \
&& /run-fuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
# docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-fuzzer

View File

@ -0,0 +1,7 @@
<yandex>
<profiles>
<default>
<max_execution_time>10</max_execution_time>
</default>
</profiles>
</yandex>

179
docker/test/fuzzer/run-fuzzer.sh Executable file
View File

@ -0,0 +1,179 @@
#!/bin/bash
set -eux
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
function clone
{
(
rm -rf ch ||:
mkdir ch
cd ch
git init
git remote add origin https://github.com/ClickHouse/ClickHouse
git fetch --depth=1 origin "$SHA_TO_TEST"
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git fetch --depth=1 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
git checkout "$SHA_TO_TEST"
)
}
function download
{
# wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
# | tar --strip-components=1 -zxv
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-10_debug_none_bundled_unsplitted_disable_False_binary/clickhouse"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
ln -s ./clickhouse ./clickhouse-client
}
function configure
{
rm -rf db ||:
mkdir db ||:
cp -av "$repo_dir"/programs/server/config* db
cp -av "$repo_dir"/programs/server/user* db
# TODO figure out which ones are needed
cp -av "$repo_dir"/tests/config/listen.xml db/config.d
cp -av "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}
function watchdog
{
sleep 3600
echo "Fuzzing run has timed out"
killall clickhouse-client ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-client
then
break
fi
sleep 1
done
killall -9 clickhouse-client ||:
}
function fuzz
{
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -10000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
./clickhouse-client --query "select 1"
kill -0 $server_pid
echo Server started
fuzzer_exit_code=0
./clickhouse-client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -10000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
./clickhouse-client --query "select elapsed, query from system.processes" ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server ||:
}
case "$stage" in
"")
;&
"clone")
time clone
if [ -v FUZZ_LOCAL_SCRIPT ]
then
# just fall through
echo Using the testing script from docker container
:
else
# Run the testing script from the repository
echo Using the testing script from the repository
export stage=download
time ch/docker/test/fuzzer/run-fuzzer.sh
# Keep the error code
exit $?
fi
;&
"download")
time download
;&
"configure")
time configure
;&
"fuzz")
# Start a watchdog that should kill the fuzzer on timeout.
# The shell won't kill the child sleep when we kill it, so we have to put it
# into a separate process group so that we can kill them all.
set -m
watchdog &
watchdog_pid=$!
set +m
# Check that the watchdog has started
kill -0 $watchdog_pid
fuzzer_exit_code=0
time fuzz || fuzzer_exit_code=$?
kill -- -$watchdog_pid ||:
# Debug
date
sleep 10
jobs
pstree -aspgT
# Make files with status and description we'll show for this check on Github
task_exit_code=$fuzzer_exit_code
if [ "$fuzzer_exit_code" == 143 ]
then
# SIGTERM -- the fuzzer was killed by timeout, which means a normal run.
echo "success" > status.txt
echo "OK" > description.txt
task_exit_code=0
elif [ "$fuzzer_exit_code" == 210 ]
then
# Lost connection to the server. This probably means that the server died
# with abort.
echo "failure" > status.txt
if ! grep -a "Received signal \|Logical error" server.log > description.txt
then
echo "Lost connection to server. See the logs" > description.txt
fi
else
# Something different -- maybe the fuzzer itself died? Don't grep the
# server log in this case, because we will find a message about normal
# server termination (Received signal 15), which is confusing.
echo "failure" > status.txt
echo "Fuzzer failed ($fuzzer_exit_code). See the logs" > description.txt
fi
exit $task_exit_code
;&
esac

View File

@ -25,7 +25,7 @@ RUN rm -rf \
RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -1,3 +1,4 @@
# docker build -t yandex/clickhouse-integration-helper .
# Helper docker container to run iptables without sudo
FROM alpine

View File

@ -1,4 +1,5 @@
# docker build -t yandex/clickhouse-python-bottle .
# Helper docker container to run python bottle apps
FROM python:3
RUN python -m pip install bottle
RUN python -m pip install bottle

View File

@ -76,4 +76,3 @@ VOLUME /var/lib/docker
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "pytest $PYTEST_OPTS"]

View File

@ -5,50 +5,38 @@ services:
image: minio/minio
volumes:
- data1-1:/data1
- ${MINIO_CERTS_DIR:-}:/certs
ports:
- "9001:9001"
environment:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
command: server --address :9001 /data1-1
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9001/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
MINIO_PROMETHEUS_AUTH_TYPE: public
command: server --address :9001 --certs-dir /certs /data1-1
depends_on:
- redirect
- resolver
- proxy1
- proxy2
# Redirects all requests to origin Minio.
redirect:
image: schmunk42/nginx-redirect
volumes:
- /nginx:/nginx
environment:
- SERVER_REDIRECT=minio1:9001
- SERVER_REDIRECT_CODE=307
- SERVER_ACCESS_LOG=/nginx/access.log
# HTTP proxies for Minio.
# HTTP proxies for Minio.
proxy1:
image: vimagick/tinyproxy
image: yandex/clickhouse-s3-proxy
ports:
- "4081:8888"
- "8080" # Redirect proxy port
- "80" # Reverse proxy port
- "443" # Reverse proxy port (secure)
proxy2:
image: vimagick/tinyproxy
image: yandex/clickhouse-s3-proxy
ports:
- "4082:8888"
- "8080"
- "80"
- "443"
# Empty container to run proxy resolver.
# Empty container to run proxy resolver.
resolver:
build:
context: ../../../docker/test/integration/
dockerfile: resolver/Dockerfile
network: host
image: yandex/clickhouse-python-bottle
ports:
- "4083:8080"
- "8080"
tty: true
depends_on:
- proxy1

View File

@ -0,0 +1,11 @@
# docker build -t yandex/clickhouse-s3-proxy .
FROM nginx:alpine
COPY run.sh /run.sh
COPY server.crt /etc/ssl/certs/server.crt
COPY server.key /etc/ssl/certs/server.key
COPY nginx.conf /etc/nginx/nginx.conf
RUN chmod +x /run.sh
CMD ["/run.sh"]

View File

@ -0,0 +1,59 @@
events {
use epoll;
worker_connections 128;
}
http {
# Docker DNS resolver
resolver 127.0.0.11;
map $http_x_forwarded_proto $redirect_scheme {
default $scheme;
https https;
}
# Redirect proxy
server {
listen 8080;
server_name proxy1 proxy2;
# To allow special characters in headers
ignore_invalid_headers off;
return 307 $redirect_scheme://${S3_HOST}:${S3_PORT}$request_uri;
}
# Reverse proxy
server {
listen 80;
listen 443 ssl;
server_name proxy1 proxy2;
ssl_certificate /etc/ssl/certs/server.crt;
ssl_certificate_key /etc/ssl/certs/server.key;
# To allow special characters in headers
ignore_invalid_headers off;
# Allow any size file to be uploaded.
# Set to a value such as 1000m to restrict uploads to a specific size
client_max_body_size 0;
# To disable buffering
proxy_buffering off;
location / {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_connect_timeout 300;
# Default is HTTP/1, keepalive is only enabled in HTTP/1.1
proxy_http_version 1.1;
proxy_set_header Connection "";
chunked_transfer_encoding off;
proxy_pass $scheme://${S3_HOST}:${S3_PORT};
proxy_ssl_verify off;
}
}
}

View File

@ -0,0 +1,15 @@
#!/usr/bin/env sh
if [ -z "$S3_HOST" ] ; then
S3_HOST='minio1'
fi
if [ -z "$S3_PORT" ] ; then
S3_PORT='9001'
fi
# Replace config placeholders with environment variables
sed -i "s|\${S3_HOST}|${S3_HOST}|" /etc/nginx/nginx.conf
sed -i "s|\${S3_PORT}|${S3_PORT}|" /etc/nginx/nginx.conf
exec nginx -g 'daemon off;'

View File

@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDBTCCAe2gAwIBAgIRANb2pr4HgR8YFwKNJMUSWiIwDQYJKoZIhvcNAQELBQAw
EjEQMA4GA1UEChMHQWNtZSBDbzAeFw0yMDA3MDkxODE1MDBaFw0yMTA3MDkxODE1
MDBaMBIxEDAOBgNVBAoTB0FjbWUgQ28wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
ggEKAoIBAQC9ORgaBCx42ejp9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFH
mGCIJJVjVV0rafv3vw+9f9u4wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnf
F9Fa8wnk89b0xGWyM7jie7/cTIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9r
cFLLjNHuv1uAc32jdfQEPluvmBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBt
kRTUPNq2+fGv/eLJ9Lxm3153Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSC
zGw84teTpmDwSyiSR70TYJdtBGQqZvLZAgMBAAGjVjBUMA4GA1UdDwEB/wQEAwIC
pDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/MBwGA1UdEQQV
MBOCBm1pbmlvMYIJbG9jYWxob3N0MA0GCSqGSIb3DQEBCwUAA4IBAQAKU2LhvFFz
RFfUibt/WTj3rtUfKEBrQuUOYt2A8MTbC8pyEu+UJASTzunluUFze5zchEm1s3pZ
YRLcNwbJqLE6CzUxQ9b2iUhaeWuKrx4ZoPkY0uGiaXM/iKfVKTuNmhF2Sf/P4xUE
Pt19yQjpIhcicWQc37BBQFvnvy+n5wgHa/pgl1+QUvAa/fwYhF9S28xRLESzZepm
NMYysopV+YMaxcFa9SH44toXtXnvRWwVdEorlq1W3/AiJg8hDPzSa9UXLMjA968J
ONtn3qvwac9Ot53+QsXJdsMmDZLWGCi6I1w0ZQetpr/0ubaA1F3GdK9eB/S0thqU
l2VUgn3c/kKS
-----END CERTIFICATE-----

View File

@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC9ORgaBCx42ejp
9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFHmGCIJJVjVV0rafv3vw+9f9u4
wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnfF9Fa8wnk89b0xGWyM7jie7/c
TIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9rcFLLjNHuv1uAc32jdfQEPluv
mBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBtkRTUPNq2+fGv/eLJ9Lxm3153
Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSCzGw84teTpmDwSyiSR70TYJdt
BGQqZvLZAgMBAAECggEANe8oJ4I5CtlRwh3H/S7Hy/iaeqUvuroORwjghwpVqTGg
gV3/RlUVmkqceTG0QvP58n3rC9qxqdnfzvHw/FyN7lBj2a25fF3HD21u3aunrzX9
NJLwwAr4p9YqHjpX/6JhCrNQKVMEx8luDmTgKDETJRfIXVF7FvQQ53pVLcD03U+g
MgN61HBzfT5L0TLHoiKNQbVi+Wm1gw3zvb/a9Z1rULRZfIuKGM0bNNqRZt4rUUAV
QicklDR0Qv59jhr5Y/zjinKkqF8qudvUkaNT2JH1DLfXiAhuC0OQugMjYzNntQB4
hMhkqARnjuk/WPMvnXivnqx9o69BL5wyXIj3vD4fgQKBgQDVKaXAZJ5bo3VfcpLm
cyjtUuOzAxLU1bVGI0Hm1ARqeGVxSTypZLSX8xFi2n5Bvbgh/Y60aEac/1uKoXA9
gej1MT4hKpXyagrARx97E8zk5nf88kVxkiKUrifMjP2lDzHIYhdKk9R3SiV6gWvA
FoJtjBwFhJ6uWUPyry4nqFSENQKBgQDjP9k6CTZF0EnDqbADiQr7VKpebqhtLWRD
U0bQh/l57VrWqGksVOlivIJChP49q1H+hQ1YgfKIEDag8JJnf/inUSpVsw1ljAjv
knqNzn0Gdd9lTsiNGgqlCjhmWedkh4eO8uau479TwQc6gB4PQdLAFynQtt8Kk45P
GxdpRx4AlQKBgQCgxUGbYwhBC37aF1sObqrenBbajCXm2qxXEv6Ab0ZJWzb/g4I6
LJc8x3pEeZCiWsoG8Otxy/f+L2bGn049Rb8DNzmp4Cmp5SrorHvk4yE1P1IeOEgC
CXsFcnjYATrJBDXC8aCpgefMdOLhi71N6mxC3VrBGq5nxzHFVzTTelUMRQKBgQDa
yekhiCb5liy+tcuhy7qH+Z7BpjaATrh+XVoLgS5+5jeT/basmN/OUQH0e0iwJRaf
Poh30zynJT0DPDsobLwAkxN4SRg30Vf1GAjoKIqUwr2fMvfBafYfqbRdTmeKkTXB
OjlA3kKhp3GHMDxAojX+/Q4kRTx+WUwk+0dR88d99QKBgEiYrkSLjKXUFllDmVyp
HtlYKZiq5c33DA06SA2uVOprCdTbnbvP4WrgUsLGvqBcaPEd06fGGbvJWwUdnkXM
HNAkqSeUe5ueovidtoPdF+aPyxdGg3Z8551xOoHZFYrvgdZ4YMPcJrwQQsvWCcYP
GDnSoD8Xjd2LmekTpDBt5ZVz
-----END PRIVATE KEY-----

View File

@ -17,6 +17,7 @@ RUN apt-get update \
libc6-dbg \
moreutils \
ncdu \
numactl \
p7zip-full \
parallel \
psmisc \

View File

@ -36,6 +36,9 @@ Action required for every item -- these are errors that must be fixed. The error
#### Slow on client
Action required for every item -- these are errors that must be fixed. This table shows queries that take significantly longer to process on the client than on the server. A possible reason might be sending too much data to the client, e.g., a forgotten `format Null`.
#### Short queries not marked as short
Action required for every item -- these are errors that must be fixed. This table shows queries that are "short" but not explicitly marked as such. "Short" queries are too fast to compare meaningfully, because any change is drowned out by noise. We consider all queries that run faster than 0.02 s to be "short", and only check their performance if they become slower than this threshold. This automatic mode is probably not what you want, so increase the query run time to between 0.1 and 1 s so that performance can be compared. You do want the "short" mode for queries that complete "immediately", such as some varieties of `select count(*)`. Mark them as "short" explicitly by writing `<query short="1">...`. The value of the "short" attribute is evaluated as a Python expression after parameter substitution, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>` to mark only a particular combination of parameters as short.
#### Partial queries
Action required for the cells marked in red. Shows the queries we are unable to run on an old server -- probably because they contain a new function. You should see this table when you add a new function and a performance test for it. Check that the run time and variance are acceptable (run time between 0.1 and 1 seconds, variance below 10%). If not, they will be highlighted in red.
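For illustration, here is a minimal sketch (in the spirit of the perf.py changes later in this diff) of how a substituted "short" expression could be evaluated; the parameter name and values are hypothetical:
# Minimal sketch: the short="..." value is parameter-substituted, then
# evaluated as a Python expression (the "scale" parameter is hypothetical).
short_attr = "{scale} < 100"
substituted = short_attr.format(scale="10")   # -> "10 < 100"
is_short = bool(eval(substituted))            # -> True, the query is treated as short
print(is_short)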

View File

@ -282,6 +282,7 @@ do
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
sed -n "s/^short\t/$test_name\t/p" < "$test_file" >> "analyze/marked-short-queries.tsv"
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
done
unset IFS
@ -291,6 +292,9 @@ clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
-- Separately process 'partial' queries which we could only run on the new server
-- because they use new functions. We can't make normal stats for them, but still
-- have to show some stats so that the PR author can tweak them.
create view partial_queries as select test, query_index
from file('analyze/partial-queries.tsv', TSV,
'test text, query_index int, servers Array(int)');
@ -303,6 +307,7 @@ create table partial_query_times engine File(TSVWithNamesAndTypes,
group by test, query_index
;
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
@ -312,12 +317,17 @@ create view right_query_log as select *
'$(cat "right-query-log.tsv.columns")');
create view query_logs as
select *, 0 version from left_query_log
select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
query_duration_ms from left_query_log
union all
select *, 1 version from right_query_log
select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
query_duration_ms from right_query_log
;
create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-full.tsv')
-- This is a single source of truth on all metrics we have for query runs. The
-- metrics include ProfileEvents from system.query_log, and query run times
-- reported by the perf.py test runner.
create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv')
as
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
@ -349,18 +359,29 @@ create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-
where (test, query_index) not in partial_queries
;
create table query_run_metrics engine File(
-- This is just for convenience -- human-readable + easy to make plots.
create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metrics-denorm.tsv')
as select test, query_index, metric_names, version, query_id, metric_values
from query_run_metric_arrays
array join metric_names, metric_values
order by test, query_index, metric_names, version, query_id
;
-- This is for statistical processing with eqmed.sql
create table query_run_metrics_for_stats engine File(
TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
'analyze/query-run-metrics-for-stats.tsv')
as select test, query_index, 0 run, version, metric_values
from query_run_metrics_full
from query_run_metric_arrays
order by test, query_index, run, version
;
-- This is the list of metric names, so that we can join them back after
-- statistical processing.
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metrics_full limit 1
as select metric_names from query_run_metric_arrays limit 1
;
"
" 2> >(tee -a analyze/errors.log 1>&2)
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
@ -370,16 +391,16 @@ create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-n
# for each file. I do this in parallel using GNU parallel.
( set +x # do not bloat the log
IFS=$'\n'
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
for prefix in $(cut -f1,2 "analyze/query-run-metrics-for-stats.tsv" | sort | uniq)
do
file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv"
grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" &
grep "^$prefix " "analyze/query-run-metrics-for-stats.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \
>> \"analyze/query-metric-stats.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
done
@ -388,6 +409,33 @@ unset IFS
)
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select test, query_index, metric_name, left, right, diff, stat_threshold
from file('analyze/query-metric-stats.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
order by test, query_index, metric_name
;
create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
'analyze/query-metric-stats-denorm.tsv')
as select test, query_index, metric_name, left, right, diff, stat_threshold
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)
}
# Analyze results
@ -403,58 +451,46 @@ build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
short_query_threshold="0.02"
clickhouse-local --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float')
;
-- Report for partial queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
as select floor(time_median, 3) m, floor(time_stddev / time_median, 3) v,
as select floor(time_median, 3) time,
floor(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float') t
from partial_query_times
join query_display_names using (test, query_index)
order by test, query_index
;
-- WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv')
as
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
create view query_metric_stats as
select * from file('analyze/query-metric-stats-denorm.tsv',
TSVWithNamesAndTypes,
'test text, query_index int, metric_name text, left float, right float,
diff float, stat_threshold float')
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately (on the same order of time as noise). We compute average
-- run time between old and new version, and if it is below a threshold,
-- we just skip the query. If there is a significant regression, the
-- average will be above threshold, we'll process it normally and will
-- detect the regression.
(left + right) / 2 < 0.02 as short,
-- Comparison mode doesn't make sense for queries that complete
-- immediately (on the same order of time as noise). If the query duration is
-- less than some threshold, we just skip it. If there is a significant
-- regression in such a query, the time will exceed the threshold, and we
-- will process it normally and detect the regression.
right < $short_query_threshold as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
@ -464,68 +500,33 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
test, query_index, query_display_name
query_metric_stats.test test, query_metric_stats.query_index query_index,
query_display_name
from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
where metric_name = 'server_time'
order by test, query_index, metric_name
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_index, query_display_name query,
left, right, diff, stat_threshold, report_threshold,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query_index
) runs
left join query_display_names
on runs.test = query_display_names.test
and runs.query_index = query_display_names.query_index
left join file('analyze/report-thresholds.tsv',
TSV, 'test text, report_threshold float') thresholds
on runs.test = thresholds.test
left join query_metric_stats
on runs.test = query_metric_stats.test
and runs.query_index = query_metric_stats.query_index
where
query_metric_stats.metric_name = 'server_time'
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
select
left, right,
left > right
? '- ' || toString(floor(left / right, 3)) || 'x'
: '+ ' || toString(floor(right / left, 3)) || 'x',
diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
create table test_time_changes engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
@ -536,22 +537,22 @@ create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.ts
)
;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
create table unstable_tests engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries
group by test
order by total_unstable + total_changed desc
;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv
full join unstable_tests_tsv
from test_time_changes
full join unstable_tests
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
@ -559,28 +560,28 @@ create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.ts
settings join_use_nulls = 1
;
create table query_time engine Memory as select *
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, test, query_display_name
from query_time left join query_display_names using (test, query_index)
from total_client_time_per_query left join query_display_names using (test, query_index)
where p > 1.02 order by p desc;
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index)
from total_client_time_per_query full join queries using (test, query_index)
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
select wall_clock.test, real,
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select wall_clock_time_per_test.test, real,
floor(total_client_time, 3),
queries,
short_queries,
@ -590,23 +591,64 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
left join wall_clock_time_per_test using test
order by avg_real_per_query desc;
-- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
left, right,
left > right
? '- ' || toString(floor(left / right, 3)) || 'x'
: '+ ' || toString(floor(right / left, 3)) || 'x',
diff, stat_threshold, test, query_index, query_display_name
from queries order by test, query_index;
-- queries for which we will build flamegraphs (see below)
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
-- List of queries that have 'short' duration, but are not marked as 'short' by
-- the test author (we report them).
create table unmarked_short_queries_report
engine File(TSV, 'report/unmarked-short-queries.tsv')
as select time, test, query_index, query_display_name
from (
select right time, test, query_index from queries where short
union all
select time_median, test, query_index from partial_query_times
where time_median < $short_query_threshold
) times
left join query_display_names
on times.test = query_display_names.test
and times.query_index = query_display_names.query_index
where (test, query_index) not in
(select * from file('analyze/marked-short-queries.tsv', TSV,
'test text, query_index int'))
order by test, query_index
;
--------------------------------------------------------------------------------
-- various compatibility data formats follow, not related to the main report
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
@ -634,7 +676,8 @@ create view query_display_names as select * from
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select test, query_index, query_display_name, query_id
select query_runs.test test, query_runs.query_index query_index,
query_display_name, query_id
from query_runs
join queries_for_flamegraph on
query_runs.test = queries_for_flamegraph.test

View File

@ -6,7 +6,6 @@ trap 'kill $(jobs -pr) ||:' EXIT
mkdir db0 ||:
mkdir left ||:
mkdir right ||:
left_pr=$1
left_sha=$2
@ -24,7 +23,7 @@ dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_e
function download
{
# Historically there were various path for the performance test package.
# Historically there were various paths for the performance test package.
# Test all of them.
for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz"
do
@ -34,22 +33,13 @@ function download
fi
done
for path in "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/"{,clickhouse_build_check/}"performance/performance.tgz"
do
if curl --fail --head "$path"
then
right_path="$path"
fi
done
# might have the same version on left and right
if ! [ "$left_path" = "$right_path" ]
# Might have the same version on left and right (for testing).
if ! [ "$left_sha" = "$right_sha" ]
then
wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv &
else
mkdir right ||:
wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right &
mkdir left ||:
cp -a right/* left &
fi
for dataset_name in $datasets

View File

@ -1,38 +1,25 @@
#!/bin/bash
set -ex
chown nobody workspace output
chgrp nogroup workspace output
chmod 777 workspace output
cd workspace
# Fetch the repository to find and describe the compared revisions.
rm -rf ch ||:
time git clone --depth 50 --bare https://github.com/ClickHouse/ClickHouse ch
git -C ch fetch origin "$SHA_TO_TEST"
# Use the packaged repository to find the revision we will compare to.
function find_reference_sha
{
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
git -C right/ch log -1 origin/master
git -C right/ch log -1 pr
# Go back from the revision to be tested, trying to find the closest published
# testing release.
start_ref="$SHA_TO_TEST"~
# If we are testing a PR, and it merges with master successfully, we are
# building and testing not the nominal last SHA specified by pull/.../head
# and SHA_TO_TEST, but a revision that is merged with recent master, given
# by pull/.../merge ref.
# Master is the first parent of the pull/.../merge.
if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
# testing release. The PR branch may be either pull/*/head which is the
# author's branch, or pull/*/merge, which is head merged with some master
# automatically by Github. We will use a merge base with master as a reference
# for testing (or some older commit). A caveat is that if we're testing the
# master, the merge base is the tested commit itself, so we have to step back
# once.
start_ref=$(git -C right/ch merge-base origin/master pr)
if [ "PR_TO_TEST" == "0" ]
then
start_ref="pull/$PR_TO_TEST/merge~"
start_ref=$start_ref~
fi
# Loop back to find a commit that actually has a published perf test package.
while :
do
# FIXME the original idea was to compare to a closest testing tag, which
@ -46,12 +33,12 @@ function find_reference_sha
echo Reference tag is "$ref_tag"
# We use annotated tags which have their own shas, so we have to further
# dereference the tag to get the commit it points to, hence the '~0' thing.
REF_SHA=$(git -C ch rev-parse "$ref_tag~0")
REF_SHA=$(git -C right/ch rev-parse "$ref_tag~0")
# FIXME sometimes we have testing tags on commits without published builds --
# normally these are documentation commits. Loop to skip them.
# Historically there were various path for the performance test package.
# Test all of them.
# FIXME sometimes we have testing tags on commits without published builds.
# Normally these are documentation commits. Loop to skip them.
# Historically there were various paths for the performance test package,
# test all of them.
unset found
for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz"
do
@ -69,6 +56,24 @@ function find_reference_sha
REF_PR=0
}
chown nobody workspace output
chgrp nogroup workspace output
chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test
for path in "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/"{,clickhouse_build_check/}"performance/performance.tgz"
do
if curl --fail --head "$path"
then
right_path="$path"
fi
done
mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv
# Find reference revision if not specified explicitly
if [ "$REF_SHA" == "" ]; then find_reference_sha; fi
if [ "$REF_SHA" == "" ]; then echo Reference SHA is not specified ; exit 1 ; fi
@ -76,17 +81,14 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
# Show what we're testing
(
git -C ch log -1 --decorate "$REF_SHA" ||:
git -C right/ch log -1 --decorate "$REF_SHA" ||:
) | tee left-commit.txt
(
git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
then
echo
echo Real tested commit is:
git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
fi
git -C right/ch log -1 --decorate "$SHA_TO_TEST" ||:
echo
echo Real tested commit is:
git -C right/ch log -1 --decorate "pr"
) | tee right-commit.txt
if [ "$PR_TO_TEST" != "0" ]
@ -94,8 +96,8 @@ then
# If the PR only changes the tests and nothing else, prepare a list of these
# tests for use by compare.sh. Compare to merge base, because master might be
# far in the future and have unrelated test changes.
base=$(git -C ch merge-base "$SHA_TO_TEST" master)
git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
base=$(git -C right/ch merge-base pr origin/master)
git -C right/ch diff --name-only "$base" pr | tee changed-tests.txt
if grep -vq '^tests/performance' changed-tests.txt
then
# Have some other changes besides the tests, so truncate the test list,

View File

@ -37,21 +37,44 @@ available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
# Take care to keep the order of queries -- sometimes we have DROP IF EXISTS
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates):
result = []
for q in query_templates:
def substitute_parameters(query_templates, other_templates = []):
query_results = []
other_results = [[] for _ in other_templates]
for i, q in enumerate(query_templates):
keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n)
values = [available_parameters[k] for k in keys]
result.extend([
q.format(**dict(zip(keys, values_combo)))
for values_combo in itertools.product(*values)])
return result
combos = itertools.product(*values)
for c in combos:
with_keys = dict(zip(keys, c))
query_results.append(q.format(**with_keys))
for j, t in enumerate(other_templates):
other_results[j].append(t[i].format(**with_keys))
if len(other_templates):
return query_results, other_results
else:
return query_results
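To make the expansion concrete, here is a minimal self-contained sketch of the same substitution idea; the "table" parameter and its values are hypothetical, not taken from a real test:
import itertools
import string

# Hypothetical parameter set; in the test runner this comes from the
# substitution elements of the test XML (subst_elems above).
available_parameters = {'table': ['hits_10m', 'hits_100m']}

def expand(template):
    # Same idea as substitute_parameters(): find the {keys} used by the
    # template and emit one query per combination of their values.
    keys = set(n for _, n, _, _ in string.Formatter().parse(template) if n)
    values = [available_parameters[k] for k in keys]
    return [template.format(**dict(zip(keys, combo)))
            for combo in itertools.product(*values)]

print(expand('select count(*) from {table}'))
# ['select count(*) from hits_10m', 'select count(*) from hits_100m']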
# Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short.
test_queries = []
for e in root.findall('query'):
new_queries = []
if 'short' in e.attrib:
new_queries, [is_short] = substitute_parameters([e.text], [[e.attrib['short']]])
for i, s in enumerate(is_short):
# Don't print this if we only need to print the queries.
if eval(s) and not args.print_queries:
print(f'short\t{i + len(test_queries)}')
else:
new_queries = substitute_parameters([e.text])
test_queries += new_queries
# Build a list of test queries, processing all substitutions
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates)
# If we're only asked to print the queries, do that and exit
if args.print_queries:
@ -166,7 +189,7 @@ for conn_index, c in enumerate(connections):
c.execute(q)
print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
# Run test queries
# Run test queries.
for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}'

View File

@ -63,7 +63,48 @@ p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-
color: inherit;
text-decoration: none;
}}
tr:nth-child(odd) td {{filter: brightness(95%);}}
.all-query-times tr td:nth-child(1),
.all-query-times tr td:nth-child(2),
.all-query-times tr td:nth-child(3),
.all-query-times tr td:nth-child(4),
.all-query-times tr td:nth-child(5),
.all-query-times tr td:nth-child(7),
.changes-in-performance tr td:nth-child(1),
.changes-in-performance tr td:nth-child(2),
.changes-in-performance tr td:nth-child(3),
.changes-in-performance tr td:nth-child(4),
.changes-in-performance tr td:nth-child(5),
.changes-in-performance tr td:nth-child(7),
.unstable-queries tr td:nth-child(1),
.unstable-queries tr td:nth-child(2),
.unstable-queries tr td:nth-child(3),
.unstable-queries tr td:nth-child(4),
.unstable-queries tr td:nth-child(6),
.test-performance-changes tr td:nth-child(2),
.test-performance-changes tr td:nth-child(3),
.test-performance-changes tr td:nth-child(4),
.test-performance-changes tr td:nth-child(5),
.test-performance-changes tr td:nth-child(6),
.test-times tr td:nth-child(2),
.test-times tr td:nth-child(3),
.test-times tr td:nth-child(4),
.test-times tr td:nth-child(5),
.test-times tr td:nth-child(6),
.test-times tr td:nth-child(7),
.test-times tr td:nth-child(8),
.concurrent-benchmarks tr td:nth-child(2),
.concurrent-benchmarks tr td:nth-child(3),
.concurrent-benchmarks tr td:nth-child(4),
.concurrent-benchmarks tr td:nth-child(5),
.metric-changes tr td:nth-child(2),
.metric-changes tr td:nth-child(3),
.metric-changes tr td:nth-child(4),
.metric-changes tr td:nth-child(5)
{{ text-align: right; }}
</style>
<title>Clickhouse performance comparison</title>
</head>
@ -111,11 +152,14 @@ def tableHeader(r):
return tr(''.join([th(f) for f in r]))
def tableStart(title):
return """
<h2 id="{anchor}"><a class="cancela" href="#{anchor}">{title}</a></h2>
<table>""".format(
anchor = nextTableAnchor(),
title = title)
anchor = nextTableAnchor();
cls = '-'.join(title.lower().split(' ')[:3]);
return f"""
<h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a>
</h2>
<table class="{cls}">
"""
def tableEnd():
return '</table>'
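As a quick illustration of the class naming this change introduces (the titles below are assumed to match the selectors added in the CSS block above), the first three words of a table title become its CSS class:
# Sketch of the class derivation used in tableStart() above.
def title_to_class(title):
    return '-'.join(title.lower().split(' ')[:3])

print(title_to_class('Changes in performance'))  # -> 'changes-in-performance'
print(title_to_class('Unstable queries'))        # -> 'unstable-queries'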
@ -196,6 +240,12 @@ if args.report == 'main':
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows)
unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
error_tests += len(unmarked_short_rows)
printSimpleTable('Short queries not marked as short',
['New client time, s', 'Test', '#', 'Query'],
unmarked_short_rows)
def print_partial():
rows = tsvRows('report/partial-queries-report.tsv')
if not rows:
@ -232,12 +282,13 @@ if args.report == 'main':
columns = [
'Old,&nbsp;s', # 0
'New,&nbsp;s', # 1
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
'Test', # 5
'#', # 6
'Query', # 7
'Times speedup / slowdown', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.001 threshold', # 4
# Failed # 5
'Test', # 6
'#', # 7
'Query', # 8
]
print(tableHeader(columns))
@ -245,15 +296,15 @@ if args.report == 'main':
attrs = ['' for c in columns]
attrs[4] = None
for row in rows:
if int(row[4]):
if float(row[2]) < 0.:
if int(row[5]):
if float(row[3]) < 0.:
faster_queries += 1
attrs[2] = f'style="background: {color_good}"'
attrs[2] = attrs[3] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = f'style="background: {color_bad}"'
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[2] = attrs[3] = ''
print(tableRow(row, attrs))
@ -275,7 +326,7 @@ if args.report == 'main':
'Old,&nbsp;s', #0
'New,&nbsp;s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;<&nbsp;0.001 threshold', #3
'p&nbsp;&lt;&nbsp;0.001 threshold', #3
# Failed #4
'Test', #5
'#', #6
@ -492,9 +543,9 @@ elif args.report == 'all-queries':
# Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
'Times speedup / slowdown', #4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5
'p&nbsp;&lt;&nbsp;0.001 threshold', #6
'Test', #7
'#', #8
'Query', #9
@ -513,12 +564,12 @@ elif args.report == 'all-queries':
attrs[6] = ''
if int(r[0]):
if float(r[4]) > 0.:
attrs[4] = f'style="background: {color_bad}"'
if float(r[5]) > 0.:
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else:
attrs[4] = f'style="background: {color_good}"'
attrs[4] = attrs[5] = f'style="background: {color_good}"'
else:
attrs[4] = ''
attrs[4] = attrs[5] = ''
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"'


@ -55,18 +55,21 @@ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-serv
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/;
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/;
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
# Retain any pre-existing config and allow ClickHouse to load those if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
service zookeeper start


@ -17,7 +17,6 @@ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
@ -33,6 +32,10 @@ ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
# Retain any pre-existing config and allow ClickHouse to load it if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/polymorphic_parts.xml /etc/clickhouse-server/config.d/
fi


@ -46,27 +46,30 @@ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-serv
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/;
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/; \
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
# Retain any pre-existing config and allow ClickHouse to load it if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
service zookeeper start
sleep 5


@ -23,28 +23,7 @@ RUN apt-get update -y \
brotli
COPY ./stress /stress
COPY run.sh /
ENV DATASETS="hits visits"
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment; \
service clickhouse-server start && sleep 5 \
&& /s3downloader --dataset-names $DATASETS \
&& chmod 777 -R /var/lib/clickhouse \
&& clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" \
&& clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" \
&& service clickhouse-server restart && sleep 5 \
&& clickhouse-client --query "SHOW TABLES FROM datasets" \
&& clickhouse-client --query "SHOW TABLES FROM test" \
&& clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" \
&& clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" \
&& clickhouse-client --query "SHOW TABLES FROM test" \
&& ./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION"
CMD ["/bin/bash", "/run.sh"]

docker/test/stress/run.sh (new executable file, 56 lines)

@ -0,0 +1,56 @@
#!/bin/bash
set -x
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
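# Wait until clickhouse-server answers a trivial query, giving up after about a minute (120 polls, 0.5 s apart).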
function wait_server()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
break
fi
sleep 0.5
counter=$(($counter + 1))
done
}
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
service clickhouse-server start
wait_server
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
service clickhouse-server restart
wait_server
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
clickhouse-client --query "SHOW TABLES FROM test"
./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION"
service clickhouse-server restart
wait_server
clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt


@ -41,15 +41,6 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option):
return pipes
def check_clickhouse_alive(cmd):
try:
logging.info("Checking ClickHouse still alive")
check_call("{} --query \"select 'Still alive'\"".format(cmd), shell=True)
return True
except:
return False
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for running stresstest")
@ -65,29 +56,18 @@ if __name__ == "__main__":
args = parser.parse_args()
func_pipes = []
perf_process = None
try:
perf_process = run_perf_test(args.perf_test_cmd, args.perf_test_xml_path, args.output_folder)
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests)
perf_process = run_perf_test(args.perf_test_cmd, args.perf_test_xml_path, args.output_folder)
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests)
logging.info("Will wait functests to finish")
while True:
retcodes = []
for p in func_pipes:
if p.poll() is not None:
retcodes.append(p.returncode)
if len(retcodes) == len(func_pipes):
break
logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes))
time.sleep(5)
logging.info("Will wait functests to finish")
while True:
retcodes = []
for p in func_pipes:
if p.poll() is not None:
retcodes.append(p.returncode)
if len(retcodes) == len(func_pipes):
break
logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes))
time.sleep(5)
if not check_clickhouse_alive(args.client_cmd):
raise Exception("Stress failed, results in logs")
else:
logging.info("Stress is ok")
except Exception as ex:
raise ex
finally:
if os.path.exists(args.server_log_folder):
logging.info("Copying server log files")
for log_file in os.listdir(args.server_log_folder):
shutil.copy(os.path.join(args.server_log_folder, log_file), os.path.join(args.output_folder, log_file))
logging.info("Stress test finished")


@ -35,7 +35,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.6.24 docker-compose docker dicttoxml kazoo tzlocal
RUN pip3 install urllib3 testflows==1.6.39 docker-compose docker dicttoxml kazoo tzlocal
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce


@ -1,25 +0,0 @@
{
"checkYo": false,
"excludeFiles": [],
"fileExtensions": [],
"format": "auto",
"ignoreTags": [
"code",
"kbd",
"object",
"samp",
"script",
"style",
"var"
],
"maxRequests": 2,
"lang": "en,ru",
"report": ["console"],
"dictionary": [
"(C|c)lick(H|h)ouse",
"CatBoost",
"(Ш|ш)ард(ы|ов|а|у|е|ам|ирование|ированы|ах)?",
"логир(ование|уются|ования)?",
"конфиг(а|е|ом|у)"
]
}


@ -0,0 +1,24 @@
# Statement name (for example, SHOW USER)
Brief description of what the statement does.
Syntax:
```sql
Syntax of the statement.
```
## Other necessary sections of the description (Optional)
Examples of descriptions with a complicated structure:
- https://clickhouse.tech/docs/en/sql-reference/statements/grant/
- https://clickhouse.tech/docs/en/sql-reference/statements/revoke/
- https://clickhouse.tech/docs/en/sql-reference/statements/select/join/
## See Also (Optional)
Links to related topics as a list.
- [link](#)


@ -4,15 +4,14 @@ toc_priority: 70
toc_title: Introduction
---
# ClickHouse Commercial Services
# ClickHouse Commercial Services {#clickhouse-commercial-services}
This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies not necessarily affiliated with Yandex.
Service categories:
- [Cloud](cloud.md)
- [Support](support.md)
- [Cloud](../commercial/cloud.md)
- [Support](../commercial/support.md)
!!! note "For service providers"
If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn't fit into existing categories). The easiest way to open a pull-request for documentation page is by using a “pencil” edit button in the top-right corner. If your service available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description).
If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn’t fit into existing categories). The easiest way to open a pull request for a documentation page is by using the “pencil” edit button in the top-right corner. If your service is available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description).


@ -5,7 +5,7 @@ toc_title: Architecture Overview
# Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture}
ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called "vectorized query execution" and it helps lower the cost of actual data processing.
ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution” and it helps lower the cost of actual data processing.
> This idea is nothing new. It dates back to the `APL` (A programming language, 1957) and its descendants: `A +` (APL dialect), `J` (1990), `K` (1993), and `Q` (programming language from Kx Systems, 2003). Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `VectorWise` system (also known as Actian Vector Analytic Database by Actian Corporation).
@ -21,11 +21,11 @@ Various `IColumn` implementations (`ColumnUInt8`, `ColumnString`, and so on) are
Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator []` method to get the n-th value as a `Field`, and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on.
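A toy model may make that trade-off concrete. The sketch below is plain Python, not the real C++ interfaces; the class and method names only mirror the ones described in this section.

``` python
# Toy model of the IColumn / Field relationship (illustration only, not ClickHouse code).
class ColumnUInt64:
    def __init__(self, data=None):
        self.data = list(data or [])

    def __getitem__(self, n):                 # like IColumn::operator[]: returns a generic "Field"
        return ('UInt64', self.data[n])

    def insert(self, field):                  # like IColumn::insert(const Field &)
        _kind, value = field
        self.data.append(int(value))

    def insert_range_from(self, src, start, length):   # like insertRangeFrom: no temporary Fields
        self.data.extend(src.data[start:start + length])

src = ColumnUInt64(range(10))
dst = ColumnUInt64()
dst.insert(src[3])                            # slow path: one temporary Field per value
dst.insert_range_from(src, 0, 5)              # fast path: bulk copy of a range
print(dst.data)                               # [3, 0, 1, 2, 3, 4]
```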
`Field` doesn't have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`.
`Field` doesn’t have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`.
## Leaky Abstractions {#leaky-abstractions}
`IColumn` has methods for common relational transformations of data, but they don’t meet all needs. For example, `ColumnUInt64` doesn't have a method to calculate the sum of two columns, and `ColumnString` doesn't have a method to run a substring search. These countless routines are implemented outside of `IColumn`.
`IColumn` has methods for common relational transformations of data, but they don’t meet all needs. For example, `ColumnUInt64` doesn’t have a method to calculate the sum of two columns, and `ColumnString` doesn’t have a method to run a substring search. These countless routines are implemented outside of `IColumn`.
Various functions on columns can be implemented in a generic, non-efficient way using `IColumn` methods to extract `Field` values, or in a specialized way using knowledge of inner memory layout of data in a specific `IColumn` implementation. It is implemented by casting functions to a specific `IColumn` type and deal with internal representation directly. For example, `ColumnUInt64` has the `getData` method that returns a reference to an internal array, then a separate routine reads or fills that array directly. We have “leaky abstractions” to allow efficient specializations of various routines.
@ -35,7 +35,7 @@ Various functions on columns can be implemented in a generic, non-efficient way
`IDataType` and `IColumn` are only loosely related to each other. Different data types can be represented in memory by the same `IColumn` implementations. For example, `DataTypeUInt32` and `DataTypeDateTime` are both represented by `ColumnUInt32` or `ColumnConstUInt32`. In addition, the same data type can be represented by different `IColumn` implementations. For example, `DataTypeUInt8` can be represented by `ColumnUInt8` or `ColumnConstUInt8`.
`IDataType` only stores metadata. For instance, `DataTypeUInt8` doesn't store anything at all (except virtual pointer `vptr`) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings).
`IDataType` only stores metadata. For instance, `DataTypeUInt8` doesn’t store anything at all (except virtual pointer `vptr`) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings).
`IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of the XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface.
@ -120,9 +120,9 @@ There are ordinary functions and aggregate functions. For aggregate functions, s
Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution.
There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), that exploit block processing and violate the independence of rows.
There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and \[runningAccumulate\](../sql-reference/functions/other-functions.md\#runningaccumulate), that exploit block processing and violate the independence of rows.
ClickHouse has strong typing, so there’s no implicit type conversion. If a function doesn't support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.
ClickHouse has strong typing, so there’s no implicit type conversion. If a function doesn’t support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.
Implementing a function may be slightly inconvenient because a function explicitly dispatches supported data types and supported `IColumns`. For example, the `plus` function has code generated by instantiation of a C++ template for each combination of numeric types, and constant or non-constant left and right arguments.
@ -169,13 +169,13 @@ There is no global query plan for distributed query execution. Each node has its
`MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in the primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate by many columns, you get values for the corresponding rows.
The primary key itself is “sparse”. It doesn't address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
The primary key itself is “sparse”. It doesn’t address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table.
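A toy model of that lookup (plain Python with hypothetical key values; the real `primary.idx`/`column.mrk` layout is binary and per-column) shows why only a few granules need to be touched for a key range.

``` python
import bisect

index_granularity = 8192                       # N: one index entry per N rows
# Sparse index: the primary-key value of every N-th row (keys are sorted within a part).
sparse_index = [0, 8192, 16384, 24576, 32768]  # hypothetical key values

def granules_for_range(key_lo, key_hi):
    """Granule numbers [first, last) that may contain keys in [key_lo, key_hi]."""
    first = max(bisect.bisect_right(sparse_index, key_lo) - 1, 0)
    last = bisect.bisect_right(sparse_index, key_hi)
    return first, last

first, last = granules_for_range(10000, 20000)
print(f'read granules [{first}, {last}), i.e. rows [{first * index_granularity}, {last * index_granularity})')
# read granules [1, 3), i.e. rows [8192, 24576)
```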
When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.
`MergeTree` is not an LSM tree because it doesn't contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.
`MergeTree` is not an LSM tree because it doesn’t contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.
> MergeTree tables can only have one (primary) index: there aren’t any secondary indices. It would be nice to allow multiple physical representations under one logical table, for example, to store data in more than one physical order or even to allow representations with pre-aggregated data along with original data.
@ -187,7 +187,7 @@ Replication in ClickHouse can be configured on a per-table basis. You could have
Replication is implemented in the `ReplicatedMergeTree` storage engine. The path in `ZooKeeper` is specified as a parameter for the storage engine. All tables with the same path in `ZooKeeper` become replicas of each other: they synchronize their data and maintain consistency. Replicas can be added and removed dynamically simply by creating or dropping a table.
Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn't support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails.
Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn’t support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails.
Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. It is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log.


@ -1,6 +1,6 @@
---
toc_priority: 71
toc_title: Source Code
toc_title: Source Code Browser
---
# Browse ClickHouse Source Code {#browse-clickhouse-source-code}


@ -1,6 +1,6 @@
---
toc_priority: 67
toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
toc_title: Build on Linux for AARCH64 (ARM64)
---
# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, in Ubuntu Bionic you can use the following commands:
@ -20,7 +20,7 @@ sudo apt-get update
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
``` bash
cd ClickHouse
@ -29,7 +29,7 @@ wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse


@ -1,6 +1,6 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
toc_title: Build on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
@ -19,7 +19,7 @@ sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Let’s remember the path where we install `cctools` as ${CCTOOLS}
@ -47,7 +47,7 @@ mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse


@ -1,6 +1,6 @@
---
toc_priority: 65
toc_title: How to Build ClickHouse on Mac OS X
toc_title: Build on Mac OS X
---
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
@ -45,14 +45,12 @@ $ cd ..
## Caveats {#caveats}
If you intend to run clickhouse-server, make sure to increase the system’s maxfiles variable.
If you intend to run `clickhouse-server`, make sure to increase the system’s maxfiles variable.
!!! info "Note"
You’ll need to use sudo.
To do so, create the following file:
/Library/LaunchDaemons/limit.maxfiles.plist:
To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the following content:
``` xml
<?xml version="1.0" encoding="UTF-8"?>


@ -1,11 +1,9 @@
---
toc_priority: 64
toc_title: How to Build ClickHouse on Linux
toc_title: Build on Linux
---
# How to Build ClickHouse for Development {#how-to-build-clickhouse-for-development}
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
# How to Build ClickHouse on Linux {#how-to-build-clickhouse-for-development}
Supported platforms:
@ -13,7 +11,11 @@ Supported platforms:
- AArch64
- Power9 (experimental)
## Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
## Normal Build for Development on Ubuntu
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
### Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
``` bash
$ sudo apt-get install git cmake python ninja-build
@ -21,18 +23,18 @@ $ sudo apt-get install git cmake python ninja-build
Or cmake3 instead of cmake on older systems.
## Install GCC 9 {#install-gcc-9}
### Install GCC 9 {#install-gcc-9}
There are several ways to do this.
### Install from Repository {#install-from-repository}
#### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
### Install from a PPA Package {#install-from-a-ppa-package}
#### Install from a PPA Package {#install-from-a-ppa-package}
On older Ubuntu:
@ -43,18 +45,18 @@ $ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
### Install from Sources {#install-from-sources}
#### Install from Sources {#install-from-sources}
See [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
## Use GCC 9 for Builds {#use-gcc-9-for-builds}
### Use GCC 9 for Builds {#use-gcc-9-for-builds}
``` bash
$ export CC=gcc-9
$ export CXX=g++-9
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
@ -66,7 +68,7 @@ or
$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
```
## Build ClickHouse {#build-clickhouse}
### Build ClickHouse {#build-clickhouse}
``` bash
$ cd ClickHouse
@ -79,7 +81,7 @@ $ ninja
To create an executable, run `ninja clickhouse`.
This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments.
# How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}
## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}
The build requires the following components:
@ -93,32 +95,58 @@ The build requires the following components:
If all the components are installed, you may build in the same way as the steps above.
Example for Ubuntu Eoan:
sudo apt update
sudo apt install git cmake ninja-build g++ python
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
``` bash
sudo apt update
sudo apt install git cmake ninja-build g++ python
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
```
Example for OpenSUSE Tumbleweed:
sudo zypper install git cmake ninja gcc-c++ python lld
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
``` bash
sudo zypper install git cmake ninja gcc-c++ python lld
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
```
Example for Fedora Rawhide:
``` bash
sudo yum update
yum --nogpg install git cmake make gcc-c++ python2
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
make -j $(nproc)
```
sudo yum update
yum --nogpg install git cmake make gcc-c++ python2
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
make -j $(nproc)
# You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
## How to Build ClickHouse Debian Package {#how-to-build-clickhouse-debian-package}
### Install Git and Pbuilder {#install-git-and-pbuilder}
``` bash
$ sudo apt-get update
$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources-1}
``` bash
$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```
### Run Release Script {#run-release-script}
``` bash
$ ./release
```
## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
@ -126,26 +154,4 @@ They are built for stable, prestable and testing releases as long as for every c
To find the freshest build from `master`, go to the [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near the commit, and click the “Details” link right after “ClickHouse Build Check”.
# How to Build ClickHouse Debian Package {#how-to-build-clickhouse-debian-package}
## Install Git and Pbuilder {#install-git-and-pbuilder}
``` bash
$ sudo apt-get update
$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources-1}
``` bash
$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```
## Run Release Script {#run-release-script}
``` bash
$ ./release
```
[Original article](https://clickhouse.tech/docs/en/development/build/) <!--hide-->


@ -35,6 +35,7 @@ toc_title: Third-Party Libraries Used
| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) |
| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) |
| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) |
| sentry-native | [MIT License](https://github.com/getsentry/sentry-native/blob/master/LICENSE) |
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |


@ -1,6 +1,6 @@
---
toc_priority: 68
toc_title: How to Write C++ Code
toc_title: C++ Guide
---
# How to Write C++ Code {#how-to-write-c-code}


@ -1,6 +1,6 @@
---
toc_priority: 69
toc_title: How to Run ClickHouse Tests
toc_title: Testing
---
# ClickHouse Testing {#clickhouse-testing}
@ -25,12 +25,7 @@ Tests should use (create, drop, etc) only tables in `test` database that is assu
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`.
Some tests are marked with `zookeeper`, `shard` or `long` in their names.
`zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that
requires server to listen `127.0.0.*`; `distributed` or `global` have the same
meaning. `long` is for tests that run slightly longer that one second. You can
disable these groups of tests using `--no-zookeeper`, `--no-shard` and
`--no-long` options, respectively.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using the `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively.
## Known Bugs {#known-bugs}
@ -153,11 +148,11 @@ Motivation:
Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on AArch64 platform;
- build on PowerPc platform.
- build on FreeBSD
- build on Debian with libraries from system packages
- build with shared linking of libraries
- build on AArch64 platform
- build on PowerPc platform
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
@ -177,22 +172,22 @@ For production builds, gcc is used (it still generates slightly more efficient c
## Sanitizers {#sanitizers}
**Address sanitizer**.
### Address sanitizer
We run functional and integration tests under ASan on per-commit basis.
**Valgrind (Memcheck)**.
### Valgrind (Memcheck)
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
**Undefined behaviour sanitizer.**
### Undefined behaviour sanitizer
We run functional and integration tests under UBSan on a per-commit basis.
**Thread sanitizer**.
### Thread sanitizer
We run functional tests under TSan on a per-commit basis. We still don’t run integration tests under TSan on a per-commit basis.
**Memory sanitizer**.
### Memory sanitizer
Currently we still don’t use MSan.
**Debug allocator.**
### Debug allocator
Debug version of `jemalloc` is used for debug build.
## Fuzzing {#fuzzing}
@ -227,7 +222,7 @@ If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of t
## Code Style {#code-style}
Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
Code style rules are described [here](style.md).
To check for some common style violations, you can use `utils/check-style` script.


@ -5,11 +5,11 @@ toc_priority: 25
toc_title: hidden
---
# ClickHouse Engines
# ClickHouse Engines {#clickhouse-engines}
There are two key engine kinds in ClickHouse:
- [Table engines](table-engines/index.md)
- [Database engines](database-engines/index.md)
- [Table engines](../engines/table-engines/index.md)
- [Database engines](../engines/database-engines/index.md)
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}


@ -3,14 +3,14 @@ toc_folder_title: Integrations
toc_priority: 30
---
# Table Engines for Integrations
# Table Engines for Integrations {#table-engines-for-integrations}
ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require the use of custom query methods on each use.
List of supported integrations:
- [ODBC](odbc.md)
- [JDBC](jdbc.md)
- [MySQL](mysql.md)
- [HDFS](hdfs.md)
- [Kafka](kafka.md)
- [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [Kafka](../../../engines/table-engines/integrations/kafka.md)


@ -18,7 +18,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original MySQL table structure:


@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
ENGINE = ODBC(connection_settings, external_database, external_table)
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the source table structure:


@ -0,0 +1,122 @@
---
toc_priority: 6
toc_title: RabbitMQ
---
# RabbitMQ Engine {#rabbitmq-engine}
This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.com).
RabbitMQ lets you:
- Publish or subscribe to data flows.
- Process streams as they become available.
## Creating a Table {#table_engine-rabbitmq-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = RabbitMQ SETTINGS
rabbitmq_host_port = 'host:port',
rabbitmq_exchange_name = 'exchange_name',
rabbitmq_format = 'data_format'[,]
[rabbitmq_exchange_type = 'exchange_type',]
[rabbitmq_routing_key_list = 'key1,key2,...',]
[rabbitmq_row_delimiter = 'delimiter_symbol',]
[rabbitmq_num_consumers = N,]
[rabbitmq_num_queues = N,]
[rabbitmq_transactional_channel = 0]
```
Required parameters:
- `rabbitmq_host_port` – host:port (for example, `localhost:5672`).
- `rabbitmq_exchange_name` – RabbitMQ exchange name.
- `rabbitmq_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
- `rabbitmq_exchange_type` – The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent-hash`. Default: `fanout`.
- `rabbitmq_routing_key_list` – A comma-separated list of routing keys.
- `rabbitmq_row_delimiter` – Delimiter character, which ends the message.
- `rabbitmq_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
- `rabbitmq_num_queues` – The number of queues per consumer. Default: `1`. Specify more queues if the capacity of one queue per consumer is insufficient. A single queue can contain up to 50K messages at the same time.
- `rabbitmq_transactional_channel` – Wrap insert queries in transactions. Default: `0`.
Required configuration:
The RabbitMQ server configuration should be added using the ClickHouse config file.
``` xml
<rabbitmq>
<username>root</username>
<password>clickhouse</password>
</rabbitmq>
```
Example:
``` sql
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
rabbitmq_exchange_name = 'exchange1',
rabbitmq_format = 'JSONEachRow',
rabbitmq_num_consumers = 5;
```
## Description {#description}
`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. To do this:
1. Use the engine to create a RabbitMQ consumer and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from RabbitMQ and convert them to the required format using `SELECT`.
One RabbitMQ table can have as many materialized views as you like.
Data can be channeled based on `rabbitmq_exchange_type` and the specified `rabbitmq_routing_key_list`.
There can be no more than one exchange per table. One exchange can be shared between multiple tables - it enables routing into multiple tables at the same time.
Exchange type options:
- `direct` - Routing is based on exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, a message key can equal any of them.
- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys.
- `topic` - Routing is based on patterns with dot-separated keys. Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`.
- `consistent-hash` - Data is evenly distributed between all bound tables (where exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
If the exchange type is not specified, the default is `fanout`, and routing keys for data publishing must be randomized in the range `[1, num_consumers]` for every message/batch (or in the range `[1, num_consumers * num_queues]` if `rabbitmq_num_queues` is set). This table configuration works faster than any other, especially when the `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` parameters are set.
If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` parameters are specified along with `rabbitmq_exchange_type`, then:
- `rabbitmq-consistent-hash-exchange` plugin must be enabled.
- `message_id` property of the published messages must be specified (unique for each message/batch).
Example:
``` sql
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
rabbitmq_exchange_name = 'exchange1',
rabbitmq_exchange_type = 'headers',
rabbitmq_routing_key_list = 'format=logs,type=report,year=2020',
rabbitmq_format = 'JSONEachRow',
rabbitmq_num_consumers = 5;
CREATE TABLE daily (key UInt64, value UInt64)
ENGINE = MergeTree();
CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT key, value FROM queue;
SELECT key, value FROM daily ORDER BY key;
```


@ -1,6 +1,45 @@
---
toc_folder_title: Log Family
toc_priority: 29
toc_title: Introduction
---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutations](../../../sql-reference/statements/alter/index.md#alter-mutations).
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesn’t support parallel data reading by several threads in a single query. It reads data slower than other engines in the family that support parallel reading from a single query and it uses almost as many file descriptors as the `Log` engine because it stores each column in a separate file. Use it only in simple scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer file descriptors, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->


@ -1,44 +0,0 @@
---
toc_priority: 31
toc_title: Introduction
---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutation](../../../sql-reference/statements/alter.md#alter-mutations) operations.
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesn’t support parallel data reading by several threads. It reads data slower than other engines in the family that support parallel reading and it uses almost as many descriptors as the `Log` engine because it stores each column in a separate file. Use it in simple low-load scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer descriptors in the operating system, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Log
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
The engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
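As a rough illustration, the marks allow a single query to read a `Log` table with several threads; `max_threads` is a standard query-level setting, and the table name is hypothetical:
``` sql
-- Each thread starts reading at a different mark (data block).
SELECT count()
FROM hits_log
SETTINGS max_threads = 4;
```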

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
Use this engine in scenarios where you need to write many tables with small amounts of data (less than 1 million rows).
@ -20,7 +20,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = StripeLog
```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
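For example, a minimal sketch of such a table (the column names are illustrative):
``` sql
-- StripeLog stores all columns of every inserted block together in a single data file.
CREATE TABLE stripe_log_table
(
    timestamp DateTime,
    message_type String,
    message String
)
ENGINE = StripeLog;
```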
## Writing the Data {#table_engines-stripelog-writing-the-data}

View File

@ -5,7 +5,7 @@ toc_title: TinyLog
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/index.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write the data once, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediate data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
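A hedged sketch of the write-once pattern (the table name and data are made up for illustration):
``` sql
-- Write a small intermediate batch once...
CREATE TABLE batch_tmp
(
    id UInt64,
    payload String
)
ENGINE = TinyLog;

INSERT INTO batch_tmp SELECT number, toString(number) FROM system.numbers LIMIT 1000;

-- ...then read it back as many times as needed.
SELECT count() FROM batch_tmp;
```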

View File

@ -32,7 +32,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md).
For a description of request parameters, see [request description](../../../sql-reference/statements/create/table.md).
**Query clauses**

View File

@ -26,7 +26,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```
For a description of query parameters, see [query description](../../../sql-reference/statements/create.md).
For a description of query parameters, see [query description](../../../sql-reference/statements/create/table.md).
**CollapsingMergeTree Parameters**
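For orientation, a minimal sketch of a `CollapsingMergeTree` table with its `sign` parameter (the table and column names are hypothetical):
``` sql
-- `sign` marks the row kind: 1 for a "state" row, -1 for a "cancel" row that collapses it.
CREATE TABLE user_actions
(
    user_id UInt64,
    views UInt32,
    sign Int8
)
ENGINE = CollapsingMergeTree(sign)
ORDER BY user_id;
```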

View File

@ -77,7 +77,7 @@ Lets break down the name of the first part: `201901_1_3_1`:
The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging into a larger part. The corrupted data parts are also marked as inactive.
As you can see in the example, there are several separated parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after inserting. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. Example:
As you can see in the example, there are several separate parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after an insert. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query. Example:
``` sql
OPTIMIZE TABLE visits PARTITION 201902;
@ -116,10 +116,10 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
The folders 201901\_1\_1\_0, 201901\_1\_7\_1 and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
The `detached` directory contains parts that were detached from the table using the [DETACH](../../../sql-reference/statements/alter.md#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time the server will not know about this until you run the [ATTACH](../../../sql-reference/statements/alter.md#alter_attach-partition) query.
The `detached` directory contains parts that were detached from the table using the [DETACH](../../../sql-reference/statements/alter/partition.md#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time; the server will not know about this until you run the [ATTACH](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.
Note that on a running server, you cannot manually change the set of parts or their data on the file system, since the server will not know about it. For non-replicated tables, you can do this when the server is stopped, but it isn’t recommended. For replicated tables, the set of parts cannot be changed in any case.
ClickHouse allows you to perform operations with the partitions: delete them, copy from one table to another, or create a backup. See the list of all operations in the section [Manipulations With Partitions and Parts](../../../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
ClickHouse allows you to perform operations with the partitions: delete them, copy from one table to another, or create a backup. See the list of all operations in the section [Manipulations With Partitions and Parts](../../../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions).
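For instance, hedged sketches of such operations on the `visits` table from the example above:
``` sql
-- Detach a partition; its parts are moved to the detached directory.
ALTER TABLE visits DETACH PARTITION 201901;

-- Attach it back after inspecting or repairing the files on disk.
ALTER TABLE visits ATTACH PARTITION 201901;

-- Create a local backup of a partition (hard links under the shadow directory).
ALTER TABLE visits FREEZE PARTITION 201902;
```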
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/custom_partitioning_key/) <!--hide-->

View File

@ -28,7 +28,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
A table for the Graphite data should have the following columns for the following data:

View File

@ -1,6 +1,17 @@
---
toc_folder_title: MergeTree Family
toc_priority: 28
toc_title: Introduction
---
# MergeTree Engine Family {#mergetree-engine-family}
Table engines from the MergeTree family are the core of ClickHouse data storage capabilities. They provide most of the features for resilience and high-performance data retrieval: columnar storage, custom partitioning, a sparse primary index, secondary data-skipping indexes, and so on.
The base [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) table engine can be considered the default table engine for single-node ClickHouse instances because it is versatile and practical for a wide range of use cases.
For production usage, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) is the way to go, because it adds high availability to all the features of the regular MergeTree engine. A bonus is automatic data deduplication on ingestion, so the software can safely retry an insert if there was a network issue.
All other engines of the MergeTree family add extra functionality for specific use cases, usually implemented as additional data manipulation in the background.
The main downside of MergeTree engines is that they are rather heavyweight, so the typical pattern is to use relatively few of them. If you need many small tables, for example for temporary data, consider the [Log engine family](../../../engines/table-engines/log-family/index.md).
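As a point of reference, a minimal sketch of a base MergeTree table (all names here are illustrative):
``` sql
-- Columnar storage with a sparse primary index on the sorting key and monthly partitions.
CREATE TABLE page_views
(
    event_date Date,
    user_id UInt64,
    url String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, user_id);
```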

View File

@ -49,7 +49,7 @@ ORDER BY expr
[SETTINGS name=value, ...]
```
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create/table.md).
### Query Clauses {#mergetree-query-clauses}
@ -96,6 +96,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Dont turn it off.
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
**Example of Sections Setting**
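A sketch of how such sections might be combined (the column names and values are illustrative):
``` sql
CREATE TABLE example_settings
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate)
SETTINGS index_granularity = 8192;
```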
@ -149,6 +150,10 @@ When data is inserted in a table, separate data parts are created and each of th
Data belonging to different partitions are separated into different parts. In the background, ClickHouse merges data parts for more efficient storage. Parts belonging to different partitions are not merged. The merge mechanism does not guarantee that all rows with the same primary key will be in the same data part.
Data parts can be stored in `Wide` or `Compact` format. In `Wide` format, each column is stored in a separate file in the filesystem; in `Compact` format, all columns are stored in one file. The `Compact` format can be used to increase the performance of small and frequent inserts.
The data storage format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the table engine. If the number of bytes or rows in a data part is less than the corresponding setting's value, the part is stored in `Compact` format. Otherwise it is stored in `Wide` format. If neither of these settings is set, data parts are stored in `Wide` format.
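For example, a hedged sketch that keeps small parts in `Compact` format (the table name and threshold are arbitrary):
``` sql
-- Parts smaller than ~10 MB are written in Compact format (all columns in one file).
CREATE TABLE compact_parts_example
(
    event_date Date,
    user_id UInt64
)
ENGINE = MergeTree
ORDER BY user_id
SETTINGS min_bytes_for_wide_part = 10485760;
```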
Each data part is logically divided into granules. A granule is the smallest indivisible data set that ClickHouse reads when selecting data. ClickHouse doesnt split rows or values, so each granule always contains an integer number of rows. The first row of a granule is marked with the value of the primary key for the row. For each data part, ClickHouse creates an index file that stores the marks. For each column, whether its in the primary key or not, ClickHouse also stores the same marks. These marks let you find data directly in column files.
The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lies in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. In this case, the size of the granule equals the size of the row.
@ -212,7 +217,7 @@ This feature is helpful when using the [SummingMergeTree](../../../engines/table
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
[ALTER](../../../sql-reference/statements/alter.md) of the sorting key is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts dont need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, the data is sorted by both the old and new sorting keys at the moment of table modification.
[ALTER](../../../sql-reference/statements/alter/index.md) of the sorting key is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts dont need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, the data is sorted by both the old and new sorting keys at the moment of table modification.
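For instance, a sketch of such a lightweight change (the table and column names are hypothetical):
``` sql
-- Add a new dimension column and append it to the sorting key in one statement;
-- existing data parts do not need to be rewritten.
ALTER TABLE page_views
    ADD COLUMN device_type UInt8,
    MODIFY ORDER BY (event_date, user_id, device_type);
```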
### Use of Indexes and Partitions in Queries {#use-of-indexes-and-partitions-in-queries}
@ -482,7 +487,7 @@ Data with an expired TTL is removed when ClickHouse merges data parts.
When ClickHouse sees that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query before `SELECT`.
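For example (assuming a hypothetical table named `events_with_ttl`):
``` sql
-- Force merges so that rows with an expired TTL are removed before reading.
OPTIMIZE TABLE events_with_ttl FINAL;

SELECT count() FROM events_with_ttl;
```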
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
@ -490,7 +495,7 @@ If you perform the `SELECT` query between merges, you may get expired data. To a
`MergeTree` family table engines can store data on multiple block devices. For example, it can be useful when the data of a certain table are implicitly split into “hot” and “cold”. The most recent data is regularly requested but requires only a small amount of space. In contrast, the bulky historical data is requested rarely. If several disks are available, the “hot” data may be located on fast disks (for example, NVMe SSDs or in memory), while the “cold” data may be located on relatively slow ones (for example, HDD).
Data part is the minimum movable unit for `MergeTree`-engine tables. The data belonging to one part are stored on one disk. Data parts can be moved between disks in the background (according to user settings) as well as by means of the [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) queries.
A data part is the minimum movable unit for `MergeTree`-engine tables. The data belonging to one part is stored on one disk. Data parts can be moved between disks in the background (according to user settings) as well as by means of the [ALTER](../../../sql-reference/statements/alter/partition.md#alter_move-partition) queries.
### Terms {#terms}
@ -636,9 +641,9 @@ The number of threads performing background moves of data parts can be changed b
In the case of `MergeTree` tables, data reaches the disk in different ways:
- As a result of an insert (`INSERT` query).
- During background merges and [mutations](../../../sql-reference/statements/alter.md#alter-mutations).
- During background merges and [mutations](../../../sql-reference/statements/alter/index.md#alter-mutations).
- When downloading from another replica.
- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition).
- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy:
@ -650,7 +655,7 @@ Under the hood, mutations and partition freezing make use of [hard links](https:
In the background, parts are moved between volumes based on the amount of free space (the `move_factor` parameter), in the order the volumes are declared in the configuration file.
Data is never transferred from the last volume or into the first one. You can use the system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Detailed information can also be found in the server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
Users can force moving a part or a partition from one volume to another with the [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter/partition.md#alter_move-partition) query; all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to complete. The user will get an error message if not enough free space is available or if any of the required conditions are not met.
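A hedged sketch, assuming the storage policy defines a volume named `cold`:
``` sql
-- Move one partition of the hypothetical page_views table to the slow volume.
ALTER TABLE page_views MOVE PARTITION 202001 TO VOLUME 'cold';
```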
Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas.

Some files were not shown because too many files have changed in this diff.