Merge remote-tracking branch 'upstream/master' into fix27

2024-11-25 00:52:02 +00:00 · 2020-01-08 18:43:54 +03:00 · 2020-01-08 18:43:54 +03:00 · 674d34e93e
commit 674d34e93e
parent 4bb320627c 8140b2f75a
290 changed files with 4010 additions and 1705 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -134,3 +134,6 @@
 [submodule "contrib/libc-headers"]
 	path = contrib/libc-headers
 	url = https://github.com/ClickHouse-Extras/libc-headers.git
+[submodule "contrib/ryu"]
+	path = contrib/ryu
+	url = https://github.com/ClickHouse-Extras/ryu.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -210,7 +210,7 @@ set (CMAKE_C_FLAGS_DEBUG                 "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3

 if (COMPILER_CLANG)
    # Exception unwinding doesn't work in clang release build without this option
-    # TODO investigate if contrib/libcxxabi is out of date
+    # TODO investigate why exception unwinding does not work in clang release builds without this option
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer")
 endif ()
--- a/cmake/find/icu.cmake
+++ b/cmake/find/icu.cmake
@ -1,4 +1,8 @@
-option(ENABLE_ICU "Enable ICU" ${ENABLE_LIBRARIES})
+if (OS_LINUX)
+    option(ENABLE_ICU "Enable ICU" ${ENABLE_LIBRARIES})
+else ()
+    option(ENABLE_ICU "Enable ICU" 0)
+endif ()

 if (ENABLE_ICU)

--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -32,6 +32,8 @@ if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
    add_subdirectory (double-conversion-cmake)
 endif ()

+add_subdirectory (ryu-cmake)
+
 if (USE_INTERNAL_CITYHASH_LIBRARY)
    add_subdirectory (cityhash102)
 endif ()
--- a/contrib/libc-headers
+++ b/contrib/libc-headers
@ -1 +1 @@
-Subproject commit cd82fd9d8eefe50a47a0adf7c617c3ea7d558d11
+Subproject commit 9676d2645a713e679dc981ffd84dee99fcd68b8e
--- a/contrib/libcxx
+++ b/contrib/libcxx
@ -1 +1 @@
-Subproject commit f7c63235238a71b7e0563fab8c7c5ec1b54831f6
+Subproject commit a8c453300879d0bf255f9d5959d42e2c8aac1bfb
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@ -47,6 +47,11 @@ add_library(cxx ${SRCS})
 target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)

+# Enable capturing stack traces for all exceptions.
+if (USE_UNWIND)
+    target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)
+endif ()
+
 target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)

 check_cxx_compiler_flag(-Wreserved-id-macro HAVE_WARNING_RESERVED_ID_MACRO)
--- a/contrib/libcxxabi-cmake/CMakeLists.txt
+++ b/contrib/libcxxabi-cmake/CMakeLists.txt
@ -32,6 +32,11 @@ target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
 target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
 target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY})

+# Enable capturing stack traces for all exceptions.
+if (USE_UNWIND)
+    target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)
+endif ()
+
 install(
    TARGETS cxxabi
    EXPORT global
--- a/contrib/libhdfs3-cmake/CMake/Platform.cmake
+++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake
@ -7,10 +7,14 @@ ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")

 IF(CMAKE_COMPILER_IS_GNUCXX)
-    EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
+    EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpfullversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
    
    IF (NOT GCC_COMPILER_VERSION)
-        MESSAGE(FATAL_ERROR "Cannot get gcc version")
+        EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
+
+        IF (NOT GCC_COMPILER_VERSION)
+            MESSAGE(FATAL_ERROR "Cannot get gcc version")
+        ENDIF (NOT GCC_COMPILER_VERSION)
    ENDIF (NOT GCC_COMPILER_VERSION)
    
    STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})
--- a/contrib/openssl-cmake/CMakeLists.txt
+++ b/contrib/openssl-cmake/CMakeLists.txt
@ -28,6 +28,8 @@ if (ARCH_AMD64)
    endif ()

    macro(perl_generate_asm FILE_IN FILE_OUT)
+        get_filename_component(DIRNAME ${FILE_OUT} DIRECTORY)
+        file(MAKE_DIRECTORY ${DIRNAME})
        add_custom_command(OUTPUT ${FILE_OUT}
            COMMAND /usr/bin/env perl ${FILE_IN} ${OPENSSL_SYSTEM} ${FILE_OUT}
            # ASM code has broken unwind tables (CFI), strip them.
@ -70,6 +72,8 @@ if (ARCH_AMD64)
 elseif (ARCH_AARCH64)

    macro(perl_generate_asm FILE_IN FILE_OUT)
+        get_filename_component(DIRNAME ${FILE_OUT} DIRECTORY)
+        file(MAKE_DIRECTORY ${DIRNAME})
        add_custom_command(OUTPUT ${FILE_OUT}
            COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT})
            # Hope that the ASM code for AArch64 doesn't have broken CFI. Otherwise, add the same sed as for x86_64.
--- a/contrib/ryu
+++ b/contrib/ryu
@ -0,0 +1 @@
+Subproject commit 5b4a853534b47438b4d97935370f6b2397137c2b
--- a/contrib/ryu-cmake/CMakeLists.txt
+++ b/contrib/ryu-cmake/CMakeLists.txt
@ -0,0 +1,10 @@
+# Defines the `ryu` library target from the sources of the contrib/ryu submodule.
+set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/ryu)
+
+add_library(ryu
+    ${LIBRARY_DIR}/ryu/d2fixed.c
+    ${LIBRARY_DIR}/ryu/d2s.c
+    ${LIBRARY_DIR}/ryu/f2s.c
+    ${LIBRARY_DIR}/ryu/generic_128.c
+)
+
+# The submodule root is exported as a SYSTEM include directory to every target linking `ryu`.
+target_include_directories(ryu SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}")
--- a/contrib/zlib-ng
+++ b/contrib/zlib-ng
@ -1 +1 @@
-Subproject commit 5673222fbd37ea89afb2ea73096f9bf5ec68ea31
+Subproject commit bba56a73be249514acfbc7d49aa2a68994dad8ab
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@ -330,6 +330,7 @@ target_link_libraries (clickhouse_common_io
    ${LINK_LIBRARIES_ONLY_ON_X86_64}
        PUBLIC
    ${DOUBLE_CONVERSION_LIBRARIES}
+    ryu
        PUBLIC
    ${Poco_Net_LIBRARY}
    ${Poco_Util_LIBRARY}
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@ -300,7 +300,7 @@ private:
                && std::string::npos == embedded_stack_trace_pos)
            {
                std::cerr << "Stack trace:" << std::endl
-                    << e.getStackTrace().toString();
+                    << e.getStackTraceString();
            }

            /// If exception code isn't zero, we should return non-zero return code anyway.
@ -327,6 +327,78 @@ private:
            || (now.month() == 1 && now.day() <= 5);
    }

+    /// Decides whether the Chinese New Year greeting should be used.
+    /// Returns true only when `local_tz` is one of the time zones listed below, the current
+    /// Unix timestamp is divisible by 3, and the current day is at most 25 days past one of the listed days.
+    bool isChineseNewYearMode(const String & local_tz)
+    {
+        /// Days of Dec. 20 in Chinese calendar starting from year 2019 to year 2105.
+        /// They are compared with the day number returned by DateLUT::toDayNum(); the values are in ascending order.
+        static constexpr UInt16 festival_start_days[]
+            = {18275, 18659, 19014, 19368, 19752, 20107, 20491, 20845, 21199, 21583, 21937, 22292, 22676, 23030, 23414, 23768, 24122, 24506,
+               24860, 25215, 25599, 25954, 26308, 26692, 27046, 27430, 27784, 28138, 28522, 28877, 29232, 29616, 29970, 30354, 30708, 31062,
+               31446, 31800, 32155, 32539, 32894, 33248, 33632, 33986, 34369, 34724, 35078, 35462, 35817, 36171, 36555, 36909, 37293, 37647,
+               38002, 38386, 38740, 39095, 39479, 39833, 40187, 40571, 40925, 41309, 41664, 42018, 42402, 42757, 43111, 43495, 43849, 44233,
+               44587, 44942, 45326, 45680, 46035, 46418, 46772, 47126, 47510, 47865, 48249, 48604, 48958, 49342};
+
+        /// All time zone names are acquired from https://www.iana.org/time-zones
+        static constexpr const char * celebrating_time_zones[] = {
+            /// Time zones celebrating Chinese new year.
+            "Asia/Shanghai",
+            "Asia/Chongqing",
+            "Asia/Harbin",
+            "Asia/Urumqi",
+            "Asia/Hong_Kong",
+            "Asia/Chungking",
+            "Asia/Macao",
+            "Asia/Macau",
+            "Asia/Taipei",
+            "Asia/Singapore",
+
+            /// Time zones celebrating Chinese new year but with different festival names. Let's not print the message for now.
+            // "Asia/Brunei",
+            // "Asia/Ho_Chi_Minh",
+            // "Asia/Hovd",
+            // "Asia/Jakarta",
+            // "Asia/Jayapura",
+            // "Asia/Kashgar",
+            // "Asia/Kuala_Lumpur",
+            // "Asia/Kuching",
+            // "Asia/Makassar",
+            // "Asia/Pontianak",
+            // "Asia/Pyongyang",
+            // "Asia/Saigon",
+            // "Asia/Seoul",
+            // "Asia/Ujung_Pandang",
+            // "Asia/Ulaanbaatar",
+            // "Asia/Ulan_Bator",
+        };
+
+        const time_t now = time(nullptr);
+
+        /// Only the listed time zones get the greeting.
+        bool zone_celebrates = false;
+        for (const char * zone : celebrating_time_zones)
+        {
+            if (zone == local_tz)
+            {
+                zone_celebrates = true;
+                break;
+            }
+        }
+        if (!zone_celebrates)
+            return false;
+
+        /// It's bad to be intrusive.
+        if (now % 3 != 0)
+            return false;
+
+        const auto today = DateLUT::instance().toDayNum(now).toUnderType();
+        for (const auto start_day : festival_start_days)
+        {
+            /// The list is ascending: once an entry is past today, no later entry can match.
+            if (start_day > today)
+                return false;
+
+            /// Let's celebrate until Lantern Festival
+            if (start_day + 25u >= today)
+                return true;
+        }
+        return false;
+    }
+
    int mainImpl()
    {
        UseSSL use_ssl;
@ -374,7 +446,7 @@ private:
        connect();

        /// Initialize DateLUT here to avoid counting time spent here as query execution time.
-        DateLUT::instance();
+        const auto local_tz = DateLUT::instance().getTimeZone();
        if (!context.getSettingsRef().use_client_time_zone)
        {
            const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
@ -540,7 +612,12 @@ private:

            loop();

-            std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl;
+            if (isNewYearMode())
+                std::cout << "Happy new year." << std::endl;
+            else if (isChineseNewYearMode(local_tz))
+                std::cout << "Happy Chinese new year. 春节快乐!" << std::endl;
+            else
+                std::cout << "Bye." << std::endl;
            return 0;
        }
        else
@ -714,7 +791,7 @@ private:

                        if (config().getBool("stacktrace", false))
                            std::cerr << "Stack trace:" << std::endl
-                                      << e.getStackTrace().toString() << std::endl;
+                                      << e.getStackTraceString() << std::endl;

                        std::cerr << std::endl;

--- a/dbms/programs/odbc-bridge/MainHandler.cpp
+++ b/dbms/programs/odbc-bridge/MainHandler.cpp
@ -115,7 +115,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
    catch (const Exception & ex)
    {
        process_error("Invalid 'columns' parameter in request body '" + ex.message() + "'");
-        LOG_WARNING(log, ex.getStackTrace().toString());
+        LOG_WARNING(log, ex.getStackTraceString());
        return;
    }

--- a/dbms/programs/performance-test/PerformanceTest.cpp
+++ b/dbms/programs/performance-test/PerformanceTest.cpp
@ -85,16 +85,6 @@ bool PerformanceTest::checkPreconditions() const

    for (const std::string & precondition : preconditions)
    {
-        if (precondition == "flush_disk_cache")
-        {
-            if (system(
-                    "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')"))
-            {
-                LOG_WARNING(log, "Failed to flush disk cache");
-                return false;
-            }
-        }
-
        if (precondition == "ram_size")
        {
            size_t ram_size_needed = config->getUInt64("preconditions.ram_size");
@ -337,7 +327,7 @@ void PerformanceTest::runQueries(
        {
            statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText();
            LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
-                << ", Stack trace:\n\n" << e.getStackTrace().toString());
+                << ", Stack trace:\n\n" << e.getStackTraceString());
        }

        if (!statistics.got_SIGINT)
--- a/dbms/programs/performance-test/PerformanceTestInfo.cpp
+++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp
@ -45,21 +45,11 @@ namespace fs = std::filesystem;

 PerformanceTestInfo::PerformanceTestInfo(
    XMLConfigurationPtr config,
-    const std::string & profiles_file_,
    const Settings & global_settings_)
-    : profiles_file(profiles_file_)
-    , settings(global_settings_)
+    : settings(global_settings_)
 {
    path = config->getString("path");
    test_name = fs::path(path).stem().string();
-    if (config->has("main_metric"))
-    {
-        Strings main_metrics;
-        config->keys("main_metric", main_metrics);
-        if (main_metrics.size())
-            main_metric = main_metrics[0];
-    }
-
    applySettings(config);
    extractQueries(config);
    extractAuxiliaryQueries(config);
@ -75,38 +65,8 @@ void PerformanceTestInfo::applySettings(XMLConfigurationPtr config)
        SettingsChanges settings_to_apply;
        Strings config_settings;
        config->keys("settings", config_settings);
-
-        auto settings_contain = [&config_settings] (const std::string & setting)
-        {
-            auto position = std::find(config_settings.begin(), config_settings.end(), setting);
-            return position != config_settings.end();
-
-        };
-        /// Preprocess configuration file
-        if (settings_contain("profile"))
-        {
-            if (!profiles_file.empty())
-            {
-                std::string profile_name = config->getString("settings.profile");
-                XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file));
-
-                Strings profile_settings;
-                profiles_config->keys("profiles." + profile_name, profile_settings);
-
-                extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply);
-            }
-        }
-
        extractSettings(config, "settings", config_settings, settings_to_apply);
        settings.applyChanges(settings_to_apply);
-
-        if (settings_contain("average_rows_speed_precision"))
-            TestStats::avg_rows_speed_precision =
-                config->getDouble("settings.average_rows_speed_precision");
-
-        if (settings_contain("average_bytes_speed_precision"))
-            TestStats::avg_bytes_speed_precision =
-                config->getDouble("settings.average_bytes_speed_precision");
    }
 }

--- a/dbms/programs/performance-test/PerformanceTestInfo.h
+++ b/dbms/programs/performance-test/PerformanceTestInfo.h
@ -26,15 +26,13 @@ using StringToVector = std::map<std::string, Strings>;
 class PerformanceTestInfo
 {
 public:
-    PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_, const Settings & global_settings_);
+    PerformanceTestInfo(XMLConfigurationPtr config, const Settings & global_settings_);

    std::string test_name;
    std::string path;
-    std::string main_metric;

    Strings queries;

-    std::string profiles_file;
    Settings settings;
    ExecutionType exec_type;
    StringToVector substitutions;
--- a/dbms/programs/performance-test/PerformanceTestSuite.cpp
+++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp
@ -64,7 +64,6 @@ public:
        const std::string & password_,
        const Settings & cmd_settings,
        const bool lite_output_,
-        const std::string & profiles_file_,
        Strings && input_files_,
        Strings && tests_tags_,
        Strings && skip_tags_,
@ -86,7 +85,6 @@ public:
        , skip_names_regexp(std::move(skip_names_regexp_))
        , query_indexes(query_indexes_)
        , lite_output(lite_output_)
-        , profiles_file(profiles_file_)
        , input_files(input_files_)
        , log(&Poco::Logger::get("PerformanceTestSuite"))
    {
@ -139,7 +137,6 @@ private:
    using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;

    bool lite_output;
-    std::string profiles_file;

    Strings input_files;
    std::vector<XMLConfigurationPtr> tests_configurations;
@ -197,7 +194,7 @@ private:

    std::pair<std::string, bool> runTest(XMLConfigurationPtr & test_config)
    {
-        PerformanceTestInfo info(test_config, profiles_file, global_context.getSettingsRef());
+        PerformanceTestInfo info(test_config, global_context.getSettingsRef());
        LOG_INFO(log, "Config for test '" << info.test_name << "' parsed");
        PerformanceTest current(test_config, connection, timeouts, interrupt_listener, info, global_context, query_indexes[info.path]);

@ -332,7 +329,6 @@ try
    desc.add_options()
        ("help", "produce help message")
        ("lite", "use lite version of output")
-        ("profiles-file", value<std::string>()->default_value(""), "Specify a file with global profiles")
        ("host,h", value<std::string>()->default_value("localhost"), "")
        ("port", value<UInt16>()->default_value(9000), "")
        ("secure,s", "Use TLS connection")
@ -401,7 +397,6 @@ try
        options["password"].as<std::string>(),
        cmd_settings,
        options.count("lite") > 0,
-        options["profiles-file"].as<std::string>(),
        std::move(input_files),
        std::move(tests_tags),
        std::move(skip_tags),
--- a/dbms/programs/performance-test/ReportBuilder.cpp
+++ b/dbms/programs/performance-test/ReportBuilder.cpp
@ -19,15 +19,10 @@ namespace
 {
 std::string getMainMetric(const PerformanceTestInfo & test_info)
 {
-    std::string main_metric;
-    if (test_info.main_metric.empty())
-        if (test_info.exec_type == ExecutionType::Loop)
-            main_metric = "min_time";
-        else
-            main_metric = "rows_per_second";
+    if (test_info.exec_type == ExecutionType::Loop)
+        return "min_time";
    else
-        main_metric = test_info.main_metric;
-    return main_metric;
+        return "rows_per_second";
 }

 bool isASCIIString(const std::string & str)
@ -64,7 +59,6 @@ std::string ReportBuilder::buildFullReport(
 {
    FormatSettings settings;

-
    JSONString json_output;

    json_output.set("hostname", hostname);
@ -75,7 +69,6 @@ std::string ReportBuilder::buildFullReport(
    json_output.set("time", getCurrentTime());
    json_output.set("test_name", test_info.test_name);
    json_output.set("path", test_info.path);
-    json_output.set("main_metric", getMainMetric(test_info));

    if (!test_info.substitutions.empty())
    {
--- a/dbms/programs/server/HTTPHandler.cpp
+++ b/dbms/programs/server/HTTPHandler.cpp
@ -20,8 +20,6 @@
 #include <Compression/CompressedReadBuffer.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
-#include <IO/ZlibInflatingReadBuffer.h>
-#include <IO/BrotliReadBuffer.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteBufferFromString.h>
 #include <IO/WriteBufferFromHTTPServerResponse.h>
@ -300,32 +298,24 @@ void HTTPHandler::processQuery(

    /// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
    String http_response_compression_methods = request.get("Accept-Encoding", "");
-    bool client_supports_http_compression = false;
-    CompressionMethod http_response_compression_method {};
+    CompressionMethod http_response_compression_method = CompressionMethod::None;

    if (!http_response_compression_methods.empty())
    {
+        /// If client supports brotli - it's preferred.
        /// Both gzip and deflate are supported. If the client supports both, gzip is preferred.
        /// NOTE parsing of the list of methods is slightly incorrect.
-        if (std::string::npos != http_response_compression_methods.find("gzip"))
-        {
-            client_supports_http_compression = true;
-            http_response_compression_method = CompressionMethod::Gzip;
-        }
-        else if (std::string::npos != http_response_compression_methods.find("deflate"))
-        {
-            client_supports_http_compression = true;
-            http_response_compression_method = CompressionMethod::Zlib;
-        }
-#if USE_BROTLI
-        else if (http_response_compression_methods == "br")
-        {
-            client_supports_http_compression = true;
+
+        if (std::string::npos != http_response_compression_methods.find("br"))
            http_response_compression_method = CompressionMethod::Brotli;
-        }
-#endif
+        else if (std::string::npos != http_response_compression_methods.find("gzip"))
+            http_response_compression_method = CompressionMethod::Gzip;
+        else if (std::string::npos != http_response_compression_methods.find("deflate"))
+            http_response_compression_method = CompressionMethod::Zlib;
    }

+    bool client_supports_http_compression = http_response_compression_method != CompressionMethod::None;
+
    /// Client can pass a 'compress' flag in the query string. In this case the query result is
    /// compressed using internal algorithm. This is not reflected in HTTP headers.
    bool internal_compression = params.getParsed<bool>("compress", false);
@ -344,8 +334,8 @@ void HTTPHandler::processQuery(
    unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10);

    used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>(
-        request, response, keep_alive_timeout,
-        client_supports_http_compression, http_response_compression_method, buffer_size_http);
+        request, response, keep_alive_timeout, client_supports_http_compression, http_response_compression_method);
+
    if (internal_compression)
        used_output.out_maybe_compressed = std::make_shared<CompressedWriteBuffer>(*used_output.out);
    else
@ -400,32 +390,9 @@ void HTTPHandler::processQuery(
    std::unique_ptr<ReadBuffer> in_post_raw = std::make_unique<ReadBufferFromIStream>(istr);

    /// Request body can be compressed using algorithm specified in the Content-Encoding header.
-    std::unique_ptr<ReadBuffer> in_post;
    String http_request_compression_method_str = request.get("Content-Encoding", "");
-    if (!http_request_compression_method_str.empty())
-    {
-        if (http_request_compression_method_str == "gzip")
-        {
-            in_post = std::make_unique<ZlibInflatingReadBuffer>(std::move(in_post_raw), CompressionMethod::Gzip);
-        }
-        else if (http_request_compression_method_str == "deflate")
-        {
-            in_post = std::make_unique<ZlibInflatingReadBuffer>(std::move(in_post_raw), CompressionMethod::Zlib);
-        }
-#if USE_BROTLI
-        else if (http_request_compression_method_str == "br")
-        {
-            in_post = std::make_unique<BrotliReadBuffer>(std::move(in_post_raw));
-        }
-#endif
-        else
-        {
-            throw Exception("Unknown Content-Encoding of HTTP request: " + http_request_compression_method_str,
-                    ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
-        }
-    }
-    else
-        in_post = std::move(in_post_raw);
+    std::unique_ptr<ReadBuffer> in_post = wrapReadBufferWithCompressionMethod(
+        std::make_unique<ReadBufferFromIStream>(istr), chooseCompressionMethod({}, http_request_compression_method_str));

    /// The data can also be compressed using incompatible internal algorithm. This is indicated by
    /// 'decompress' query parameter.
--- a/dbms/programs/server/TCPHandler.cpp
+++ b/dbms/programs/server/TCPHandler.cpp
@ -112,7 +112,7 @@ void TCPHandler::runImpl()
        {
            Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
            LOG_ERROR(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
-                << ", Stack trace:\n\n" << e.getStackTrace().toString());
+                << ", Stack trace:\n\n" << e.getStackTraceString());
            sendException(e, connection_context.getSettingsRef().calculate_text_stack_trace);
            return;
        }
@ -158,7 +158,7 @@ void TCPHandler::runImpl()
        /** An exception during the execution of request (it must be sent over the network to the client).
         *  The client will be able to accept it, if it did not happen while sending another packet and the client has not disconnected yet.
         */
-        std::unique_ptr<Exception> exception;
+        std::optional<DB::Exception> exception;
        bool network_error = false;

        bool send_exception_with_stack_trace = connection_context.getSettingsRef().calculate_text_stack_trace;
@ -280,7 +280,7 @@ void TCPHandler::runImpl()
        catch (const Exception & e)
        {
            state.io.onException();
-            exception.reset(e.clone());
+            exception.emplace(e);

            if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT)
                throw;
@ -298,22 +298,22 @@ void TCPHandler::runImpl()
             *  We will try to send exception to the client in any case - see below.
             */
            state.io.onException();
-            exception = std::make_unique<Exception>(e.displayText(), ErrorCodes::POCO_EXCEPTION);
+            exception.emplace(Exception::CreateFromPoco, e);
        }
        catch (const Poco::Exception & e)
        {
            state.io.onException();
-            exception = std::make_unique<Exception>(e.displayText(), ErrorCodes::POCO_EXCEPTION);
+            exception.emplace(Exception::CreateFromPoco, e);
        }
        catch (const std::exception & e)
        {
            state.io.onException();
-            exception = std::make_unique<Exception>(e.what(), ErrorCodes::STD_EXCEPTION);
+            exception.emplace(Exception::CreateFromSTD, e);
        }
        catch (...)
        {
            state.io.onException();
-            exception = std::make_unique<Exception>("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION);
+            exception.emplace("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION);
        }

        try
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@ -138,7 +138,6 @@ namespace ErrorCodes
    extern const int FUNCTION_IS_SPECIAL = 129;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT = 130;
    extern const int TOO_LARGE_STRING_SIZE = 131;
-    extern const int CANNOT_CREATE_TABLE_FROM_METADATA = 132;
    extern const int AGGREGATE_FUNCTION_DOESNT_ALLOW_PARAMETERS = 133;
    extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS = 134;
    extern const int ZERO_ARRAY_OR_TUPLE_INDEX = 135;
@ -474,7 +473,6 @@ namespace ErrorCodes
    extern const int NOT_ENOUGH_PRIVILEGES = 497;
    extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED = 498;
    extern const int S3_ERROR = 499;
-    extern const int CANNOT_CREATE_DICTIONARY_FROM_METADATA = 500;
    extern const int CANNOT_CREATE_DATABASE = 501;
    extern const int CANNOT_SIGQUEUE = 502;
    extern const int AGGREGATE_FUNCTION_THROW = 503;
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@ -25,6 +25,55 @@ namespace ErrorCodes
    extern const int NOT_IMPLEMENTED;
 }

+
+/// For deferred initialization.
+Exception::Exception() = default;
+
+/// Creates an exception carrying the given message and error code.
+Exception::Exception(const std::string & msg, int code) : Poco::Exception(msg, code) {}
+
+Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
+    : Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
+{
+#ifdef STD_EXCEPTION_HAS_STACK_TRACE
+    set_stack_trace(exc.get_stack_trace_frames(), exc.get_stack_trace_size());
+#endif
+}
+
+Exception::Exception(CreateFromSTDTag, const std::exception & exc)
+    : Poco::Exception(String(typeid(exc).name()) + ": " + String(exc.what()), ErrorCodes::STD_EXCEPTION)
+{
+#ifdef STD_EXCEPTION_HAS_STACK_TRACE
+    set_stack_trace(exc.get_stack_trace_frames(), exc.get_stack_trace_size());
+#endif
+}
+
+
+/// Returns the textual stack trace attached to an arbitrary exception.
+/// With STD_EXCEPTION_HAS_STACK_TRACE the frames stored in the exception itself are formatted.
+/// Otherwise only DB::Exception has a trace; for any other exception type an empty string is returned.
+std::string getExceptionStackTraceString(const std::exception & e)
+{
+#if defined(STD_EXCEPTION_HAS_STACK_TRACE)
+    return StackTrace::toString(e.get_stack_trace_frames(), 0, e.get_stack_trace_size());
+#else
+    const auto * db_exception = dynamic_cast<const Exception *>(&e);
+    if (!db_exception)
+        return {};
+    return db_exception->getStackTraceString();
+#endif
+}
+
+
+/// Returns the stack trace of this exception as text.
+/// Without STD_EXCEPTION_HAS_STACK_TRACE the trace comes from the `trace` member;
+/// with it, the frames stored in the exception object are formatted instead.
+std::string Exception::getStackTraceString() const
+{
+#if !defined(STD_EXCEPTION_HAS_STACK_TRACE)
+    return trace.toString();
+#else
+    return StackTrace::toString(get_stack_trace_frames(), 0, get_stack_trace_size());
+#endif
+}
+
+
 std::string errnoToString(int code, int e)
 {
    const size_t buf_size = 128;
@ -141,6 +190,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
        {
            stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
                << ", e.displayText() = " << e.displayText()
+                << (with_stacktrace ? getExceptionStackTraceString(e) : "")
                << (with_extra_info ? getExtraExceptionInfo(e) : "")
                << " (version " << VERSION_STRING << VERSION_OFFICIAL;
        }
@ -157,8 +207,9 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
                name += " (demangling status: " + toString(status) + ")";

            stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what()
-                   << (with_extra_info ? getExtraExceptionInfo(e) : "")
-                   << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
+                << (with_stacktrace ? getExceptionStackTraceString(e) : "")
+                << (with_extra_info ? getExtraExceptionInfo(e) : "")
+                << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
        }
        catch (...) {}
    }
@ -261,7 +312,7 @@ std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool
        stream << "Code: " << e.code() << ", e.displayText() = " << text;

        if (with_stacktrace && !has_embedded_stack_trace)
-            stream << ", Stack trace (when copying this message, always include the lines below):\n\n" << e.getStackTrace().toString();
+            stream << ", Stack trace (when copying this message, always include the lines below):\n\n" << e.getStackTraceString();
    }
    catch (...) {}

--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@ -22,13 +22,14 @@ namespace ErrorCodes
 class Exception : public Poco::Exception
 {
 public:
-    Exception() {}  /// For deferred initialization.
-    Exception(const std::string & msg, int code) : Poco::Exception(msg, code) {}
-    Exception(const std::string & msg, const Exception & nested_exception, int code)
-        : Poco::Exception(msg, nested_exception, code), trace(nested_exception.trace) {}
+    Exception();
+    Exception(const std::string & msg, int code);

    enum CreateFromPocoTag { CreateFromPoco };
-    Exception(CreateFromPocoTag, const Poco::Exception & exc) : Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION) {}
+    enum CreateFromSTDTag { CreateFromSTD };
+
+    Exception(CreateFromPocoTag, const Poco::Exception & exc);
+    Exception(CreateFromSTDTag, const std::exception & exc);

    Exception * clone() const override { return new Exception(*this); }
    void rethrow() const override { throw *this; }
@ -38,15 +39,20 @@ public:
    /// Add something to the existing message.
    void addMessage(const std::string & arg) { extendedMessage(arg); }

-    const StackTrace & getStackTrace() const { return trace; }
+    std::string getStackTraceString() const;

 private:
+#ifndef STD_EXCEPTION_HAS_STACK_TRACE
    StackTrace trace;
+#endif

    const char * className() const throw() override { return "DB::Exception"; }
 };


+std::string getExceptionStackTraceString(const std::exception & e);
+
+
 /// Contains an additional member `saved_errno`. See the throwFromErrno function.
 class ErrnoException : public Exception
 {
--- a/dbms/src/Common/ProfileEvents.cpp
+++ b/dbms/src/Common/ProfileEvents.cpp
@ -37,6 +37,8 @@
    M(CreatedReadBufferOrdinary, "") \
    M(CreatedReadBufferAIO, "") \
    M(CreatedReadBufferAIOFailed, "") \
+    M(CreatedReadBufferMMap, "") \
+    M(CreatedReadBufferMMapFailed, "") \
    M(CreatedWriteBufferOrdinary, "") \
    M(CreatedWriteBufferAIO, "") \
    M(CreatedWriteBufferAIOFailed, "") \
--- a/dbms/src/Common/StackTrace.cpp
+++ b/dbms/src/Common/StackTrace.cpp
@ -4,6 +4,7 @@
 #include <Common/Elf.h>
 #include <Common/SymbolIndex.h>
 #include <Common/config.h>
+#include <Common/MemorySanitizer.h>
 #include <common/SimpleCache.h>
 #include <common/demangle.h>
 #include <Core/Defines.h>
@ -226,6 +227,7 @@ void StackTrace::tryCapture()
    size = 0;
 #if USE_UNWIND
    size = unw_backtrace(frames.data(), capacity);
+    __msan_unpoison(frames.data(), size * sizeof(frames[0]));
 #endif
 }

@ -328,3 +330,15 @@ std::string StackTrace::toString() const
    static SimpleCache<decltype(toStringImpl), &toStringImpl> func_cached;
    return func_cached(frames, offset, size);
 }
+
+std::string StackTrace::toString(void ** frames_, size_t offset, size_t size)
+{
+    __msan_unpoison(frames_, size * sizeof(*frames_));
+
+    StackTrace::Frames frames_copy{};
+    for (size_t i = 0; i < size; ++i)
+        frames_copy[i] = frames_[i];
+
+    static SimpleCache<decltype(toStringImpl), &toStringImpl> func_cached;
+    return func_cached(frames_copy, offset, size);
+}
--- a/dbms/src/Common/StackTrace.h
+++ b/dbms/src/Common/StackTrace.h
@ -41,6 +41,8 @@ public:
    const Frames & getFrames() const;
    std::string toString() const;

+    static std::string toString(void ** frames, size_t offset, size_t size);
+
    void toStringEveryLine(std::function<void(const std::string &)> callback) const;

 protected:
--- a/dbms/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/dbms/src/Compression/CachedCompressedReadBuffer.cpp
@ -19,7 +19,7 @@ void CachedCompressedReadBuffer::initInput()
 {
    if (!file_in)
    {
-        file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size);
+        file_in = createReadBufferFromFileBase(path, estimated_size, aio_threshold, mmap_threshold, buf_size);
        compressed_in = file_in.get();

        if (profile_callback)
@ -73,10 +73,11 @@ bool CachedCompressedReadBuffer::nextImpl()


 CachedCompressedReadBuffer::CachedCompressedReadBuffer(
-    const std::string & path_, UncompressedCache * cache_, size_t estimated_size_, size_t aio_threshold_,
+    const std::string & path_, UncompressedCache * cache_,
+    size_t estimated_size_, size_t aio_threshold_, size_t mmap_threshold_,
    size_t buf_size_)
    : ReadBuffer(nullptr, 0), path(path_), cache(cache_), buf_size(buf_size_), estimated_size(estimated_size_),
-        aio_threshold(aio_threshold_), file_pos(0)
+        aio_threshold(aio_threshold_), mmap_threshold(mmap_threshold_), file_pos(0)
 {
 }

--- a/dbms/src/Compression/CachedCompressedReadBuffer.h
+++ b/dbms/src/Compression/CachedCompressedReadBuffer.h
@ -26,6 +26,7 @@ private:
    size_t buf_size;
    size_t estimated_size;
    size_t aio_threshold;
+    size_t mmap_threshold;

    std::unique_ptr<ReadBufferFromFileBase> file_in;
    size_t file_pos;
@ -42,7 +43,8 @@ private:

 public:
    CachedCompressedReadBuffer(
-        const std::string & path_, UncompressedCache * cache_, size_t estimated_size_, size_t aio_threshold_,
+        const std::string & path_, UncompressedCache * cache_,
+        size_t estimated_size_, size_t aio_threshold_, size_t mmap_threshold_,
        size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE);


--- a/dbms/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/dbms/src/Compression/CompressedReadBufferFromFile.cpp
@ -33,9 +33,9 @@ bool CompressedReadBufferFromFile::nextImpl()


 CompressedReadBufferFromFile::CompressedReadBufferFromFile(
-    const std::string & path, size_t estimated_size, size_t aio_threshold, size_t buf_size)
+    const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size)
    : BufferWithOwnMemory<ReadBuffer>(0),
-        p_file_in(createReadBufferFromFileBase(path, estimated_size, aio_threshold, buf_size)),
+        p_file_in(createReadBufferFromFileBase(path, estimated_size, aio_threshold, mmap_threshold, buf_size)),
        file_in(*p_file_in)
 {
    compressed_in = &file_in;
--- a/dbms/src/Compression/CompressedReadBufferFromFile.h
+++ b/dbms/src/Compression/CompressedReadBufferFromFile.h
@ -30,7 +30,7 @@ private:

 public:
    CompressedReadBufferFromFile(
-        const std::string & path, size_t estimated_size, size_t aio_threshold, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+        const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);

    void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block);

--- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp
+++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp
@ -26,7 +26,7 @@ extern const int CANNOT_DECOMPRESS;
 namespace
 {

-Int64 getMaxValueForByteSize(UInt8 byte_size)
+inline Int64 getMaxValueForByteSize(Int8 byte_size)
 {
    switch (byte_size)
    {
@ -51,11 +51,56 @@ struct WriteSpec
    const UInt8 data_bits;
 };

-const std::array<UInt8, 5> DELTA_SIZES{7, 9, 12, 32, 64};
+// delta size prefix and data lengths based on few high bits peeked from binary stream
+static const WriteSpec WRITE_SPEC_LUT[32] = {
+    // 0b0 - 1-bit prefix, no data to read
+    /* 00000 */ {1, 0b0, 0},
+    /* 00001 */ {1, 0b0, 0},
+    /* 00010 */ {1, 0b0, 0},
+    /* 00011 */ {1, 0b0, 0},
+    /* 00100 */ {1, 0b0, 0},
+    /* 00101 */ {1, 0b0, 0},
+    /* 00110 */ {1, 0b0, 0},
+    /* 00111 */ {1, 0b0, 0},
+    /* 01000 */ {1, 0b0, 0},
+    /* 01001 */ {1, 0b0, 0},
+    /* 01010 */ {1, 0b0, 0},
+    /* 01011 */ {1, 0b0, 0},
+    /* 01100 */ {1, 0b0, 0},
+    /* 01101 */ {1, 0b0, 0},
+    /* 01110 */ {1, 0b0, 0},
+    /* 01111 */ {1, 0b0, 0},
+
+    // 0b10 - 2 bit prefix, 7 bits of data
+    /* 10000 */ {2, 0b10, 7},
+    /* 10001 */ {2, 0b10, 7},
+    /* 10010 */ {2, 0b10, 7},
+    /* 10011 */ {2, 0b10, 7},
+    /* 10100 */ {2, 0b10, 7},
+    /* 10101 */ {2, 0b10, 7},
+    /* 10110 */ {2, 0b10, 7},
+    /* 10111 */ {2, 0b10, 7},
+
+    // 0b110 - 3 bit prefix, 9 bits of data
+    /* 11000 */ {3, 0b110, 9},
+    /* 11001 */ {3, 0b110, 9},
+    /* 11010 */ {3, 0b110, 9},
+    /* 11011 */ {3, 0b110, 9},
+
+    // 0b1110 - 4 bit prefix, 12 bits of data
+    /* 11100 */ {4, 0b1110, 12},
+    /* 11101 */ {4, 0b1110, 12},
+
+    // 5-bit prefixes
+    /* 11110 */ {5, 0b11110, 32},
+    /* 11111 */ {5, 0b11111, 64},
+};
+

 template <typename T>
 WriteSpec getDeltaWriteSpec(const T & value)
 {
+    // TODO: speed things up a bit by counting the number of leading zeroes instead of doing lots of comparisons
    if (value > -63 && value < 64)
    {
        return WriteSpec{2, 0b10, 7};
@ -107,14 +152,15 @@ UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
 template <typename ValueType>
 UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 {
-    // Since only unsinged int has granted 2-compliment overflow handling, we are doing math here on unsigned types.
-    // To simplify and booletproof code, we operate enforce ValueType to be unsigned too.
+    // Since only unsigned integers have guaranteed 2's-complement (wrap-around) overflow handling,
+    // we are doing math here only on unsigned types.
+    // To simplify and bulletproof the code, we enforce ValueType to be unsigned too.
    static_assert(is_unsigned_v<ValueType>, "ValueType must be unsigned.");
    using UnsignedDeltaType = ValueType;

    // We use signed delta type to turn huge unsigned values into smaller signed:
    // ffffffff => -1
-    using SignedDeltaType = typename std::make_signed<UnsignedDeltaType>::type;
+    using SignedDeltaType = typename std::make_signed_t<UnsignedDeltaType>;

    if (source_size % sizeof(ValueType) != 0)
        throw Exception("Cannot compress, data size " + toString(source_size)
@ -149,8 +195,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
        prev_value = curr_value;
    }

-    WriteBuffer buffer(dest, getCompressedDataSize(sizeof(ValueType), source_size - sizeof(ValueType)*2));
-    BitWriter writer(buffer);
+    BitWriter writer(dest, getCompressedDataSize(sizeof(ValueType), source_size - sizeof(ValueType)*2));

    int item = 2;
    for (; source < source_end; source += sizeof(ValueType), ++item)
@ -170,7 +215,8 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
        else
        {
            const SignedDeltaType signed_dd = static_cast<SignedDeltaType>(double_delta);
-            const auto sign = std::signbit(signed_dd);
+            const auto sign = signed_dd < 0;
+
            // -1 shirnks dd down to fit into number of bits, and there can't be 0, so it is OK.
            const auto abs_value = static_cast<UnsignedDeltaType>(std::abs(signed_dd) - 1);
            const auto write_spec = getDeltaWriteSpec(signed_dd);
@ -183,7 +229,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)

    writer.flush();

-    return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + buffer.count();
+    return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + writer.count() / 8;
 }

 template <typename ValueType>
@ -220,35 +266,28 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
        dest += sizeof(prev_value);
    }

-    ReadBufferFromMemory buffer(source, source_size - sizeof(prev_value) - sizeof(prev_delta) - sizeof(items_count));
-    BitReader reader(buffer);
+    BitReader reader(source, source_size - sizeof(prev_value) - sizeof(prev_delta) - sizeof(items_count));

    // since data is tightly packed, up to 1 bit per value, and last byte is padded with zeroes,
    // we have to keep track of items to avoid reading more that there is.
    for (UInt32 items_read = 2; items_read < items_count && !reader.eof(); ++items_read)
    {
        UnsignedDeltaType double_delta = 0;
-        if (reader.readBit() == 1)
-        {
-            UInt8 i = 0;
-            for (; i < sizeof(DELTA_SIZES) - 1; ++i)
-            {
-                const auto next_bit = reader.readBit();
-                if (next_bit == 0)
-                {
-                    break;
-                }
-            }

+        static_assert(sizeof(WRITE_SPEC_LUT)/sizeof(WRITE_SPEC_LUT[0]) == 32); // 5-bit prefix lookup table
+        const auto write_spec = WRITE_SPEC_LUT[reader.peekByte() >> (8 - 5)]; // only 5 high bits of peeked byte value
+
+        reader.skipBufferedBits(write_spec.prefix_bits); // discard the prefix value, since we've already used it
+        if (write_spec.data_bits != 0)
+        {
            const UInt8 sign = reader.readBit();
-            SignedDeltaType signed_dd = static_cast<SignedDeltaType>(reader.readBits(DELTA_SIZES[i] - 1) + 1);
+            SignedDeltaType signed_dd = static_cast<SignedDeltaType>(reader.readBits(write_spec.data_bits - 1) + 1);
            if (sign)
            {
                signed_dd *= -1;
            }
            double_delta = static_cast<UnsignedDeltaType>(signed_dd);
        }
-        // else if first bit is zero, no need to read more data.

        const UnsignedDeltaType delta = double_delta + prev_delta;
        const ValueType curr_value = prev_value + delta;
--- a/dbms/src/Compression/CompressionCodecDoubleDelta.h
+++ b/dbms/src/Compression/CompressionCodecDoubleDelta.h
@ -5,6 +5,92 @@
 namespace DB
 {

+/** DoubleDelta column codec implementation.
+ *
+ * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf, which was extended
+ * to support 64bit types. The drawback is 1 extra bit for 32-bit wide deltas: 5-bit prefix
+ * instead of 4-bit prefix.
+ *
+ * This codec is best used against monotonic integer sequences with constant (or almost constant)
+ * stride, like event timestamp for some monitoring application.
+ *
+ * Given input sequence a: [a0, a1, ... an]:
+ *
+ * First, write number of items (sizeof(int32)*8 bits):                n
+ * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
+ * Second item is written as delta (sizeof(a[0])*8 bits):              a[1] - a[0]
+ * Loop over remaining items and calculate double delta:
+ *   double_delta = a[i] - 2 * a[i - 1] + a[i - 2]
+ *   Write it in compact binary form with `BitWriter`
+ *   if double_delta == 0:
+ *      write 1 bit:                                                   0
+ *   else if -63 < double_delta < 64:
+ *      write 2 bit prefix:                                            10
+ *      write sign bit (1 if negative):                                x
+ *      write 7-1 bits of abs(double_delta) - 1:                       xxxxxx
+ *   else if -255 < double_delta < 256:
+ *      write 3 bit prefix:                                            110
+ *      write sign bit (1 if negative):                                x
+ *      write 9-1 bits of abs(double_delta) - 1:                       xxxxxxxx
+ *   else if -2047 < double_delta < 2048:
+ *      write 4 bit prefix:                                            1110
+ *      write sign bit (1 if negative):                                x
+ *      write 12-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx
+ *   else if double_delta fits into 32-bit int:
+ *      write 5 bit prefix:                                            11110
+ *      write sign bit (1 if negative):                                x
+ *      write 32-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx...
+ *   else
+ *      write 5 bit prefix:                                            11111
+ *      write sign bit (1 if negative):                                x
+ *      write 64-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx...
+ *
+ * @example sequence of UInt8 values [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] is encoded as (codec header is omitted):
+ *
+ * .- 4-byte little-endian sequence length (10 == 0xa)
+ * |               .- 1 byte (sizeof(UInt8) a[0]                                            : 0x01
+ * |               |   .- 1 byte of delta: a[1] - a[0] = 2 - 1 = 1                          : 0x01
+ * |               |   |   .- 8 zero bits since double delta for remaining 8 elements was 0 : 0x00
+ * v_______________v___v___v___
+ * \x0a\x00\x00\x00\x01\x01\x00
+ *
+ * @example sequence of Int16 values [-10, 10, -20, 20, -40, 40] is encoded as:
+ *
+ * .- 4-byte little endian sequence length = 6                                 : 0x00000006
+ * |                .- 2 bytes (sizeof(Int16) a[0] as UInt16 = -10             : 0xfff6
+ * |                |       .- 2 bytes of delta: a[1] - a[0] = 10 - (-10) = 20 : 0x0014
+ * |                |       |       .- 4 encoded double deltas (see below)
+ * v_______________ v______ v______ v______________________
+ * \x06\x00\x00\x00\xf6\xff\x14\x00\xb8\xe2\x2e\xb1\xe4\x58
+ *
+ * 4 binary encoded double deltas (\xb8\xe2\x2e\xb1\xe4\x58):
+ * double_delta (DD) = -20 - 2 * 10 + (-10) = -50
+ * .- 2-bit prefix                                                         : 0b10
+ * | .- sign-bit                                                           : 0b1
+ * | |.- abs(DD) - 1 = 49                                                  : 0b110001
+ * | ||
+ * | ||      DD = 20 - 2 * (-20) + 10 = 70
+ * | ||      .- 3-bit prefix                                               : 0b110
+ * | ||      |  .- sign bit                                                : 0b0
+ * | ||      |  |.- abs(DD) - 1 = 69                                       : 0b1000101
+ * | ||      |  ||
+ * | ||      |  ||        DD = -40 - 2 * 20 + (-20) = -100
+ * | ||      |  ||        .- 3-bit prefix                                  : 0b110
+ * | ||      |  ||        |    .- sign-bit                                 : 0b1
+ * | ||      |  ||        |    |.- abs(DD) - 1 = 99                        : 0b1100011
+ * | ||      |  ||        |    ||
+ * | ||      |  ||        |    ||       DD = 40 - 2 * (-40) + 20 = 140
+ * | ||      |  ||        |    ||       .- 3-bit prefix                    : 0b110
+ * | ||      |  ||        |    ||       |  .- sign bit                     : 0b0
+ * | ||      |  ||        |    ||       |  |.- abs(DD) - 1 = 139           : 0b10001011
+ * | ||      |  ||        |    ||       |  ||
+ * V_vv______V__vv________V____vv_______V__vv________,- padding bits
+ * 10111000 11100010 00101110 10110001 11100100 01011000
+ *
+ * Please also see unit tests for:
+ *   * Examples on what output `BitWriter` produces on predefined input.
+ *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
+ */
 class CompressionCodecDoubleDelta : public ICompressionCodec
 {
 public:
--- a/dbms/src/Compression/CompressionCodecGorilla.cpp
+++ b/dbms/src/Compression/CompressionCodecGorilla.cpp
@ -112,8 +112,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest,
        dest += sizeof(prev_value);
    }

-    WriteBuffer buffer(dest, dest_end - dest);
-    BitWriter writer(buffer);
+    BitWriter writer(dest, dest_end - dest);

    while (source < source_end)
    {
@ -148,7 +147,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest,

    writer.flush();

-    return sizeof(items_count) + sizeof(prev_value) + buffer.count();
+    return sizeof(items_count) + sizeof(prev_value) + writer.count() / 8;
 }

 template <typename T>
@ -174,8 +173,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
        dest += sizeof(prev_value);
    }

-    ReadBufferFromMemory buffer(source, source_size - sizeof(items_count) - sizeof(prev_value));
-    BitReader reader(buffer);
+    BitReader reader(source, source_size - sizeof(items_count) - sizeof(prev_value));

    binary_value_info prev_xored_info{0, 0, 0};

--- a/dbms/src/Compression/CompressionCodecGorilla.h
+++ b/dbms/src/Compression/CompressionCodecGorilla.h
@ -5,6 +5,89 @@
 namespace DB
 {

+/** Gorilla column codec implementation.
+ *
+ * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf
+ *
+ * This codec is best used against monotonic floating sequences, like CPU usage percentage
+ * or any other gauge.
+ *
+ * Given input sequence a: [a0, a1, ... an]
+ *
+ * First, write number of items (sizeof(int32)*8 bits):                n
+ * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
+ * Loop over remaining items and calculate xor_diff:
+ *   xor_diff = a[i] ^ a[i - 1] (e.g. 00000011'10110100)
+ *   Write it in compact binary form with `BitWriter`
+ *   if xor_diff == 0:
+ *       write 1 bit:                                                  0
+ *   else:
+ *       calculate leading zero bits (lzb)
+ *       and trailing zero bits (tzb) of xor_diff,
+ *       compare to lzb and tzb of previous xor_diff
+ *       (X = sizeof(a[i]) * 8, e.g. X = 16, lzb = 6, tzb = 2)
+ *       if lzb >= prev_lzb && tzb >= prev_tzb:
+ *           (e.g. prev_lzb=4, prev_tzb=1)
+ *           write 2 bit prefix:                                       0b10
+ *           write xor_diff >> prev_tzb (X - prev_lzb - prev_tzb bits):0b00111011010
+ *           (where X = sizeof(a[i]) * 8, e.g. 16)
+ *       else:
+ *           write 2 bit prefix:                                       0b11
+ *           write 5 bits of lzb:                                      0b00110
+ *           write 6 bits of (X - lzb - tzb)=(16-6-2)=8:               0b001000
+ *           write (X - lzb - tzb) non-zero bits of xor_diff:          0b11101101
+ *           prev_lzb = lzb
+ *           prev_tzb = tzb
+ *
+ * @example sequence of Float32 values [0.1, 0.1, 0.11, 0.2, 0.1] is encoded as:
+ *
+ * .- 4-byte little endian sequence length: 5                                 : 0x00000005
+ * |                .- 4 byte (sizeof(Float32) a[0] as UInt32 : 0.1           : 0x3dcccccd
+ * |                |               .- 4 encoded xor diffs (see below)
+ * v_______________ v______________ v__________________________________________________
+ * \x05\x00\x00\x00\xcd\xcc\xcc\x3d\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00
+ *
+ * 4 binary encoded xor diffs (\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00):
+ *
+ * ...........................................
+ * a[i-1]   = 00111101110011001100110011001101
+ * a[i]     = 00111101110011001100110011001101
+ * xor_diff = 00000000000000000000000000000000
+ * .- 1-bit prefix                                                           : 0b0
+ * |
+ * | ...........................................
+ * | a[i-1]   = 00111101110011001100110011001101
+ * | a[i]     = 00111101111000010100011110101110
+ * | xor_diff = 00000000001011011000101101100011
+ * | lzb = 10
+ * | tzb = 0
+ * |.- 2-bit prefix                                                          : 0b11
+ * || .- lzb (10)                                                            : 0b01010
+ * || |     .- data length (32-10-0): 22                                     : 0b010110
+ * || |     |     .- data                                                    : 0b1011011000101101100011
+ * || |     |     |
+ * || |     |     |                        ...........................................
+ * || |     |     |                        a[i-1]   = 00111101111000010100011110101110
+ * || |     |     |                        a[i]     = 00111110010011001100110011001101
+ * || |     |     |                        xor_diff = 00000011101011011000101101100011
+ * || |     |     |                        .- 2-bit prefix                            : 0b11
+ * || |     |     |                        | .- lzb = 6                               : 0b00110
+ * || |     |     |                        | |     .- data length = (32 - 6) = 26     : 0b011010
+ * || |     |     |                        | |     |      .- data                     : 0b11101011011000101101100011
+ * || |     |     |                        | |     |      |
+ * || |     |     |                        | |     |      |                            ...........................................
+ * || |     |     |                        | |     |      |                            a[i-1]   = 00111110010011001100110011001101
+ * || |     |     |                        | |     |      |                            a[i]     = 00111101110011001100110011001101
+ * || |     |     |                        | |     |      |                            xor_diff = 00000011100000000000000000000000
+ * || |     |     |                        | |     |      |                            .- 2-bit prefix                            : 0b10
+ * || |     |     |                        | |     |      |                            | .- data                                  : 0b11100000000000000000000000
+ * VV_v____ v_____v________________________V_v_____v______v____________________________V_v_____________________________
+ * 01101010 01011010 11011000 10110110 00111100 11001101 01110101 10110001 01101100 01110111 00000000 00000000 00000000
+ *
+ * Please also see unit tests for:
+ *   * Examples on what output `BitWriter` produces on predefined input.
+ *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
+ */
 class CompressionCodecGorilla : public ICompressionCodec
 {
 public:
--- a/dbms/src/Compression/tests/cached_compressed_read_buffer.cpp
+++ b/dbms/src/Compression/tests/cached_compressed_read_buffer.cpp
@ -32,7 +32,7 @@ int main(int argc, char ** argv)

        {
            Stopwatch watch;
-            CachedCompressedReadBuffer in(path, &cache, 0, 0);
+            CachedCompressedReadBuffer in(path, &cache, 0, 0, 0);
            WriteBufferFromFile out("/dev/null");
            copyData(in, out);

@ -44,7 +44,7 @@ int main(int argc, char ** argv)

        {
            Stopwatch watch;
-            CachedCompressedReadBuffer in(path, &cache, 0, 0);
+            CachedCompressedReadBuffer in(path, &cache, 0, 0, 0);
            WriteBufferFromFile out("/dev/null");
            copyData(in, out);

--- a/dbms/src/Compression/tests/gtest_compressionCodec.cpp
+++ b/dbms/src/Compression/tests/gtest_compressionCodec.cpp
@ -1,6 +1,7 @@
 #include <Compression/CompressionFactory.h>

 #include <Common/PODArray.h>
+#include <Common/Stopwatch.h>
 #include <Core/Types.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/IDataType.h>
@ -62,6 +63,32 @@ std::vector<T> operator+(std::vector<T> && left, std::vector<T> && right)
 namespace
 {

+template <typename T>
+struct AsHexStringHelper
+{
+    const T & container;
+};
+
+template <typename T>
+std::ostream & operator << (std::ostream & ostr, const AsHexStringHelper<T> & helper)
+{
+    ostr << std::hex;
+    for (const auto & e : helper.container)
+    {
+        ostr << "\\x" << std::setw(2) << std::setfill('0') << (static_cast<unsigned int>(e) & 0xFF);
+    }
+
+    return ostr;
+}
+
+template <typename T>
+AsHexStringHelper<T> AsHexString(const T & container)
+{
+    static_assert (sizeof(container[0]) == 1 && std::is_pod<std::decay_t<decltype(container[0])>>::value, "Only works on containers of byte-size PODs.");
+
+    return AsHexStringHelper<T>{container};
+}
+
 template <typename T>
 std::string bin(const T & value, size_t bits = sizeof(T)*8)
 {
@ -113,10 +140,71 @@ DataTypePtr makeDataType()

 #undef MAKE_DATA_TYPE

-    assert(false && "unsupported size");
+    assert(false && "unknown datatype");
    return nullptr;
 }

+template <typename T, typename Container>
+class BinaryDataAsSequenceOfValuesIterator
+{
+    const Container & container;
+    const void * data;
+    const void * data_end;
+
+    T current_value;
+
+public:
+    using Self = BinaryDataAsSequenceOfValuesIterator<T, Container>;
+
+    explicit BinaryDataAsSequenceOfValuesIterator(const Container & container_)
+        : container(container_),
+          data(&container[0]),
+          data_end(reinterpret_cast<const char *>(data) + container.size()),
+          current_value(T{})
+    {
+        static_assert(sizeof(container[0]) == 1 && std::is_pod<std::decay_t<decltype(container[0])>>::value, "Only works on containers of byte-size PODs.");
+        read();
+    }
+
+    const T & operator*() const
+    {
+        return current_value;
+    }
+
+    size_t ItemsLeft() const
+    {
+        return reinterpret_cast<const char *>(data_end) - reinterpret_cast<const char *>(data);
+    }
+
+    Self & operator++()
+    {
+        read();
+        return *this;
+    }
+
+    operator bool() const
+    {
+        return ItemsLeft() > 0;
+    }
+
+private:
+    void read()
+    {
+        if (!*this)
+        {
+            throw std::runtime_error("No more data to read");
+        }
+
+        current_value = unalignedLoad<T>(data);
+        data = reinterpret_cast<const char *>(data) + sizeof(T);
+    }
+};
+
+template <typename T, typename Container>
+BinaryDataAsSequenceOfValuesIterator<T, Container> AsSequenceOf(const Container & container)
+{
+    return BinaryDataAsSequenceOfValuesIterator<T, Container>(container);
+}

 template <typename T, typename ContainerLeft, typename ContainerRight>
 ::testing::AssertionResult EqualByteContainersAs(const ContainerLeft & left, const ContainerRight & right)
@ -126,9 +214,6 @@ template <typename T, typename ContainerLeft, typename ContainerRight>

    ::testing::AssertionResult result = ::testing::AssertionSuccess();

-    ReadBufferFromMemory left_read_buffer(left.data(), left.size());
-    ReadBufferFromMemory right_read_buffer(right.data(), right.size());
-
    const auto l_size = left.size() / sizeof(T);
    const auto r_size = right.size() / sizeof(T);
    const auto size = std::min(l_size, r_size);
@ -137,16 +222,25 @@ template <typename T, typename ContainerLeft, typename ContainerRight>
    {
        result = ::testing::AssertionFailure() << "size mismatch" << " expected: " << l_size << " got:" << r_size;
    }
+    if (l_size == 0 || r_size == 0)
+    {
+        return result;
+    }
+
+    auto l = AsSequenceOf<T>(left);
+    auto r = AsSequenceOf<T>(right);

    const auto MAX_MISMATCHING_ITEMS = 5;
    int mismatching_items = 0;
-    for (int i = 0; i < size; ++i)
-    {
-        T left_value{};
-        left_read_buffer.readStrict(reinterpret_cast<char*>(&left_value), sizeof(left_value));
+    size_t i = 0;

-        T right_value{};
-        right_read_buffer.readStrict(reinterpret_cast<char*>(&right_value), sizeof(right_value));
+    while (l && r)
+    {
+        const auto left_value = *l;
+        const auto right_value = *r;
+        ++l;
+        ++r;
+        ++i;

        if (left_value != right_value)
        {
@ -157,25 +251,47 @@ template <typename T, typename ContainerLeft, typename ContainerRight>

            if (++mismatching_items <= MAX_MISMATCHING_ITEMS)
            {
-                result << "mismatching " << sizeof(T) << "-byte item #" << i
+                result << "\nmismatching " << sizeof(T) << "-byte item #" << i
                   << "\nexpected: " << bin(left_value) << " (0x" << std::hex << left_value << ")"
-                   << "\ngot     : " << bin(right_value) << " (0x" << std::hex << right_value << ")"
-                   << std::endl;
+                   << "\ngot     : " << bin(right_value) << " (0x" << std::hex << right_value << ")";
                if (mismatching_items == MAX_MISMATCHING_ITEMS)
                {
-                    result << "..." << std::endl;
+                    result << "\n..." << std::endl;
                }
            }
        }
    }
    if (mismatching_items > 0)
    {
-        result << "\ntotal mismatching items:" << mismatching_items << " of " << size;
+        result << "total mismatching items:" << mismatching_items << " of " << size;
    }

    return result;
 }

+template <typename ContainerLeft, typename ContainerRight>
+::testing::AssertionResult EqualByteContainers(UInt8 element_size, const ContainerLeft & left, const ContainerRight & right)
+{
+    switch (element_size)
+    {
+        case 1:
+            return EqualByteContainersAs<UInt8>(left, right);
+            break;
+        case 2:
+            return EqualByteContainersAs<UInt16>(left, right);
+            break;
+        case 4:
+            return EqualByteContainersAs<UInt32>(left, right);
+            break;
+        case 8:
+            return EqualByteContainersAs<UInt64>(left, right);
+            break;
+        default:
+            assert(false && "Invalid element_size");
+            return ::testing::AssertionFailure() << "Invalid element_size: " << element_size;
+    }
+}
+
 struct Codec
 {
    std::string codec_statement;
@ -214,20 +330,23 @@ struct CodecTestSequence
    CodecTestSequence & operator=(const CodecTestSequence &) = default;
    CodecTestSequence(CodecTestSequence &&) = default;
    CodecTestSequence & operator=(CodecTestSequence &&) = default;
+
+    // Concatenates `other` onto this sequence in place: its serialized bytes are added after ours
+    // and its name is joined to ours with " + ". Both sequences are expected to share a data type.
+    // Returns *this so that calls can be chained.
+    CodecTestSequence & append(const CodecTestSequence & other)
+    {
+        assert(data_type->equals(*other.data_type));
+
+        // A separator is only needed when there is already a name to extend.
+        if (!name.empty())
+        {
+            name += " + ";
+        }
+        name += other.name;
+
+        const auto & tail = other.serialized_data;
+        serialized_data.insert(serialized_data.end(), tail.begin(), tail.end());
+
+        return *this;
+    }
 };

-CodecTestSequence operator+(CodecTestSequence && left, CodecTestSequence && right)
+CodecTestSequence operator+(CodecTestSequence && left, const CodecTestSequence & right)
 {
-    assert(left.data_type->equals(*right.data_type));
-
-    std::vector<char> data(std::move(left.serialized_data));
-    data.insert(data.end(), right.serialized_data.begin(), right.serialized_data.end());
-
-    return CodecTestSequence{
-        left.name + " + " + right.name,
-        std::move(data),
-        std::move(left.data_type)
-    };
+    return left.append(right);
 }

 template <typename T>
@ -288,17 +407,22 @@ CodecTestSequence makeSeq(Args && ... args)
    };
 }

-template <typename T, typename Generator>
-CodecTestSequence generateSeq(Generator gen, const char* gen_name, size_t Begin = 0, size_t End = 10000)
+template <typename T, typename Generator, typename B = int, typename E = int>
+CodecTestSequence generateSeq(Generator gen, const char* gen_name, B Begin = 0, E End = 10000)
 {
-    assert (End >= Begin);
-
+    const auto direction = std::signbit(End - Begin) ? -1 : 1;
    std::vector<char> data(sizeof(T) * (End - Begin));
    char * write_pos = data.data();

-    for (size_t i = Begin; i < End; ++i)
+    for (auto i = Begin; i < End; i += direction)
    {
        const T v = gen(static_cast<T>(i));
+
+//        if constexpr (debug_log_items)
+//        {
+//            std::cerr << "#" << i << " " << type_name<T>() << "(" << sizeof(T) << " bytes) : " << v << std::endl;
+//        }
+
        unalignedStore<T>(write_pos, v);
        write_pos += sizeof(v);
    }
@ -310,6 +434,96 @@ CodecTestSequence generateSeq(Generator gen, const char* gen_name, size_t Begin
    };
 }

+// Timer stand-in for callers that do not want measurements: it offers the
+// start()/report() calls that testTranscoding() makes, and both do nothing.
+struct NoOpTimer
+{
+    void start() {}
+    void report(const char*) {}
+};
+
+// Timer that records a (label, elapsed) pair for every report() call, using a Stopwatch
+// constructed with the given clock type.
+struct StopwatchTimer
+{
+    // `estimated_marks` only pre-sizes the results vector; it is not a limit.
+    explicit StopwatchTimer(clockid_t clock_type, size_t estimated_marks = 32)
+        : stopwatch(clock_type)
+    {
+        results.reserve(estimated_marks);
+    }
+
+    // Restarts the underlying stopwatch; previously recorded results are kept.
+    void start()
+    {
+        stopwatch.restart();
+    }
+
+    // Stores `mark` together with the stopwatch's current elapsed() value.
+    // Only the pointer is stored, so `mark` must outlive this timer (callers here pass string literals).
+    // NOTE(review): elapsed() presumably returns nanoseconds - confirm against Stopwatch.
+    void report(const char * mark)
+    {
+        results.emplace_back(mark, stopwatch.elapsed());
+    }
+
+    void stop()
+    {
+        stopwatch.stop();
+    }
+
+    // All recorded (label, elapsed) pairs, in the order report() was called.
+    const std::vector<std::tuple<const char*, UInt64>> & getResults() const
+    {
+        return results;
+    }
+
+private:
+    Stopwatch stopwatch;
+    std::vector<std::tuple<const char*, UInt64>> results;
+};
+
+// Builds a codec from its textual description: wraps `codec_string` in parentheses,
+// parses it with ParserCodec and hands the resulting AST to CompressionCodecFactory.
+// `data_type` is forwarded to the factory unchanged; callers in this file may pass a null pointer.
+CompressionCodecPtr makeCodec(const std::string & codec_string, const DataTypePtr data_type)
+{
+    const std::string codec_statement = "(" + codec_string + ")";
+
+    // Take the character range via data()/size(): iterator::base() is not part of the
+    // standard std::string iterator interface and is not available on every standard library.
+    const char * const statement_begin = codec_statement.data();
+    const char * const statement_end = statement_begin + codec_statement.size();
+    Tokens tokens(statement_begin, statement_end);
+    IParser::Pos token_iterator(tokens);
+
+    Expected expected;
+    ASTPtr codec_ast;
+    ParserCodec parser;
+
+    // NOTE(review): the result of parse() is not checked, so a malformed codec string
+    // reaches the factory with whatever `codec_ast` holds at that point - confirm this is intended.
+    parser.parse(token_iterator, codec_ast, expected);
+
+    return CompressionCodecFactory::instance().get(codec_ast, data_type);
+}
+
+// Round-trips `test_sequence` through `codec`: compresses the serialized data, decompresses it
+// again and asserts that the result equals the input. The timer is started before each phase and
+// asked to report it as "encoding" / "decoding". When `expected_compression_ratio` is set, the
+// achieved ratio (compressed size without header, divided by source size) must not exceed it.
+template <typename Timer>
+void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSequence & test_sequence, std::optional<double> expected_compression_ratio = std::optional<double>{})
+{
+    const auto & source_data = test_sequence.serialized_data;
+    const auto source_size = source_data.size();
+
+    // Allocate what the codec asks to reserve; trimmed to the real size after compression.
+    const UInt32 reserve_size = codec.getCompressedReserveSize(source_size);
+    PODArray<char> compressed(reserve_size);
+
+    timer.start();
+    const UInt32 encoded_size = codec.compress(source_data.data(), source_size, compressed.data());
+    timer.report("encoding");
+
+    compressed.resize(encoded_size);
+
+    PODArray<char> restored(source_size);
+
+    timer.start();
+    const UInt32 decoded_size = codec.decompress(compressed.data(), compressed.size(), restored.data());
+    timer.report("decoding");
+
+    restored.resize(decoded_size);
+
+    ASSERT_TRUE(EqualByteContainers(test_sequence.data_type->getSizeOfValueInMemory(), source_data, restored));
+
+    const auto header_size = codec.getHeaderSize();
+    const auto compression_ratio = (encoded_size - header_size) / (source_size * 1.0);
+
+    if (!expected_compression_ratio)
+        return;
+
+    ASSERT_LE(compression_ratio, *expected_compression_ratio)
+            << "\n\tdecoded size: " << source_size
+            << "\n\tencoded size: " << encoded_size
+            << "(no header: " << encoded_size - header_size << ")";
+}

 class CodecTest : public ::testing::TestWithParam<std::tuple<Codec, CodecTestSequence>>
 {
@ -320,67 +534,18 @@ public:
        CODEC_WITHOUT_DATA_TYPE,
    };

-    CompressionCodecPtr makeCodec(MakeCodecParam with_data_type) const
+    CompressionCodecPtr makeCodec(MakeCodecParam with_data_type)
    {
        const auto & codec_string = std::get<0>(GetParam()).codec_statement;
        const auto & data_type = with_data_type == CODEC_WITH_DATA_TYPE ? std::get<1>(GetParam()).data_type : nullptr;

-        const std::string codec_statement = "(" + codec_string + ")";
-        Tokens tokens(codec_statement.begin().base(), codec_statement.end().base());
-        IParser::Pos token_iterator(tokens);
-
-        Expected expected;
-        ASTPtr codec_ast;
-        ParserCodec parser;
-
-        parser.parse(token_iterator, codec_ast, expected);
-
-        return CompressionCodecFactory::instance().get(codec_ast, data_type);
+        return ::makeCodec(codec_string, data_type);
    }

    void testTranscoding(ICompressionCodec & codec)
    {
-        const auto & test_sequence = std::get<1>(GetParam());
-        const auto & source_data = test_sequence.serialized_data;
-
-        const UInt32 encoded_max_size = codec.getCompressedReserveSize(source_data.size());
-        PODArray<char> encoded(encoded_max_size);
-
-        const UInt32 encoded_size = codec.compress(source_data.data(), source_data.size(), encoded.data());
-        encoded.resize(encoded_size);
-
-        PODArray<char> decoded(source_data.size());
-        const UInt32 decoded_size = codec.decompress(encoded.data(), encoded.size(), decoded.data());
-        decoded.resize(decoded_size);
-
-        switch (test_sequence.data_type->getSizeOfValueInMemory())
-        {
-            case 1:
-                ASSERT_TRUE(EqualByteContainersAs<UInt8>(source_data, decoded));
-                break;
-            case 2:
-                ASSERT_TRUE(EqualByteContainersAs<UInt16>(source_data, decoded));
-                break;
-            case 4:
-                ASSERT_TRUE(EqualByteContainersAs<UInt32>(source_data, decoded));
-                break;
-            case 8:
-                ASSERT_TRUE(EqualByteContainersAs<UInt64>(source_data, decoded));
-                break;
-            default:
-                FAIL() << "Invalid test sequence data type: " << test_sequence.data_type->getName();
-        }
-        const auto header_size = codec.getHeaderSize();
-        const auto compression_ratio = (encoded_size - header_size) / (source_data.size() * 1.0);
-
-        const auto & codec_spec = std::get<0>(GetParam());
-        if (codec_spec.expected_compression_ratio)
-        {
-            ASSERT_LE(compression_ratio, *codec_spec.expected_compression_ratio)
-                    << "\n\tdecoded size: " << source_data.size()
-                    << "\n\tencoded size: " << encoded_size
-                    << "(no header: " << encoded_size - header_size << ")";
-        }
+        NoOpTimer timer;
+        ::testTranscoding(timer, codec, std::get<1>(GetParam()), std::get<0>(GetParam()).expected_compression_ratio);
    }
 };

@ -396,10 +561,121 @@ TEST_P(CodecTest, TranscodingWithoutDataType)
    testTranscoding(*codec);
 }

+// Param is tuple-of-tuple to simplify instantiating with values, since typically group of cases test only one codec.
+class CodecTest_Compatibility : public ::testing::TestWithParam<std::tuple<Codec, std::tuple<CodecTestSequence, std::string>>>
+{};
+
+// Check that the input sequence, once encoded, matches the reference binary string byte for byte.
+TEST_P(CodecTest_Compatibility, Encoding)
+{
+    const auto & [sequence, expected_bytes] = std::get<1>(GetParam());
+    const auto codec = makeCodec(std::get<0>(GetParam()).codec_statement, sequence.data_type);
+
+    const auto & input = sequence.serialized_data;
+
+    // Allocate what the codec asks to reserve, then shrink to what it actually produced.
+    const UInt32 reserve_size = codec->getCompressedReserveSize(input.size());
+    PODArray<char> encoded(reserve_size);
+
+    const UInt32 encoded_size = codec->compress(input.data(), input.size(), encoded.data());
+    encoded.resize(encoded_size);
+
+    // Attach the produced bytes to any failure reported below.
+    SCOPED_TRACE(::testing::Message("encoded:  ") << AsHexString(encoded));
+
+    ASSERT_TRUE(EqualByteContainersAs<UInt8>(expected_bytes, encoded));
+}
+
+// Check that decoding the reference binary string reproduces the input sequence exactly.
+TEST_P(CodecTest_Compatibility, Decoding)
+{
+    const auto & [reference, compressed] = std::get<1>(GetParam());
+    const auto & reference_bytes = reference.serialized_data;
+    const auto codec = makeCodec(std::get<0>(GetParam()).codec_statement, reference.data_type);
+
+    // The output buffer is sized for the expected result and trimmed to what was actually decoded.
+    PODArray<char> decoded(reference_bytes.size());
+    const UInt32 decoded_size = codec->decompress(compressed.c_str(), compressed.size(), decoded.data());
+    decoded.resize(decoded_size);
+
+    ASSERT_TRUE(EqualByteContainers(reference.data_type->getSizeOfValueInMemory(), reference_bytes, decoded));
+}
+
+class CodecTest_Performance : public ::testing::TestWithParam<std::tuple<Codec, CodecTestSequence>>
+{};
+
+// Runs the encode/decode round trip several times, timing each phase with the thread CPU-time
+// clock, and prints one line to stderr with the trimmed mean per phase. The test fails if the
+// coefficient of variation of a phase is not below 5%.
+TEST_P(CodecTest_Performance, TranscodingWithDataType)
+{
+    const auto & [codec_spec, test_seq] = GetParam();
+    const auto codec = ::makeCodec(codec_spec.codec_statement, test_seq.data_type);
+
+    // Same type as the loop index below, to avoid a signed/unsigned comparison.
+    const size_t runs = 10;
+    std::map<std::string, std::vector<UInt64>> results;
+
+    for (size_t i = 0; i < runs; ++i)
+    {
+        StopwatchTimer timer{CLOCK_THREAD_CPUTIME_ID};
+        ::testTranscoding(timer, *codec, test_seq);
+        timer.stop();
+
+        // Group the measurements of every run by their label ("encoding" / "decoding").
+        for (const auto & [label, value] : timer.getResults())
+        {
+            results[label].push_back(value);
+        }
+    }
+
+    // Returns (mean, population standard deviation) of `values` after discarding the smallest
+    // and the largest sample. Returns (0, 0) when there are too few samples to do that.
+    auto computeMeanAndStdDev = [](const auto & values)
+    {
+        double mean{};
+
+        // At least three samples are required: two are discarded below, and with fewer
+        // nothing would be left to average (0 / 0).
+        if (values.size() < 3)
+            return std::make_tuple(mean, double{});
+
+        using ValueType = typename std::decay_t<decltype(values)>::value_type;
+        std::vector<ValueType> tmp_v(std::begin(values), std::end(values));
+        std::sort(tmp_v.begin(), tmp_v.end());
+
+        // remove min and max
+        tmp_v.erase(tmp_v.begin());
+        tmp_v.erase(tmp_v.end() - 1);
+
+        for (const auto & v : tmp_v)
+        {
+            mean += v;
+        }
+
+        mean = mean / tmp_v.size();
+        double std_dev = 0.0;
+        for (const auto & v : tmp_v)
+        {
+            const auto d = (v - mean);
+            std_dev += (d * d);
+        }
+        std_dev = std::sqrt(std_dev / tmp_v.size());
+
+        return std::make_tuple(mean, std_dev);
+    };
+
+    // Header: codec, data type, payload size and a hash of the payload to identify the data set.
+    std::cerr << codec_spec.codec_statement
+              << " " << test_seq.data_type->getName()
+              << " (" << test_seq.serialized_data.size() << " bytes, "
+              << std::hex << CityHash_v1_0_2::CityHash64(test_seq.serialized_data.data(), test_seq.serialized_data.size()) << std::dec
+              << ", average of " << runs << " runs, μs)";
+
+    for (const auto & k : {"encoding", "decoding"})
+    {
+        const auto & values = results[k];
+        const auto & [mean, std_dev] = computeMeanAndStdDev(values);
+        // Ensure that Coefficient of variation is reasonably low, otherwise these numbers are meaningless
+        EXPECT_GT(0.05, std_dev / mean);
+        // The mean is divided by 1000 to match the unit announced in the header line.
+        std::cerr << "\t" << std::fixed << std::setprecision(1) << mean / 1000.0;
+    }
+
+    std::cerr << std::endl;
+}
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Here we use generators to produce test payload for codecs.
 // Generator is a callable that can produce infinite number of values,
-// output value MUST be of the same type input value.
+// output value MUST be of the same type as input value.
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 auto SameValueGenerator = [](auto value)
@ -543,6 +819,23 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(const size_t sequences
    return sequences;
 };

+// Just as if all sequences from generatePyramidOfSequences were appended one-by-one to the first one:
+// for every i in [1, sequences_count) a sequence generated over the range [0, i) is appended
+// to a single resulting sequence of type T.
+template <typename T, typename Generator>
+CodecTestSequence generatePyramidSequence(const size_t sequences_count, Generator && generator, const char* generator_name)
+{
+    CodecTestSequence sequence;
+    sequence.data_type = makeDataType<T>();
+    // Upper bound on the final size; avoids repeated reallocation while appending.
+    sequence.serialized_data.reserve(sequences_count * sequences_count * sizeof(T));
+
+    for (size_t i = 1; i < sequences_count; ++i)
+    {
+        std::string name = generator_name + std::string(" from 0 to ") + std::to_string(i);
+        // Pass the generator as an lvalue: forwarding it inside the loop could move from it on
+        // the first iteration and hand a moved-from object to every later one.
+        sequence.append(generateSeq<T>(generator, name.c_str(), 0, i));
+    }
+
+    return sequence;
+}
+

 // helper macro to produce human-friendly sequence name from generator
 #define G(generator) generator, #generator
@ -575,7 +868,7 @@ INSTANTIATE_TEST_CASE_P(SmallSequences,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::ValuesIn(
-                  generatePyramidOfSequences<Int8  >(42, G(SequentialGenerator(1)))
+                  generatePyramidOfSequences<Int8 >(42, G(SequentialGenerator(1)))
                + generatePyramidOfSequences<Int16 >(42, G(SequentialGenerator(1)))
                + generatePyramidOfSequences<Int32 >(42, G(SequentialGenerator(1)))
                + generatePyramidOfSequences<Int64 >(42, G(SequentialGenerator(1)))
@ -609,7 +902,7 @@ INSTANTIATE_TEST_CASE_P(SameValueInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(SameValueGenerator(1000))),
+            generateSeq<Int8>(G(SameValueGenerator(1000))),
            generateSeq<Int16 >(G(SameValueGenerator(1000))),
            generateSeq<Int32 >(G(SameValueGenerator(1000))),
            generateSeq<Int64 >(G(SameValueGenerator(1000))),
@ -626,7 +919,7 @@ INSTANTIATE_TEST_CASE_P(SameNegativeValueInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(SameValueGenerator(-1000))),
+            generateSeq<Int8>(G(SameValueGenerator(-1000))),
            generateSeq<Int16 >(G(SameValueGenerator(-1000))),
            generateSeq<Int32 >(G(SameValueGenerator(-1000))),
            generateSeq<Int64 >(G(SameValueGenerator(-1000))),
@ -671,7 +964,7 @@ INSTANTIATE_TEST_CASE_P(SequentialInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(SequentialGenerator(1))),
+            generateSeq<Int8>(G(SequentialGenerator(1))),
            generateSeq<Int16 >(G(SequentialGenerator(1))),
            generateSeq<Int32 >(G(SequentialGenerator(1))),
            generateSeq<Int64 >(G(SequentialGenerator(1))),
@ -690,7 +983,7 @@ INSTANTIATE_TEST_CASE_P(SequentialReverseInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(SequentialGenerator(-1))),
+            generateSeq<Int8>(G(SequentialGenerator(-1))),
            generateSeq<Int16 >(G(SequentialGenerator(-1))),
            generateSeq<Int32 >(G(SequentialGenerator(-1))),
            generateSeq<Int64 >(G(SequentialGenerator(-1))),
@ -735,10 +1028,10 @@ INSTANTIATE_TEST_CASE_P(MonotonicInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(MonotonicGenerator(1, 5))),
-            generateSeq<Int16 >(G(MonotonicGenerator(1, 5))),
-            generateSeq<Int32 >(G(MonotonicGenerator(1, 5))),
-            generateSeq<Int64 >(G(MonotonicGenerator(1, 5))),
+            generateSeq<Int8>(G(MonotonicGenerator(1, 5))),
+            generateSeq<Int16>(G(MonotonicGenerator(1, 5))),
+            generateSeq<Int32>(G(MonotonicGenerator(1, 5))),
+            generateSeq<Int64>(G(MonotonicGenerator(1, 5))),
            generateSeq<UInt8 >(G(MonotonicGenerator(1, 5))),
            generateSeq<UInt16>(G(MonotonicGenerator(1, 5))),
            generateSeq<UInt32>(G(MonotonicGenerator(1, 5))),
@ -752,11 +1045,11 @@ INSTANTIATE_TEST_CASE_P(MonotonicReverseInt,
    ::testing::Combine(
        DefaultCodecsToTest,
        ::testing::Values(
-            generateSeq<Int8  >(G(MonotonicGenerator(-1, 5))),
-            generateSeq<Int16 >(G(MonotonicGenerator(-1, 5))),
-            generateSeq<Int32 >(G(MonotonicGenerator(-1, 5))),
-            generateSeq<Int64 >(G(MonotonicGenerator(-1, 5))),
-            generateSeq<UInt8 >(G(MonotonicGenerator(-1, 5))),
+            generateSeq<Int8>(G(MonotonicGenerator(-1, 5))),
+            generateSeq<Int16>(G(MonotonicGenerator(-1, 5))),
+            generateSeq<Int32>(G(MonotonicGenerator(-1, 5))),
+            generateSeq<Int64>(G(MonotonicGenerator(-1, 5))),
+            generateSeq<UInt8>(G(MonotonicGenerator(-1, 5))),
            generateSeq<UInt16>(G(MonotonicGenerator(-1, 5))),
            generateSeq<UInt32>(G(MonotonicGenerator(-1, 5))),
            generateSeq<UInt64>(G(MonotonicGenerator(-1, 5)))
@ -862,4 +1155,191 @@ INSTANTIATE_TEST_CASE_P(OverflowFloat,
    ),
 );

+// Builds the reference input for the DoubleDelta compatibility tests: a short run of a constant
+// value followed by stretches whose double delta sweeps across each corner point listed below
+// that fits into ValueType.
+template <typename ValueType>
+auto DDCompatibilityTestSequence()
+{
+    // Generates sequences with double delta in given range.
+    // The lambda is stateful: it keeps the previous value and previous delta across calls,
+    // including across the generateSeq() invocations in the loop below.
+    auto ddGenerator = [prev_delta = static_cast<Int64>(0), prev = static_cast<Int64>(0)](auto dd) mutable
+    {
+        const auto curr = dd + prev + prev_delta;
+        prev = curr;
+        prev_delta = dd + prev_delta;
+        return curr;
+    };
+
+    auto ret = generateSeq<ValueType>(G(SameValueGenerator(42)), 0, 3);
+
+    // These values are from DoubleDelta paper (and implementation) and represent points at which DD encoded length is changed.
+    // DD value less than this point is encoded in shorter binary form (bigger - longer binary).
+    const Int64 dd_corner_points[] = {-63, 64, -255, 256, -2047, 2048, std::numeric_limits<Int32>::min(), std::numeric_limits<Int32>::max()};
+    for (const auto & p : dd_corner_points)
+    {
+        // Stop at the first corner point whose magnitude does not fit into ValueType.
+        if (std::abs(p) > std::numeric_limits<ValueType>::max())
+        {
+            break;
+        }
+
+        // - 4 is to allow DD value to settle before transitioning through important point,
+        // since DD depends on 2 previous values of data, + 2 is arbitrary.
+        ret.append(generateSeq<ValueType>(G(ddGenerator), p - 4, p + 2));
+    }
+
+    return ret;
+}
+
+#define BIN_STR(x) std::string{x, sizeof(x) - 1}
+
+INSTANTIATE_TEST_CASE_P(DoubleDelta,
+    CodecTest_Compatibility,
+    ::testing::Combine(
+        ::testing::Values(Codec("DoubleDelta")),
+        ::testing::ValuesIn(std::initializer_list<std::tuple<CodecTestSequence, std::string>>{
+            {
+                DDCompatibilityTestSequence<Int8>(),
+                BIN_STR("\x94\x21\x00\x00\x00\x0f\x00\x00\x00\x01\x00\x0f\x00\x00\x00\x2a\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xb1\xaa\xf4\xf6\x7d\x87\xf8\x80")
+            },
+            {
+                DDCompatibilityTestSequence<UInt8>(),
+                BIN_STR("\x94\x27\x00\x00\x00\x15\x00\x00\x00\x01\x00\x15\x00\x00\x00\x2a\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xb1\xaa\xf4\xf6\x7d\x87\xf8\x81\x8e\xd0\xca\x02\x01\x01")
+            },
+            {
+                DDCompatibilityTestSequence<Int16>(),
+                BIN_STR("\x94\x70\x00\x00\x00\x4e\x00\x00\x00\x02\x00\x27\x00\x00\x00\x2a\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x40\x00\x0f\xf2\x78\x00\x01\x7f\x83\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00")
+            },
+            {
+                DDCompatibilityTestSequence<UInt16>(),
+                BIN_STR("\x94\x70\x00\x00\x00\x4e\x00\x00\x00\x02\x00\x27\x00\x00\x00\x2a\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x40\x00\x0f\xf2\x78\x00\x01\x7f\x83\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00")
+            },
+            {
+                DDCompatibilityTestSequence<Int32>(),
+                BIN_STR("\x94\x74\x00\x00\x00\x9c\x00\x00\x00\x04\x00\x27\x00\x00\x00\x2a\x00\x00\x00\x00\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x00\x00\x70\x0d\x7a\x00\x02\x80\x7b\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00")
+            },
+            {
+                DDCompatibilityTestSequence<UInt32>(),
+                BIN_STR("\x94\xb5\x00\x00\x00\xcc\x00\x00\x00\x04\x00\x33\x00\x00\x00\x2a\x00\x00\x00\x00\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x00\x00\x70\x0d\x7a\x00\x02\x80\x7b\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00\xf3\xff\xf9\x41\xaf\xbf\xff\xd6\x0c\xfc\xff\xff\xff\xfb\xf0\x00\x00\x00\x07\xff\xff\xff\xef\xc0\x00\x00\x00\x3f\xff\xff\xff\xfb\xff\xff\xff\xfa\x69\x74\xf3\xff\xff\xff\xe7\x9f\xff\xff\xff\x7e\x00\x00\x00\x00\xff\xff\xff\xfd\xf8\x00\x00\x00\x07\xff\xff\xff\xf0")
+            },
+            {
+                DDCompatibilityTestSequence<Int64>(),
+                BIN_STR("\x94\xd4\x00\x00\x00\x98\x01\x00\x00\x08\x00\x33\x00\x00\x00\x2a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x00\x00\x70\x0d\x7a\x00\x02\x80\x7b\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00\xfc\x00\x00\x00\x04\x00\x06\xbe\x4f\xbf\xff\xd6\x0c\xff\x00\x00\x00\x01\x00\x00\x00\x03\xf8\x00\x00\x00\x08\x00\x00\x00\x0f\xc0\x00\x00\x00\x3f\xff\xff\xff\xfb\xff\xff\xff\xfb\xe0\x00\x00\x01\xc0\x00\x00\x06\x9f\x80\x00\x00\x0a\x00\x00\x00\x34\xf3\xff\xff\xff\xe7\x9f\xff\xff\xff\x7e\x00\x00\x00\x00\xff\xff\xff\xfd\xf0\x00\x00\x00\x07\xff\xff\xff\xf0")
+            },
+            {
+                DDCompatibilityTestSequence<UInt64>(),
+                BIN_STR("\x94\xd4\x00\x00\x00\x98\x01\x00\x00\x08\x00\x33\x00\x00\x00\x2a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x00\x00\x70\x0d\x7a\x00\x02\x80\x7b\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00\xfc\x00\x00\x00\x04\x00\x06\xbe\x4f\xbf\xff\xd6\x0c\xff\x00\x00\x00\x01\x00\x00\x00\x03\xf8\x00\x00\x00\x08\x00\x00\x00\x0f\xc0\x00\x00\x00\x3f\xff\xff\xff\xfb\xff\xff\xff\xfb\xe0\x00\x00\x01\xc0\x00\x00\x06\x9f\x80\x00\x00\x0a\x00\x00\x00\x34\xf3\xff\xff\xff\xe7\x9f\xff\xff\xff\x7e\x00\x00\x00\x00\xff\xff\xff\xfd\xf0\x00\x00\x00\x07\xff\xff\xff\xf0")
+            },
+        })
+    ),
+);
+
+// Payload for the DoubleDelta performance test, made of three parts labelled below as
+// average, worst and best case.
+// NOTE(review): relies on an operator* for CodecTestSequence that is not visible here;
+// presumably it repeats the sequence `times` times - confirm.
+template <typename ValueType>
+auto DDperformanceTestSequence()
+{
+    const auto times = 100'000;
+    return DDCompatibilityTestSequence<ValueType>() * times // average case
+        + generateSeq<ValueType>(G(MinMaxGenerator()), 0, times) // worst
+        + generateSeq<ValueType>(G(SameValueGenerator(42)), 0, times); // best
+}
+
+// Prime numbers in ascending order with some random repetitions hit all the cases of Gorilla.
+// Returns a generator that maps index `i` to the table entry at `i % count`, multiplied by
+// `multiplier` and converted to the type of `i`.
+auto PrimesWithMultiplierGenerator = [](int multiplier = 1)
+{
+    return [multiplier](auto i)
+    {
+        static const int vals[] = {
+             2, 3, 5, 7, 11, 11, 13, 17, 19, 23, 29, 29, 31, 37, 41, 43,
+            47, 47, 53, 59, 61, 61, 67, 71, 73, 79, 83, 89, 89, 97, 101, 103,
+            107, 107, 109, 113, 113, 127, 127, 127
+        };
+        static const size_t count = sizeof(vals)/sizeof(vals[0]);
+
+        // The multiplier is converted to the value type first, so the product may wrap for narrow types.
+        using T = decltype(i);
+        return static_cast<T>(vals[i % count] * static_cast<T>(multiplier));
+    };
+};
+
+// Builds the reference input for the Gorilla compatibility tests: the values produced by
+// PrimesWithMultiplierGenerator over the range [0, 42), with a multiplier that depends on sizeof(ValueType).
+template <typename ValueType>
+auto GCompatibilityTestSequence()
+{
+    // Also multiply result by some factor to test large values on types that can hold those.
+    return generateSeq<ValueType>(G(PrimesWithMultiplierGenerator(intExp10(sizeof(ValueType)))), 0, 42);
+}
+
+INSTANTIATE_TEST_CASE_P(Gorilla,
+    CodecTest_Compatibility,
+    ::testing::Combine(
+        ::testing::Values(Codec("Gorilla")),
+        ::testing::ValuesIn(std::initializer_list<std::tuple<CodecTestSequence, std::string>>{
+            {
+                GCompatibilityTestSequence<Int8>(),
+                BIN_STR("\x95\x35\x00\x00\x00\x2a\x00\x00\x00\x01\x00\x2a\x00\x00\x00\x14\xe1\xdd\x25\xe5\x7b\x29\x86\xee\x2a\x16\x5a\xc5\x0b\x23\x75\x1b\x3c\xb1\x97\x8b\x5f\xcb\x43\xd9\xc5\x48\xab\x23\xaf\x62\x93\x71\x4a\x73\x0f\xc6\x0a")
+            },
+            {
+                GCompatibilityTestSequence<UInt8>(),
+                BIN_STR("\x95\x35\x00\x00\x00\x2a\x00\x00\x00\x01\x00\x2a\x00\x00\x00\x14\xe1\xdd\x25\xe5\x7b\x29\x86\xee\x2a\x16\x5a\xc5\x0b\x23\x75\x1b\x3c\xb1\x97\x8b\x5f\xcb\x43\xd9\xc5\x48\xab\x23\xaf\x62\x93\x71\x4a\x73\x0f\xc6\x0a")
+            },
+            {
+                GCompatibilityTestSequence<Int16>(),
+                BIN_STR("\x95\x52\x00\x00\x00\x54\x00\x00\x00\x02\x00\x2a\x00\x00\x00\xc8\x00\xdc\xfe\x66\xdb\x1f\x4e\xa7\xde\xdc\xd5\xec\x6e\xf7\x37\x3a\x23\xe7\x63\xf5\x6a\x8e\x99\x37\x34\xf9\xf8\x2e\x76\x35\x2d\x51\xbb\x3b\xc3\x6d\x13\xbf\x86\x53\x9e\x25\xe4\xaf\xaf\x63\xd5\x6a\x6e\x76\x35\x3a\x27\xd3\x0f\x91\xae\x6b\x33\x57\x6e\x64\xcc\x55\x81\xe4")
+            },
+            {
+                GCompatibilityTestSequence<UInt16>(),
+                BIN_STR("\x95\x52\x00\x00\x00\x54\x00\x00\x00\x02\x00\x2a\x00\x00\x00\xc8\x00\xdc\xfe\x66\xdb\x1f\x4e\xa7\xde\xdc\xd5\xec\x6e\xf7\x37\x3a\x23\xe7\x63\xf5\x6a\x8e\x99\x37\x34\xf9\xf8\x2e\x76\x35\x2d\x51\xbb\x3b\xc3\x6d\x13\xbf\x86\x53\x9e\x25\xe4\xaf\xaf\x63\xd5\x6a\x6e\x76\x35\x3a\x27\xd3\x0f\x91\xae\x6b\x33\x57\x6e\x64\xcc\x55\x81\xe4")
+            },
+            {
+                GCompatibilityTestSequence<Int32>(),
+                BIN_STR("\x95\x65\x00\x00\x00\xa8\x00\x00\x00\x04\x00\x2a\x00\x00\x00\x20\x4e\x00\x00\xe4\x57\x63\xc0\xbb\x67\xbc\xce\x91\x97\x99\x15\x9e\xe3\x36\x3f\x89\x5f\x8e\xf2\xec\x8e\xd3\xbf\x75\x43\x58\xc4\x7e\xcf\x93\x43\x38\xc6\x91\x36\x1f\xe7\xb6\x11\x6f\x02\x73\x46\xef\xe0\xec\x50\xfb\x79\xcb\x9c\x14\xfa\x13\xea\x8d\x66\x43\x48\xa0\xde\x3a\xcf\xff\x26\xe0\x5f\x93\xde\x5e\x7f\x6e\x36\x5e\xe6\xb4\x66\x5d\xb0\x0e\xc4")
+            },
+            {
+                GCompatibilityTestSequence<UInt32>(),
+                BIN_STR("\x95\x65\x00\x00\x00\xa8\x00\x00\x00\x04\x00\x2a\x00\x00\x00\x20\x4e\x00\x00\xe4\x57\x63\xc0\xbb\x67\xbc\xce\x91\x97\x99\x15\x9e\xe3\x36\x3f\x89\x5f\x8e\xf2\xec\x8e\xd3\xbf\x75\x43\x58\xc4\x7e\xcf\x93\x43\x38\xc6\x91\x36\x1f\xe7\xb6\x11\x6f\x02\x73\x46\xef\xe0\xec\x50\xfb\x79\xcb\x9c\x14\xfa\x13\xea\x8d\x66\x43\x48\xa0\xde\x3a\xcf\xff\x26\xe0\x5f\x93\xde\x5e\x7f\x6e\x36\x5e\xe6\xb4\x66\x5d\xb0\x0e\xc4")
+            },
+            {
+                GCompatibilityTestSequence<Int64>(),
+                BIN_STR("\x95\x91\x00\x00\x00\x50\x01\x00\x00\x08\x00\x2a\x00\x00\x00\x00\xc2\xeb\x0b\x00\x00\x00\x00\xe3\x2b\xa0\xa6\x19\x85\x98\xdc\x45\x74\x74\x43\xc2\x57\x41\x4c\x6e\x42\x79\xd9\x8f\x88\xa5\x05\xf3\xf1\x94\xa3\x62\x1e\x02\xdf\x05\x10\xf1\x15\x97\x35\x2a\x50\x71\x0f\x09\x6c\x89\xf7\x65\x1d\x11\xb7\xcc\x7d\x0b\x70\xc1\x86\x88\x48\x47\x87\xb6\x32\x26\xa7\x86\x87\x88\xd3\x93\x3d\xfc\x28\x68\x85\x05\x0b\x13\xc6\x5f\xd4\x70\xe1\x5e\x76\xf1\x9f\xf3\x33\x2a\x14\x14\x5e\x40\xc1\x5c\x28\x3f\xec\x43\x03\x05\x11\x91\xe8\xeb\x8e\x0a\x0e\x27\x21\x55\xcb\x39\xbc\x6a\xff\x11\x5d\x81\xa0\xa6\x10")
+            },
+            {
+                GCompatibilityTestSequence<UInt64>(),
+                BIN_STR("\x95\x91\x00\x00\x00\x50\x01\x00\x00\x08\x00\x2a\x00\x00\x00\x00\xc2\xeb\x0b\x00\x00\x00\x00\xe3\x2b\xa0\xa6\x19\x85\x98\xdc\x45\x74\x74\x43\xc2\x57\x41\x4c\x6e\x42\x79\xd9\x8f\x88\xa5\x05\xf3\xf1\x94\xa3\x62\x1e\x02\xdf\x05\x10\xf1\x15\x97\x35\x2a\x50\x71\x0f\x09\x6c\x89\xf7\x65\x1d\x11\xb7\xcc\x7d\x0b\x70\xc1\x86\x88\x48\x47\x87\xb6\x32\x26\xa7\x86\x87\x88\xd3\x93\x3d\xfc\x28\x68\x85\x05\x0b\x13\xc6\x5f\xd4\x70\xe1\x5e\x76\xf1\x9f\xf3\x33\x2a\x14\x14\x5e\x40\xc1\x5c\x28\x3f\xec\x43\x03\x05\x11\x91\xe8\xeb\x8e\x0a\x0e\x27\x21\x55\xcb\x39\xbc\x6a\xff\x11\x5d\x81\xa0\xa6\x10")
+            },
+        })
+    ),
+);
+
+// These 'tests' try to measure performance of encoding and decoding and hence only make sense to be run locally,
+// also they require pretty big data to run against and generating this data slows down startup of unit test process.
+// So un-comment only at your discretion.
+
+//INSTANTIATE_TEST_CASE_P(DoubleDelta,
+//    CodecTest_Performance,
+//    ::testing::Combine(
+//        ::testing::Values(Codec("DoubleDelta")),
+//        ::testing::Values(
+//            DDperformanceTestSequence<Int8 >(),
+//            DDperformanceTestSequence<UInt8 >(),
+//            DDperformanceTestSequence<Int16 >(),
+//            DDperformanceTestSequence<UInt16>(),
+//            DDperformanceTestSequence<Int32 >(),
+//            DDperformanceTestSequence<UInt32>(),
+//            DDperformanceTestSequence<Int64 >(),
+//            DDperformanceTestSequence<UInt64>()
+//        )
+//    ),
+//);
+
+//INSTANTIATE_TEST_CASE_P(Gorilla,
+//    CodecTest_Performance,
+//    ::testing::Combine(
+//        ::testing::Values(Codec("Gorilla")),
+//        ::testing::Values(
+//            generatePyramidSequence<Int8 >(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<UInt8 >(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<Int16 >(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<UInt16>(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<Int32 >(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<UInt32>(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<Int64 >(42, G(PrimesWithMultiplierGenerator())) * 6'000,
+//            generatePyramidSequence<UInt64>(42, G(PrimesWithMultiplierGenerator())) * 6'000
+//        )
+//    ),
+//);
+
 }
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@ -127,6 +127,7 @@ struct Settings : public SettingsCollection<Settings>
    M(SettingUInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
    \
    M(SettingUInt64, min_bytes_to_use_direct_io, 0, "The minimum number of bytes for reading the data with O_DIRECT option during SELECT queries execution. 0 - disabled.", 0) \
+    M(SettingUInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
    \
    M(SettingBool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
    M(SettingBool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
--- a/dbms/src/Core/SortCursor.h
+++ b/dbms/src/Core/SortCursor.h
@ -22,8 +22,8 @@ namespace DB
  */
 struct SortCursorImpl
 {
-    ColumnRawPtrs all_columns;
    ColumnRawPtrs sort_columns;
+    ColumnRawPtrs all_columns;
    SortDescription desc;
    size_t sort_columns_size = 0;
    size_t pos = 0;
@ -110,21 +110,52 @@ using SortCursorImpls = std::vector<SortCursorImpl>;


 /// For easy copying.
-struct SortCursor
+template <typename Derived>
+struct SortCursorHelper
 {
    SortCursorImpl * impl;

-    SortCursor(SortCursorImpl * impl_) : impl(impl_) {}
+    const Derived & derived() const { return static_cast<const Derived &>(*this); }
+
+    SortCursorHelper(SortCursorImpl * impl_) : impl(impl_) {}
    SortCursorImpl * operator-> () { return impl; }
    const SortCursorImpl * operator-> () const { return impl; }

+    bool greater(const SortCursorHelper & rhs) const
+    {
+        return derived().greaterAt(rhs.derived(), impl->pos, rhs.impl->pos);
+    }
+
+    /// Inverted so that the priority queue elements are removed in ascending order.
+    bool operator< (const SortCursorHelper & rhs) const
+    {
+        return derived().greater(rhs.derived());
+    }
+
+    /// Checks that all rows in the current block of this cursor are less than or equal to all the rows of the current block of another cursor.
+    bool totallyLessOrEquals(const SortCursorHelper & rhs) const
+    {
+        if (impl->rows == 0 || rhs.impl->rows == 0)
+            return false;
+
+        /// The last row of this cursor is no larger than the first row of the other cursor.
+        return !derived().greaterAt(rhs.derived(), impl->rows - 1, 0);
+    }
+};
+
+
+struct SortCursor : SortCursorHelper<SortCursor>
+{
+    using SortCursorHelper<SortCursor>::SortCursorHelper;
+
    /// The specified row of this cursor is greater than the specified row of another cursor.
    bool greaterAt(const SortCursor & rhs, size_t lhs_pos, size_t rhs_pos) const
    {
        for (size_t i = 0; i < impl->sort_columns_size; ++i)
        {
-            int direction = impl->desc[i].direction;
-            int nulls_direction = impl->desc[i].nulls_direction;
+            const auto & desc = impl->desc[i];
+            int direction = desc.direction;
+            int nulls_direction = desc.nulls_direction;
            int res = direction * impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);
            if (res > 0)
                return true;
@ -133,45 +164,37 @@ struct SortCursor
        }
        return impl->order > rhs.impl->order;
    }
+};

-    /// Checks that all rows in the current block of this cursor are less than or equal to all the rows of the current block of another cursor.
-    bool totallyLessOrEquals(const SortCursor & rhs) const
+
+/// For the case with a single column and when there is no order between different cursors.
+struct SimpleSortCursor : SortCursorHelper<SimpleSortCursor>
+{
+    using SortCursorHelper<SimpleSortCursor>::SortCursorHelper;
+
+    bool greaterAt(const SimpleSortCursor & rhs, size_t lhs_pos, size_t rhs_pos) const
    {
-        if (impl->rows == 0 || rhs.impl->rows == 0)
-            return false;
-
-        /// The last row of this cursor is no larger than the first row of the another cursor.
-        return !greaterAt(rhs, impl->rows - 1, 0);
-    }
-
-    bool greater(const SortCursor & rhs) const
-    {
-        return greaterAt(rhs, impl->pos, rhs.impl->pos);
-    }
-
-    /// Inverted so that the priority queue elements are removed in ascending order.
-    bool operator< (const SortCursor & rhs) const
-    {
-        return greater(rhs);
+        const auto & desc = impl->desc[0];
+        int direction = desc.direction;
+        int nulls_direction = desc.nulls_direction;
+        int res = impl->sort_columns[0]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[0]), nulls_direction);
+        return res != 0 && ((res > 0) == (direction > 0));
    }
 };


 /// Separate comparator for locale-sensitive string comparisons
-struct SortCursorWithCollation
+struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
 {
-    SortCursorImpl * impl;
-
-    SortCursorWithCollation(SortCursorImpl * impl_) : impl(impl_) {}
-    SortCursorImpl * operator-> () { return impl; }
-    const SortCursorImpl * operator-> () const { return impl; }
+    using SortCursorHelper<SortCursorWithCollation>::SortCursorHelper;

    bool greaterAt(const SortCursorWithCollation & rhs, size_t lhs_pos, size_t rhs_pos) const
    {
        for (size_t i = 0; i < impl->sort_columns_size; ++i)
        {
-            int direction = impl->desc[i].direction;
-            int nulls_direction = impl->desc[i].nulls_direction;
+            const auto & desc = impl->desc[i];
+            int direction = desc.direction;
+            int nulls_direction = desc.nulls_direction;
            int res;
            if (impl->need_collation[i])
            {
@ -189,29 +212,11 @@ struct SortCursorWithCollation
        }
        return impl->order > rhs.impl->order;
    }
-
-    bool totallyLessOrEquals(const SortCursorWithCollation & rhs) const
-    {
-        if (impl->rows == 0 || rhs.impl->rows == 0)
-            return false;
-
-        /// The last row of this cursor is no larger than the first row of the another cursor.
-        return !greaterAt(rhs, impl->rows - 1, 0);
-    }
-
-    bool greater(const SortCursorWithCollation & rhs) const
-    {
-        return greaterAt(rhs, impl->pos, rhs.impl->pos);
-    }
-
-    bool operator< (const SortCursorWithCollation & rhs) const
-    {
-        return greater(rhs);
-    }
 };


 /** Allows to fetch data from multiple sort cursors in sorted order (merging sorted data streams).
+  * TODO: Replace with "Loser Tree", see https://en.wikipedia.org/wiki/K-way_merge_algorithm
  */
 template <typename Cursor>
 class SortingHeap
@ -225,7 +230,8 @@ public:
        size_t size = cursors.size();
        queue.reserve(size);
        for (size_t i = 0; i < size; ++i)
-            queue.emplace_back(&cursors[i]);
+            if (!cursors[i].empty())
+                queue.emplace_back(&cursors[i]);
        std::make_heap(queue.begin(), queue.end());
    }

@ -233,6 +239,10 @@ public:

    Cursor & current() { return queue.front(); }

+    size_t size() { return queue.size(); }
+
+    Cursor & nextChild() { return queue[nextChildIndex()]; }
+
    void next()
    {
        assert(isValid());
@ -246,34 +256,67 @@ public:
            removeTop();
    }

+    void replaceTop(Cursor new_top)
+    {
+        current() = new_top;
+        updateTop();
+    }
+
+    void removeTop()
+    {
+        std::pop_heap(queue.begin(), queue.end());
+        queue.pop_back();
+        next_idx = 0;
+    }
+
+    void push(SortCursorImpl & cursor)
+    {
+        queue.emplace_back(&cursor);
+        std::push_heap(queue.begin(), queue.end());
+        next_idx = 0;
+    }
+
 private:
    using Container = std::vector<Cursor>;
    Container queue;

+    /// Cache comparison between first and second child if the order in queue has not been changed.
+    size_t next_idx = 0;
+
+    size_t nextChildIndex()
+    {
+        if (next_idx == 0)
+        {
+            next_idx = 1;
+
+            if (queue.size() > 2 && queue[1] < queue[2])
+                ++next_idx;
+        }
+
+        return next_idx;
+    }
+
    /// This is adapted version of the function __sift_down from libc++.
    /// Why cannot simply use std::priority_queue?
    /// - because it doesn't support updating the top element and requires pop and push instead.
+    /// Also look at "Boost.Heap" library.
    void updateTop()
    {
        size_t size = queue.size();
        if (size < 2)
            return;

-        size_t child_idx = 1;
        auto begin = queue.begin();
-        auto child_it = begin + 1;

-        /// Right child exists and is greater than left child.
-        if (size > 2 && *child_it < *(child_it + 1))
-        {
-            ++child_it;
-            ++child_idx;
-        }
+        size_t child_idx = nextChildIndex();
+        auto child_it = begin + child_idx;

        /// Check if we are in order.
        if (*child_it < *begin)
            return;

+        next_idx = 0;
+
        auto curr_it = begin;
        auto top(std::move(*begin));
        do
@ -282,11 +325,12 @@ private:
            *curr_it = std::move(*child_it);
            curr_it = child_it;

-            if ((size - 2) / 2 < child_idx)
-                break;
-
            // recompute the child based off of the updated parent
            child_idx = 2 * child_idx + 1;
+
+            if (child_idx >= size)
+                break;
+
            child_it = begin + child_idx;

            if ((child_idx + 1) < size && *child_it < *(child_it + 1))
@ -300,12 +344,6 @@ private:
        } while (!(*child_it < top));
        *curr_it = std::move(top);
    }
-
-    void removeTop()
-    {
-        std::pop_heap(queue.begin(), queue.end());
-        queue.pop_back();
-    }
 };

 }
--- a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp
@ -138,14 +138,14 @@ Block AggregatingSortedBlockInputStream::readImpl()
 }


-void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {
    size_t merged_rows = 0;

    /// We take the rows in the correct order and put them in `merged_block`, while the rows are no more than `max_block_size`
-    while (!queue.empty())
+    while (queue.isValid())
    {
-        SortCursor current = queue.top();
+        SortCursor current = queue.current();

        setPrimaryKeyRef(next_key, current);

@ -167,8 +167,6 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
            return;
        }

-        queue.pop();
-
        if (key_differs)
        {
            current_key.swap(next_key);
@ -202,8 +200,7 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s

        if (!current->isLast())
        {
-            current->next();
-            queue.push(current);
+            queue.next();
        }
        else
        {
--- a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
@ -55,7 +55,7 @@ private:
    /** We support two different cursors - with Collation and without.
     *  Templates are used instead of polymorphic SortCursor and calls to virtual functions.
     */
-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /** Extract all states of aggregate functions and merge them with the current group.
      */
--- a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp
@ -105,15 +105,15 @@ Block CollapsingSortedBlockInputStream::readImpl()
 }


-void CollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void CollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {

    MergeStopCondition stop_condition(average_block_sizes, max_block_size);
    size_t current_block_granularity;
    /// Take rows in correct order and put them into `merged_columns` until the rows no more than `max_block_size`
-    for (; !queue.empty(); ++current_pos)
+    for (; queue.isValid(); ++current_pos)
    {
-        SortCursor current = queue.top();
+        SortCursor current = queue.current();
        current_block_granularity = current->rows;

        if (current_key.empty())
@ -131,8 +131,6 @@ void CollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, st
            return;
        }

-        queue.pop();
-
        if (key_differs)
        {
            /// We write data for the previous primary key.
@ -185,8 +183,7 @@ void CollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, st

        if (!current->isLast())
        {
-            current->next();
-            queue.push(current);
+            queue.next();
        }
        else
        {
--- a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h
@ -73,7 +73,7 @@ private:
    /** We support two different cursors - with Collation and without.
     *  Templates are used instead of polymorphic SortCursors and calls to virtual functions.
     */
-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /// Output to result rows for the current primary key.
    void insertRows(MutableColumns & merged_columns, size_t block_size, MergeStopCondition & condition);
--- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp
@ -161,7 +161,7 @@ Block GraphiteRollupSortedBlockInputStream::readImpl()
 }


-void GraphiteRollupSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void GraphiteRollupSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {
    const DateLUTImpl & date_lut = DateLUT::instance();

@ -173,9 +173,9 @@ void GraphiteRollupSortedBlockInputStream::merge(MutableColumns & merged_columns
    /// contribute towards current output row.
    /// Variables starting with next_* refer to the row at the top of the queue.

-    while (!queue.empty())
+    while (queue.isValid())
    {
-        SortCursor next_cursor = queue.top();
+        SortCursor next_cursor = queue.current();

        StringRef next_path = next_cursor->all_columns[path_column_num]->getDataAt(next_cursor->pos);
        bool new_path = is_first || next_path != current_group_path;
@ -253,12 +253,9 @@ void GraphiteRollupSortedBlockInputStream::merge(MutableColumns & merged_columns
            current_group_path = next_path;
        }

-        queue.pop();
-
        if (!next_cursor->isLast())
        {
-            next_cursor->next();
-            queue.push(next_cursor);
+            queue.next();
        }
        else
        {
--- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h
@ -225,7 +225,7 @@ private:
    UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const;


-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /// Insert the values into the resulting columns, which will not be changed in the future.
    template <typename TSortCursor>
--- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
@ -150,10 +150,12 @@ MergeSortingBlocksBlockInputStream::MergeSortingBlocksBlockInputStream(

    blocks.swap(nonempty_blocks);

-    if (!has_collation)
+    if (has_collation)
+        queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
+    else if (description.size() > 1)
        queue_without_collation = SortingHeap<SortCursor>(cursors);
    else
-        queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
+        queue_simple = SortingHeap<SimpleSortCursor>(cursors);
 }


@ -169,9 +171,12 @@ Block MergeSortingBlocksBlockInputStream::readImpl()
        return res;
    }

-    return !has_collation
-        ? mergeImpl(queue_without_collation)
-        : mergeImpl(queue_with_collation);
+    if (has_collation)
+        return mergeImpl(queue_with_collation);
+    else if (description.size() > 1)
+        return mergeImpl(queue_without_collation);
+    else
+        return mergeImpl(queue_simple);
 }


@ -179,9 +184,18 @@ template <typename TSortingHeap>
 Block MergeSortingBlocksBlockInputStream::mergeImpl(TSortingHeap & queue)
 {
    size_t num_columns = header.columns();
-
    MutableColumns merged_columns = header.cloneEmptyColumns();
-    /// TODO: reserve (in each column)
+
+    /// Reserve
+    if (queue.isValid() && !blocks.empty())
+    {
+        /// The expected size of output block is the same as input block
+        size_t size_to_reserve = blocks[0].rows();
+        for (auto & column : merged_columns)
+            column->reserve(size_to_reserve);
+    }
+
+    /// TODO: Optimization when a single block left.

    /// Take rows from queue in right order and push to 'merged'.
    size_t merged_rows = 0;
@ -210,6 +224,9 @@ Block MergeSortingBlocksBlockInputStream::mergeImpl(TSortingHeap & queue)
            break;
    }

+    if (!queue.isValid())
+        blocks.clear();
+
    if (merged_rows == 0)
        return {};

--- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h
+++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h
@ -59,6 +59,7 @@ private:
    bool has_collation = false;

    SortingHeap<SortCursor> queue_without_collation;
+    SortingHeap<SimpleSortCursor> queue_simple;
    SortingHeap<SortCursorWithCollation> queue_with_collation;

    /** Two different cursors are supported - with and without Collation.
--- a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp
@ -59,9 +59,9 @@ void MergingSortedBlockInputStream::init(MutableColumns & merged_columns)
        }

        if (has_collation)
-            initQueue(queue_with_collation);
+            queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
        else
-            initQueue(queue_without_collation);
+            queue_without_collation = SortingHeap<SortCursor>(cursors);
    }

    /// Let's check that all source blocks have the same structure.
@ -82,15 +82,6 @@ void MergingSortedBlockInputStream::init(MutableColumns & merged_columns)
 }


-template <typename TSortCursor>
-void MergingSortedBlockInputStream::initQueue(std::priority_queue<TSortCursor> & queue)
-{
-    for (size_t i = 0; i < cursors.size(); ++i)
-        if (!cursors[i].empty())
-            queue.push(TSortCursor(&cursors[i]));
-}
-
-
 Block MergingSortedBlockInputStream::readImpl()
 {
    if (finished)
@ -115,7 +106,7 @@ Block MergingSortedBlockInputStream::readImpl()


 template <typename TSortCursor>
-void MergingSortedBlockInputStream::fetchNextBlock(const TSortCursor & current, std::priority_queue<TSortCursor> & queue)
+void MergingSortedBlockInputStream::fetchNextBlock(const TSortCursor & current, SortingHeap<TSortCursor> & queue)
 {
    size_t order = current->order;
    size_t size = cursors.size();
@ -125,15 +116,19 @@ void MergingSortedBlockInputStream::fetchNextBlock(const TSortCursor & current,

    while (true)
    {
-        source_blocks[order] = new detail::SharedBlock(children[order]->read());
+        source_blocks[order] = new detail::SharedBlock(children[order]->read());    /// intrusive ptr

        if (!*source_blocks[order])
+        {
+            queue.removeTop();
            break;
+        }

        if (source_blocks[order]->rows())
        {
            cursors[order].reset(*source_blocks[order]);
-            queue.push(TSortCursor(&cursors[order]));
+            queue.replaceTop(&cursors[order]);
+
            source_blocks[order]->all_columns = cursors[order].all_columns;
            source_blocks[order]->sort_columns = cursors[order].sort_columns;
            break;
@ -154,19 +149,14 @@ bool MergingSortedBlockInputStream::MergeStopCondition::checkStop() const
    return sum_rows_count >= average;
 }

-template
-void MergingSortedBlockInputStream::fetchNextBlock<SortCursor>(const SortCursor & current, std::priority_queue<SortCursor> & queue);

-template
-void MergingSortedBlockInputStream::fetchNextBlock<SortCursorWithCollation>(const SortCursorWithCollation & current, std::priority_queue<SortCursorWithCollation> & queue);
-
-
-template <typename TSortCursor>
-void MergingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<TSortCursor> & queue)
+template <typename TSortingHeap>
+void MergingSortedBlockInputStream::merge(MutableColumns & merged_columns, TSortingHeap & queue)
 {
    size_t merged_rows = 0;

    MergeStopCondition stop_condition(average_block_sizes, max_block_size);
+
    /** Increase row counters.
      * Return true if it's time to finish generating the current data block.
      */
@ -186,123 +176,100 @@ void MergingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::
        return stop_condition.checkStop();
    };

-    /// Take rows in required order and put them into `merged_columns`, while the rows are no more than `max_block_size`
-    while (!queue.empty())
+    /// Take rows in required order and put them into `merged_columns`, while the number of rows is no more than `max_block_size`
+    while (queue.isValid())
    {
-        TSortCursor current = queue.top();
+        auto current = queue.current();
        size_t current_block_granularity = current->rows;
-        queue.pop();

-        while (true)
+        /** And what if the block is totally less or equal than the rest for the current cursor?
+          * Or is there only one data source left in the queue? Then you can take the entire block on current cursor.
+          */
+        if (current->isFirst()
+            && (queue.size() == 1
+                || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild()))))
        {
-            /** And what if the block is totally less or equal than the rest for the current cursor?
-              * Or is there only one data source left in the queue? Then you can take the entire block on current cursor.
-              */
-            if (current->isFirst() && (queue.empty() || current.totallyLessOrEquals(queue.top())))
+//            std::cerr << "current block is totally less or equals\n";
+
+            /// If there is already data in the current block, we first return it. We'll get here again the next time we call the merge function.
+            if (merged_rows != 0)
            {
-    //            std::cerr << "current block is totally less or equals\n";
-
-                /// If there are already data in the current block, we first return it. We'll get here again the next time we call the merge function.
-                if (merged_rows != 0)
-                {
-                    //std::cerr << "merged rows is non-zero\n";
-                    queue.push(current);
-                    return;
-                }
-
-                /// Actually, current->order stores source number (i.e. cursors[current->order] == current)
-                size_t source_num = current->order;
-
-                if (source_num >= cursors.size())
-                    throw Exception("Logical error in MergingSortedBlockInputStream", ErrorCodes::LOGICAL_ERROR);
-
-                for (size_t i = 0; i < num_columns; ++i)
-                    merged_columns[i] = (*std::move(source_blocks[source_num]->getByPosition(i).column)).mutate();
-
-    //            std::cerr << "copied columns\n";
-
-                merged_rows = merged_columns.at(0)->size();
-
-                /// Limit output
-                if (limit && total_merged_rows + merged_rows > limit)
-                {
-                    merged_rows = limit - total_merged_rows;
-                    for (size_t i = 0; i < num_columns; ++i)
-                    {
-                        auto & column = merged_columns[i];
-                        column = (*column->cut(0, merged_rows)).mutate();
-                    }
-
-                    cancel(false);
-                    finished = true;
-                }
-
-                /// Write order of rows for other columns
-                /// this data will be used in grather stream
-                if (out_row_sources_buf)
-                {
-                    RowSourcePart row_source(source_num);
-                    for (size_t i = 0; i < merged_rows; ++i)
-                        out_row_sources_buf->write(row_source.data);
-                }
-
-                //std::cerr << "fetching next block\n";
-
-                total_merged_rows += merged_rows;
-                fetchNextBlock(current, queue);
+                //std::cerr << "merged rows is non-zero\n";
                return;
            }

-    //        std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n";
-    //        std::cerr << "Inserting row\n";
-            for (size_t i = 0; i < num_columns; ++i)
-                merged_columns[i]->insertFrom(*current->all_columns[i], current->pos);
+            /// Actually, current->order stores source number (i.e. cursors[current->order] == current)
+            size_t source_num = current->order;

+            if (source_num >= cursors.size())
+                throw Exception("Logical error in MergingSortedBlockInputStream", ErrorCodes::LOGICAL_ERROR);
+
+            for (size_t i = 0; i < num_columns; ++i)
+                merged_columns[i] = (*std::move(source_blocks[source_num]->getByPosition(i).column)).mutate();
+
+//            std::cerr << "copied columns\n";
+
+            merged_rows = merged_columns.at(0)->size();
+
+            /// Limit output
+            if (limit && total_merged_rows + merged_rows > limit)
+            {
+                merged_rows = limit - total_merged_rows;
+                for (size_t i = 0; i < num_columns; ++i)
+                {
+                    auto & column = merged_columns[i];
+                    column = (*column->cut(0, merged_rows)).mutate();
+                }
+
+                cancel(false);
+                finished = true;
+            }
+
+            /// Write order of rows for other columns
+            /// this data will be used in gather stream
            if (out_row_sources_buf)
            {
-                /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
-                RowSourcePart row_source(current->order);
-                out_row_sources_buf->write(row_source.data);
+                RowSourcePart row_source(source_num);
+                for (size_t i = 0; i < merged_rows; ++i)
+                    out_row_sources_buf->write(row_source.data);
            }

-            if (!current->isLast())
-            {
-    //            std::cerr << "moving to next row\n";
-                current->next();
+            //std::cerr << "fetching next block\n";

-                if (queue.empty() || !(current.greater(queue.top())))
-                {
-                    if (count_row_and_check_limit(current_block_granularity))
-                    {
-    //                    std::cerr << "pushing back to queue\n";
-                        queue.push(current);
-                        return;
-                    }
+            total_merged_rows += merged_rows;
+            fetchNextBlock(current, queue);
+            return;
+        }

-                    /// Do not put the cursor back in the queue, but continue to work with the current cursor.
-    //                std::cerr << "current is still on top, using current row\n";
-                    continue;
-                }
-                else
-                {
-    //                std::cerr << "next row is not least, pushing back to queue\n";
-                    queue.push(current);
-                }
-            }
-            else
-            {
-                /// We get the next block from the corresponding source, if there is one.
-    //            std::cerr << "It was last row, fetching next block\n";
-                fetchNextBlock(current, queue);
-            }
+//        std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n";
+//        std::cerr << "Inserting row\n";
+        for (size_t i = 0; i < num_columns; ++i)
+            merged_columns[i]->insertFrom(*current->all_columns[i], current->pos);

-            break;
+        if (out_row_sources_buf)
+        {
+            /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
+            RowSourcePart row_source(current->order);
+            out_row_sources_buf->write(row_source.data);
+        }
+
+        if (!current->isLast())
+        {
+//            std::cerr << "moving to next row\n";
+            queue.next();
+        }
+        else
+        {
+            /// We get the next block from the corresponding source, if there is one.
+//            std::cerr << "It was last row, fetching next block\n";
+            fetchNextBlock(current, queue);
        }

        if (count_row_and_check_limit(current_block_granularity))
            return;
    }

+    /// We have read all data. Ask children to cancel providing more data.
    cancel(false);
    finished = true;
 }
--- a/dbms/src/DataStreams/MergingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.h
@ -1,7 +1,5 @@
 #pragma once

-#include <queue>
-
 #include <boost/smart_ptr/intrusive_ptr.hpp>

 #include <common/logger_useful.h>
@ -87,7 +85,7 @@ protected:

    /// Gets the next block from the source corresponding to the `current`.
    template <typename TSortCursor>
-    void fetchNextBlock(const TSortCursor & current, std::priority_queue<TSortCursor> & queue);
+    void fetchNextBlock(const TSortCursor & current, SortingHeap<TSortCursor> & queue);


    Block header;
@ -109,14 +107,10 @@ protected:
    size_t num_columns = 0;
    std::vector<SharedBlockPtr> source_blocks;

-    using CursorImpls = std::vector<SortCursorImpl>;
-    CursorImpls cursors;
+    SortCursorImpls cursors;

-    using Queue = std::priority_queue<SortCursor>;
-    Queue queue_without_collation;
-
-    using QueueWithCollation = std::priority_queue<SortCursorWithCollation>;
-    QueueWithCollation queue_with_collation;
+    SortingHeap<SortCursor> queue_without_collation;
+    SortingHeap<SortCursorWithCollation> queue_with_collation;

    /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step)
    /// If it is not nullptr then it should be populated during execution
@ -177,13 +171,10 @@ protected:
 private:

    /** We support two different cursors - with Collation and without.
-     * Templates are used instead of polymorphic SortCursor and calls to virtual functions.
-     */
-    template <typename TSortCursor>
-    void initQueue(std::priority_queue<TSortCursor> & queue);
-
-    template <typename TSortCursor>
-    void merge(MutableColumns & merged_columns, std::priority_queue<TSortCursor> & queue);
+      * Templates are used instead of polymorphic SortCursor and calls to virtual functions.
+      */
+    template <typename TSortingHeap>
+    void merge(MutableColumns & merged_columns, TSortingHeap & queue);

    Logger * log = &Logger::get("MergingSortedBlockInputStream");

--- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
@ -48,13 +48,14 @@ Block ReplacingSortedBlockInputStream::readImpl()
 }


-void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {
    MergeStopCondition stop_condition(average_block_sizes, max_block_size);
+
    /// Take the rows in needed order and put them into `merged_columns` until rows no more than `max_block_size`
-    while (!queue.empty())
+    while (queue.isValid())
    {
-        SortCursor current = queue.top();
+        SortCursor current = queue.current();
        size_t current_block_granularity = current->rows;

        if (current_key.empty())
@ -68,8 +69,6 @@ void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std
        if (key_differs && stop_condition.checkStop())
            return;

-        queue.pop();
-
        if (key_differs)
        {
            /// Write the data for the previous primary key.
@ -98,8 +97,7 @@ void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std

        if (!current->isLast())
        {
-            current->next();
-            queue.push(current);
+            queue.next();
        }
        else
        {
--- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h
@ -52,7 +52,7 @@ private:
    /// Sources of rows with the current primary key.
    PODArray<RowSourcePart> current_row_sources;

-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /// Output into result the rows for current primary key.
    void insertRow(MutableColumns & merged_columns);
--- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp
@ -314,14 +314,14 @@ Block SummingSortedBlockInputStream::readImpl()
 }


-void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {
    merged_rows = 0;

    /// Take the rows in needed order and put them in `merged_columns` until rows no more than `max_block_size`
-    while (!queue.empty())
+    while (queue.isValid())
    {
-        SortCursor current = queue.top();
+        SortCursor current = queue.current();

        setPrimaryKeyRef(next_key, current);

@ -383,12 +383,9 @@ void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::
                    current_row_is_zero = false;
        }

-        queue.pop();
-
        if (!current->isLast())
        {
-            current->next();
-            queue.push(current);
+            queue.next();
        }
        else
        {
--- a/dbms/src/DataStreams/SummingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.h
@ -1,5 +1,7 @@
 #pragma once

+#include <queue>
+
 #include <Core/Row.h>
 #include <Core/ColumnNumbers.h>
 #include <Common/AlignedBuffer.h>
@ -140,7 +142,7 @@ private:
    /** We support two different cursors - with Collation and without.
     *  Templates are used instead of polymorphic SortCursor and calls to virtual functions.
     */
-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /// Insert the summed row for the current group into the result and updates some of per-block flags if the row is not "zero".
    void insertCurrentRowIfNeeded(MutableColumns & merged_columns);
--- a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp
@ -82,21 +82,18 @@ Block VersionedCollapsingSortedBlockInputStream::readImpl()
 }


-void VersionedCollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
+void VersionedCollapsingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
 {
    MergeStopCondition stop_condition(average_block_sizes, max_block_size);

    auto update_queue = [this, & queue](SortCursor & cursor)
    {
-        queue.pop();
-
        if (out_row_sources_buf)
            current_row_sources.emplace(cursor->order, true);

        if (!cursor->isLast())
        {
-            cursor->next();
-            queue.push(cursor);
+            queue.next();
        }
        else
        {
@ -106,9 +103,9 @@ void VersionedCollapsingSortedBlockInputStream::merge(MutableColumns & merged_co
    };

    /// Take rows in correct order and put them into `merged_columns` until the rows no more than `max_block_size`
-    while (!queue.empty())
+    while (queue.isValid())
    {
-        SortCursor current = queue.top();
+        SortCursor current = queue.current();
        size_t current_block_granularity = current->rows;

        SharedBlockRowRef next_key;
--- a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h
@ -5,7 +5,7 @@
 #include <DataStreams/MergingSortedBlockInputStream.h>
 #include <DataStreams/ColumnGathererStream.h>

-#include <deque>
+#include <queue>


 namespace DB
@ -204,7 +204,7 @@ private:
    /// Sources of rows for VERTICAL merge algorithm. Size equals to (size + number of gaps) in current_keys.
    std::queue<RowSourcePart> current_row_sources;

-    void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
+    void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);

    /// Output to result row for the current primary key.
    void insertRow(size_t skip_rows, const SharedBlockRowRef & row, MutableColumns & merged_columns);
--- a/dbms/src/DataStreams/tests/union_stream2.cpp
+++ b/dbms/src/DataStreams/tests/union_stream2.cpp
@ -57,6 +57,6 @@ catch (const Exception & e)
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
-        << e.getStackTrace().toString();
+        << e.getStackTraceString();
    return 1;
 }
--- a/dbms/src/Databases/DatabaseLazy.cpp
+++ b/dbms/src/Databases/DatabaseLazy.cpp
@ -23,7 +23,6 @@ namespace ErrorCodes
    extern const int TABLE_ALREADY_EXISTS;
    extern const int UNKNOWN_TABLE;
    extern const int UNSUPPORTED_METHOD;
-    extern const int CANNOT_CREATE_TABLE_FROM_METADATA;
    extern const int LOGICAL_ERROR;
 }

@ -255,10 +254,10 @@ StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table
            return it->second.table = table;
        }
    }
-    catch (const Exception & e)
+    catch (Exception & e)
    {
-        throw Exception("Cannot create table from metadata file " + table_metadata_path + ". Error: " + DB::getCurrentExceptionMessage(true),
-                e, DB::ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA);
+        e.addMessage("Cannot create table from metadata file " + table_metadata_path);
+        throw;
    }
 }

--- a/dbms/src/Databases/DatabaseOrdinary.cpp
+++ b/dbms/src/Databases/DatabaseOrdinary.cpp
@ -36,7 +36,6 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int CANNOT_CREATE_TABLE_FROM_METADATA;
    extern const int CANNOT_CREATE_DICTIONARY_FROM_METADATA;
    extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
    extern const int CANNOT_PARSE_TEXT;
@ -66,13 +65,10 @@ namespace
                = createTableFromAST(query, database_name, database.getTableDataPath(query), context, has_force_restore_data_flag);
            database.attachTable(table_name, table);
        }
-        catch (const Exception & e)
+        catch (Exception & e)
        {
-            throw Exception(
-                "Cannot attach table '" + query.table + "' from query " + serializeAST(query)
-                    + ". Error: " + DB::getCurrentExceptionMessage(true),
-                e,
-                DB::ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA);
+            e.addMessage("Cannot attach table '" + backQuote(query.table) + "' from query " + serializeAST(query));
+            throw;
        }
    }

@ -87,13 +83,10 @@ namespace
        {
            database.attachDictionary(query.table, context);
        }
-        catch (const Exception & e)
+        catch (Exception & e)
        {
-            throw Exception(
-                "Cannot create dictionary '" + query.table + "' from query " + serializeAST(query)
-                    + ". Error: " + DB::getCurrentExceptionMessage(true),
-                e,
-                DB::ErrorCodes::CANNOT_CREATE_DICTIONARY_FROM_METADATA);
+            e.addMessage("Cannot create dictionary '" + backQuote(query.table) + "' from query " + serializeAST(query));
+            throw;    /// Rethrow the exception currently being handled (caught by non-const reference above), with the dictionary context added.
        }
    }

@ -142,10 +135,10 @@ void DatabaseOrdinary::loadStoredObjects(
                total_dictionaries += create_query->is_dictionary;
            }
        }
-        catch (const Exception & e)
+        catch (Exception & e)
        {
-            throw Exception(
-                "Cannot parse definition from metadata file " + full_path + ". Error: " + DB::getCurrentExceptionMessage(true), e, ErrorCodes::CANNOT_PARSE_TEXT);
+            e.addMessage("Cannot parse definition from metadata file " + full_path);
+            throw;
        }

    });
--- a/dbms/src/Functions/randomPrintableASCII.cpp
+++ b/dbms/src/Functions/randomPrintableASCII.cpp
@ -74,6 +74,7 @@ public:
            data_to.resize(next_offset);
            offsets_to[row_num] = next_offset;

+            auto * data_to_ptr = data_to.data();    /// avoid assert on array indexing after end
            for (size_t pos = offset, end = offset + length; pos < end; pos += 4)    /// We have padding in column buffers that we can overwrite.
            {
                UInt64 rand = thread_local_rng();
@ -86,10 +87,10 @@ public:
                /// Printable characters are from range [32; 126].
                /// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/

-                data_to[pos + 0] = 32 + ((rand1 * 95) >> 16);
-                data_to[pos + 1] = 32 + ((rand2 * 95) >> 16);
-                data_to[pos + 2] = 32 + ((rand3 * 95) >> 16);
-                data_to[pos + 3] = 32 + ((rand4 * 95) >> 16);
+                data_to_ptr[pos + 0] = 32 + ((rand1 * 95) >> 16);
+                data_to_ptr[pos + 1] = 32 + ((rand2 * 95) >> 16);
+                data_to_ptr[pos + 2] = 32 + ((rand3 * 95) >> 16);
+                data_to_ptr[pos + 3] = 32 + ((rand4 * 95) >> 16);

                /// NOTE gcc failed to vectorize this code (aliasing of char?)
                /// TODO Implement SIMD optimizations from Danila Kutenin.
--- a/dbms/src/Functions/trap.cpp
+++ b/dbms/src/Functions/trap.cpp
@ -124,6 +124,10 @@ public:
                t1.join();
                t2.join();
            }
+            else if (mode == "throw exception")
+            {
+                std::vector<int>().at(0);
+            }
            else if (mode == "access context")
            {
                (void)context.getCurrentQueryId();
--- a/dbms/src/IO/BitHelpers.h
+++ b/dbms/src/IO/BitHelpers.h
@ -1,9 +1,10 @@
 #pragma once

-#include <IO/ReadBuffer.h>
-#include <IO/WriteBuffer.h>
 #include <Core/Types.h>
 #include <Common/BitHelpers.h>
+#include <Common/Exception.h>
+
+#include <string.h>

 #if defined(__OpenBSD__) || defined(__FreeBSD__)
 #   include <sys/endian.h>
@ -14,9 +15,16 @@
 #   define be64toh(x) OSSwapBigToHostInt64(x)
 #endif

+
 namespace DB
 {

+namespace ErrorCodes
+{
+extern const int CANNOT_WRITE_AFTER_END_OF_BUFFER;
+extern const int ATTEMPT_TO_READ_AFTER_EOF;
+}
+
 /** Reads data from underlying ReadBuffer bit by bit, max 64 bits at once.
 *
 * reads MSB bits first, imagine that you have a data:
@ -34,15 +42,20 @@ namespace DB

 class BitReader
 {
-    ReadBuffer & buf;
+    using BufferType = unsigned __int128;

-    UInt64 bits_buffer;
+    const char * source_begin;
+    const char * source_current;
+    const char * source_end;
+
+    BufferType bits_buffer;
    UInt8 bits_count;
-    static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;

 public:
-    BitReader(ReadBuffer & buf_)
-        : buf(buf_),
+    BitReader(const char * begin, size_t size)
+        : source_begin(begin),
+          source_current(begin),
+          source_end(begin + size),
          bits_buffer(0),
          bits_count(0)
    {}
@ -50,44 +63,21 @@ public:
    ~BitReader()
    {}

-    inline UInt64 readBits(UInt8 bits)
+    // reads bits_to_read high-bits from bits_buffer
+    inline UInt64 readBits(UInt8 bits_to_read)
    {
-        UInt64 result = 0;
-        bits = std::min(static_cast<UInt8>(sizeof(result) * 8), bits);
+        if (bits_to_read > bits_count)
+            fillBitBuffer();

-        while (bits != 0)
-        {
-            if (bits_count == 0)
-            {
-                fillBuffer();
-                if (bits_count == 0)
-                {
-                    // EOF.
-                    break;
-                }
-            }
-
-            const auto to_read = std::min(bits, bits_count);
-
-            const UInt64 v = bits_buffer >> (bits_count - to_read);
-            const UInt64 mask = maskLowBits<UInt64>(to_read);
-            const UInt64 value = v & mask;
-            result |= value;
-
-            // unset bits that were read
-            bits_buffer &= ~(mask << (bits_count - to_read));
-            bits_count -= to_read;
-            bits -= to_read;
-
-            result <<= std::min(bits, BIT_BUFFER_SIZE);
-        }
-
-        return result;
+        return getBitsFromBitBuffer<CONSUME>(bits_to_read);
    }

-    inline UInt64 peekBits(UInt8 /*bits*/)
+    inline UInt8 peekByte()
    {
-        return 0;
+        if (bits_count < 8)
+            fillBitBuffer();
+
+        return getBitsFromBitBuffer<PEEK>(8);
    }

    inline UInt8 readBit()
@ -95,34 +85,95 @@ public:
        return static_cast<UInt8>(readBits(1));
    }

+    // skip bits from bits_buffer
+    inline void skipBufferedBits(UInt8 bits)
+    {
+        bits_buffer <<= bits;
+        bits_count -= bits;
+    }
+
+
    inline bool eof() const
    {
-        return bits_count == 0 && buf.eof();
+        return bits_count == 0 && source_current >= source_end;
+    }
+
+    // number of bits that were already read by clients with readBits()
+    inline UInt64 count() const
+    {
+        return (source_current - source_begin) * 8 - bits_count;
+    }
+
+    inline UInt64 remaining() const
+    {
+        return (source_end - source_current) * 8 + bits_count;
    }

 private:
-    void fillBuffer()
+    enum GetBitsMode {CONSUME, PEEK};
+    // read data from internal buffer, if it has not enough bits, result is undefined.
+    template <GetBitsMode mode>
+    inline UInt64 getBitsFromBitBuffer(UInt8 bits_to_read)
    {
-        auto read = buf.read(reinterpret_cast<char *>(&bits_buffer), BIT_BUFFER_SIZE / 8);
-        bits_buffer = be64toh(bits_buffer);
-        bits_buffer >>= BIT_BUFFER_SIZE - read * 8;
+        // push down the high-bits
+        const UInt64 result = static_cast<UInt64>(bits_buffer >> (sizeof(bits_buffer) * 8 - bits_to_read));

-        bits_count = static_cast<UInt8>(read) * 8;
+        if constexpr (mode == CONSUME)
+        {
+            // 'erase' the high bits that have just been read
+            skipBufferedBits(bits_to_read);
+        }
+
+        return result;
+    }
+
+
+    // Fills internal bits_buffer with data from source, reads at most 64 bits; returns the number of bytes consumed (0 when the source is exhausted)
+    size_t fillBitBuffer()
+    {
+        const size_t available = source_end - source_current;
+        const auto bytes_to_read = std::min<size_t>(64 / 8, available);
+        if (available == 0)
+        {
+            if (bytes_to_read == 0)
+                return 0;
+            // NOTE(review): the throw below is unreachable - available == 0 forces bytes_to_read == 0, so the return above is always taken.
+            throw Exception("Buffer is empty, but requested to read "
+                            + std::to_string(bytes_to_read) + " more bytes.",
+                            ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
+        }
+
+        UInt64 tmp_buffer = 0;
+        memcpy(&tmp_buffer, source_current, bytes_to_read);
+        source_current += bytes_to_read;
+        // Treat the copied bytes as big-endian so that the first source byte becomes the most significant one.
+        tmp_buffer = be64toh(tmp_buffer);
+        // Place the new bits directly below the bits_count bits already buffered (assumes bits_count <= 64 on entry - TODO confirm).
+        bits_buffer |= BufferType(tmp_buffer) << ((sizeof(BufferType) - sizeof(tmp_buffer)) * 8 - bits_count);
+        bits_count += static_cast<UInt8>(bytes_to_read) * 8;
+
+        return bytes_to_read;
+    }
 };

 class BitWriter
 {
-    WriteBuffer & buf;
+    using BufferType = unsigned __int128;

-    UInt64 bits_buffer;
+    char * dest_begin;
+    char * dest_current;
+    char * dest_end;
+
+    BufferType bits_buffer;
    UInt8 bits_count;

    static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8;

 public:
-    BitWriter(WriteBuffer & buf_)
-        : buf(buf_),
+    BitWriter(char * begin, size_t size)
+        : dest_begin(begin),
+          dest_current(begin),
+          dest_end(begin + size),
          bits_buffer(0),
          bits_count(0)
    {}
@ -132,54 +183,59 @@ public:
        flush();
    }

-    inline void writeBits(UInt8 bits, UInt64 value)
+    // write `bits_to_write` low-bits of `value` to the buffer
+    inline void writeBits(UInt8 bits_to_write, UInt64 value)
    {
-        bits = std::min(static_cast<UInt8>(sizeof(value) * 8), bits);
-
-        while (bits > 0)
+        UInt32 capacity = BIT_BUFFER_SIZE - bits_count;
+        if (capacity < bits_to_write)
        {
-            auto v = value;
-            auto to_write = bits;
-
-            const UInt8 capacity = BIT_BUFFER_SIZE - bits_count;
-            if (capacity < bits)
-            {
-                v >>= bits - capacity;
-                to_write = capacity;
-            }
-
-            const UInt64 mask = maskLowBits<UInt64>(to_write);
-            v &= mask;
-
-            bits_buffer <<= to_write;
-            bits_buffer |= v;
-            bits_count += to_write;
-
-            if (bits_count < BIT_BUFFER_SIZE)
-                break;
-
            doFlush();
-            bits -= to_write;
+            capacity = BIT_BUFFER_SIZE - bits_count;
        }
+
+//      write low bits of value as high bits of bits_buffer
+        const UInt64 mask = maskLowBits<UInt64>(bits_to_write);
+        BufferType v = value & mask;
+        v <<= capacity - bits_to_write;
+
+        bits_buffer |= v;
+        bits_count += bits_to_write;
    }

+    // flush contents of bits_buffer to the dest_current, partial bytes are completed with zeroes.
    inline void flush()
    {
-        if (bits_count != 0)
-        {
-            bits_buffer <<= (BIT_BUFFER_SIZE - bits_count);
+        bits_count = (bits_count + 8 - 1) & ~(8 - 1); // round UP to a multiple of 8 bits (a whole byte), so doFlush will write ALL data from bits_buffer
+        while (bits_count != 0)
            doFlush();
-        }
+    }
+
+    inline UInt64 count() const
+    {
+        return (dest_current - dest_begin) * 8 + bits_count;
    }

 private:
    void doFlush()
    {
-        bits_buffer = htobe64(bits_buffer);
-        buf.write(reinterpret_cast<const char *>(&bits_buffer), (bits_count + 7) / 8);
+        // write whole bytes to the dest_current, leaving partial bits in bits_buffer
+        const size_t available = dest_end - dest_current;
+        const size_t to_write = std::min<size_t>(sizeof(UInt64), bits_count / 8); // align to 8-bit boundary

-        bits_count = 0;
-        bits_buffer = 0;
+        if (available < to_write)
+        {
+            throw Exception("Can not write past end of buffer. Space available "
+                            + std::to_string(available) + " bytes, required to write: "
+                            + std::to_string(to_write) + ".",
+                            ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER);
+        }
+
+        const auto tmp_buffer = htobe64(static_cast<UInt64>(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8));
+        memcpy(dest_current, &tmp_buffer, to_write);
+        dest_current += to_write;
+
+        bits_buffer <<= to_write * 8;
+        bits_count -= to_write * 8;
    }
 };

--- a/dbms/src/IO/BrotliWriteBuffer.cpp
+++ b/dbms/src/IO/BrotliWriteBuffer.cpp
@ -30,14 +30,14 @@ public:
    BrotliEncoderState * state;
 };

-BrotliWriteBuffer::BrotliWriteBuffer(WriteBuffer & out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
-        : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
-        , brotli(std::make_unique<BrotliStateWrapper>())
-        , in_available(0)
-        , in_data(nullptr)
-        , out_capacity(0)
-        , out_data(nullptr)
-        , out(out_)
+BrotliWriteBuffer::BrotliWriteBuffer(std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
+    : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
+    , brotli(std::make_unique<BrotliStateWrapper>())
+    , in_available(0)
+    , in_data(nullptr)
+    , out_capacity(0)
+    , out_data(nullptr)
+    , out(std::move(out_))
 {
    BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_QUALITY, static_cast<uint32_t>(compression_level));
    // Set LZ77 window size. According to brotli sources default value is 24 (c/tools/brotli.c:81)
@ -68,9 +68,9 @@ void BrotliWriteBuffer::nextImpl()

    do
    {
-        out.nextIfAtEnd();
-        out_data = reinterpret_cast<unsigned char *>(out.position());
-        out_capacity = out.buffer().end() - out.position();
+        out->nextIfAtEnd();
+        out_data = reinterpret_cast<unsigned char *>(out->position());
+        out_capacity = out->buffer().end() - out->position();

        int result = BrotliEncoderCompressStream(
                brotli->state,
@ -81,7 +81,7 @@ void BrotliWriteBuffer::nextImpl()
                &out_data,
                nullptr);

-        out.position() = out.buffer().end() - out_capacity;
+        out->position() = out->buffer().end() - out_capacity;

        if (result == 0)
        {
@ -100,9 +100,9 @@ void BrotliWriteBuffer::finish()

    while (true)
    {
-        out.nextIfAtEnd();
-        out_data = reinterpret_cast<unsigned char *>(out.position());
-        out_capacity = out.buffer().end() - out.position();
+        out->nextIfAtEnd();
+        out_data = reinterpret_cast<unsigned char *>(out->position());
+        out_capacity = out->buffer().end() - out->position();

        int result = BrotliEncoderCompressStream(
                brotli->state,
@ -113,7 +113,7 @@ void BrotliWriteBuffer::finish()
                &out_data,
                nullptr);

-        out.position() = out.buffer().end() - out_capacity;
+        out->position() = out->buffer().end() - out_capacity;

        if (BrotliEncoderIsFinished(brotli->state))
        {
--- a/dbms/src/IO/BrotliWriteBuffer.h
+++ b/dbms/src/IO/BrotliWriteBuffer.h
@ -10,11 +10,11 @@ class BrotliWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
 {
 public:
    BrotliWriteBuffer(
-            WriteBuffer & out_,
-            int compression_level,
-            size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
-            char * existing_memory = nullptr,
-            size_t alignment = 0);
+        std::unique_ptr<WriteBuffer> out_,
+        int compression_level,
+        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+        char * existing_memory = nullptr,
+        size_t alignment = 0);

    ~BrotliWriteBuffer() override;

@ -30,9 +30,9 @@ private:
    const uint8_t * in_data;

    size_t out_capacity;
-    uint8_t  * out_data;
+    uint8_t * out_data;

-    WriteBuffer & out;
+    std::unique_ptr<WriteBuffer> out;

    bool finished = false;
 };
--- a/dbms/src/IO/CompressionMethod.cpp
+++ b/dbms/src/IO/CompressionMethod.cpp
@ -0,0 +1,104 @@
+#include <IO/CompressionMethod.h>
+
+#include <IO/ReadBuffer.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ZlibInflatingReadBuffer.h>
+#include <IO/ZlibDeflatingWriteBuffer.h>
+#include <IO/BrotliReadBuffer.h>
+#include <IO/BrotliWriteBuffer.h>
+
+#include <Common/config.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+
+std::string toContentEncodingName(CompressionMethod method)
+{
+    switch (method)
+    {
+        case CompressionMethod::Gzip:   return "gzip";
+        case CompressionMethod::Zlib:   return "deflate";
+        case CompressionMethod::Brotli: return "br";
+        case CompressionMethod::None:   return "";
+    }
+    __builtin_unreachable();
+}
+
+
+CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint)
+{
+    std::string file_extension;    /// Stays empty unless the method has to be deduced from `path`.
+    if (hint.empty() || hint == "auto")
+    {
+        auto pos = path.find_last_of('.');
+        if (pos != std::string::npos)
+            file_extension = path.substr(pos + 1, std::string::npos);
+    }
+    /// An explicit hint is used as is; otherwise the extension decides (and `hint` itself when the path has no extension).
+    const std::string * method_str = file_extension.empty() ? &hint : &file_extension;
+
+    if (*method_str == "gzip" || *method_str == "gz")
+        return CompressionMethod::Gzip;
+    if (*method_str == "deflate")
+        return CompressionMethod::Zlib;
+    if (*method_str == "brotli" || *method_str == "br")
+        return CompressionMethod::Brotli;
+    if (hint.empty() || hint == "auto" || hint == "none")
+        return CompressionMethod::None;
+    /// Only an explicit, unrecognized hint gets here: an unknown file extension in auto mode means "no compression".
+    throw Exception("Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br' are supported as compression methods",
+        ErrorCodes::NOT_IMPLEMENTED);
+}
+
+
+std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
+    std::unique_ptr<ReadBuffer> nested,
+    CompressionMethod method,
+    size_t buf_size,
+    char * existing_memory,
+    size_t alignment)
+{
+    if (method == CompressionMethod::Gzip || method == CompressionMethod::Zlib)
+        return std::make_unique<ZlibInflatingReadBuffer>(std::move(nested), method, buf_size, existing_memory, alignment);
+#if USE_BROTLI
+    if (method == CompressionMethod::Brotli)
+        return std::make_unique<BrotliReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
+#endif
+
+    if (method == CompressionMethod::None)
+        return nested;
+
+    throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED);
+}
+
+
+std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
+    std::unique_ptr<WriteBuffer> nested,
+    CompressionMethod method,
+    int level,
+    size_t buf_size,
+    char * existing_memory,
+    size_t alignment)
+{
+    if (method == DB::CompressionMethod::Gzip || method == CompressionMethod::Zlib)
+        return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(nested), method, level, buf_size, existing_memory, alignment);
+
+#if USE_BROTLI
+    if (method == DB::CompressionMethod::Brotli)
+        return std::make_unique<BrotliWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
+#endif
+
+    if (method == CompressionMethod::None)
+        return nested;
+
+    throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED);
+}
+
+}
--- a/dbms/src/IO/CompressionMethod.h
+++ b/dbms/src/IO/CompressionMethod.h
@ -1,18 +1,57 @@
 #pragma once

+#include <string>
+#include <memory>
+
+#include <Core/Defines.h>
+
+
 namespace DB
 {

+class ReadBuffer;
+class WriteBuffer;
+
+/** These are "generally recognizable" compression methods for data import/export.
+  * Do not confuse them with the more efficient compression methods used by ClickHouse internally
+  *  (they use non-standard framing, indexes, checksums...)
+  */
+
 enum class CompressionMethod
 {
+    None,
    /// DEFLATE compression with gzip header and CRC32 checksum.
    /// This option corresponds to files produced by gzip(1) or HTTP Content-Encoding: gzip.
    Gzip,
    /// DEFLATE compression with zlib header and Adler32 checksum.
    /// This option corresponds to HTTP Content-Encoding: deflate.
    Zlib,
-    Brotli,
-    None
+    Brotli
 };

+/// How the compression method is named in HTTP.
+std::string toContentEncodingName(CompressionMethod method);
+
+/** Choose compression method from path and hint.
+  * if hint is "auto" or empty string, then path is analyzed,
+  *  otherwise path parameter is ignored and hint is used as compression method name.
+  * path is arbitrary string that will be analyzed for file extension (gz, br...) that determines compression.
+  */
+CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint);
+
+std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
+    std::unique_ptr<ReadBuffer> nested,
+    CompressionMethod method,
+    size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+    char * existing_memory = nullptr,
+    size_t alignment = 0);
+
+std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
+    std::unique_ptr<WriteBuffer> nested,
+    CompressionMethod method,
+    int level,
+    size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
+    char * existing_memory = nullptr,
+    size_t alignment = 0);
+
 }
--- a/dbms/src/IO/MMapReadBufferFromFile.cpp
+++ b/dbms/src/IO/MMapReadBufferFromFile.cpp
@ -22,7 +22,7 @@ namespace ErrorCodes
 }


-void MMapReadBufferFromFile::open(const std::string & file_name)
+void MMapReadBufferFromFile::open()
 {
    ProfileEvents::increment(ProfileEvents::FileOpen);

@ -34,16 +34,24 @@ void MMapReadBufferFromFile::open(const std::string & file_name)
 }


-MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name, size_t offset, size_t length_)
+std::string MMapReadBufferFromFile::getFileName() const
 {
-    open(file_name);
+    return file_name;
+}
+
+
+MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_)
+    : file_name(file_name_)
+{
+    open();
    init(fd, offset, length_);
 }


-MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name, size_t offset)
+MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name_, size_t offset)
+    : file_name(file_name_)
 {
-    open(file_name);
+    open();
    init(fd, offset);
 }

--- a/dbms/src/IO/MMapReadBufferFromFile.h
+++ b/dbms/src/IO/MMapReadBufferFromFile.h
@ -16,21 +16,24 @@ namespace DB
 class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor
 {
 public:
-    MMapReadBufferFromFile(const std::string & file_name, size_t offset, size_t length_);
+    MMapReadBufferFromFile(const std::string & file_name_, size_t offset, size_t length_);

    /// Map till end of file.
-    MMapReadBufferFromFile(const std::string & file_name, size_t offset);
+    MMapReadBufferFromFile(const std::string & file_name_, size_t offset);

    ~MMapReadBufferFromFile() override;

    void close();

+    std::string getFileName() const override;
+
 private:
    int fd = -1;
+    std::string file_name;

    CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};

-    void open(const std::string & file_name);
+    void open();
 };

 }
--- a/dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp
+++ b/dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp
@ -5,6 +5,8 @@

 #include <Common/ProfileEvents.h>
 #include <Common/formatReadable.h>
+#include <Common/Exception.h>
+#include <IO/WriteHelpers.h>
 #include <IO/MMapReadBufferFromFileDescriptor.h>


@ -18,6 +20,8 @@ namespace ErrorCodes
    extern const int CANNOT_STAT;
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int CANNOT_SEEK_THROUGH_FILE;
 }


@ -34,6 +38,7 @@ void MMapReadBufferFromFileDescriptor::init(int fd_, size_t offset, size_t lengt
                ErrorCodes::CANNOT_ALLOCATE_MEMORY);

        BufferBase::set(static_cast<char *>(buf), length, 0);
+        ReadBuffer::padded = (length % 4096) > 0 && (length % 4096) <= (4096 - 15); /// TODO determine page size
    }
 }

@ -58,14 +63,12 @@ void MMapReadBufferFromFileDescriptor::init(int fd_, size_t offset)


 MMapReadBufferFromFileDescriptor::MMapReadBufferFromFileDescriptor(int fd_, size_t offset_, size_t length_)
-    : MMapReadBufferFromFileDescriptor()
 {
    init(fd_, offset_, length_);
 }


 MMapReadBufferFromFileDescriptor::MMapReadBufferFromFileDescriptor(int fd_, size_t offset_)
-    : MMapReadBufferFromFileDescriptor()
 {
    init(fd_, offset_);
 }
@ -87,4 +90,39 @@ void MMapReadBufferFromFileDescriptor::finish()
    length = 0;
 }

+/// This class only knows a descriptor, not a path, so a "(fd = N)" placeholder is returned.
+std::string MMapReadBufferFromFileDescriptor::getFileName() const
+{
+    std::string description = "(fd = ";
+    description += toString(fd);
+    description += ")";
+    return description;
+}
+
+/// Returns the descriptor stored in the `fd` member (the member is initialized to -1 in the class definition).
+int MMapReadBufferFromFileDescriptor::getFD() const
+{
+    return fd;
+}
+
+/// Reports the current read position as the value of count().
+/// NOTE(review): presumably this is relative to the start of the mapping rather than the start of the file
+/// when init() was called with a non-zero offset - confirm against callers.
+off_t MMapReadBufferFromFileDescriptor::getPositionInFile()
+{
+    return count();
+}
+
+/// Moves the read cursor within the buffer; only buffer pointers are touched here.
+/// Accepts SEEK_SET (absolute) and SEEK_CUR (relative to count()); any other whence throws ARGUMENT_OUT_OF_BOUND.
+/// Throws CANNOT_SEEK_THROUGH_FILE if the resulting position is outside [0, buffer size].
+/// Returns the new position.
+off_t MMapReadBufferFromFileDescriptor::doSeek(off_t offset, int whence)
+{
+    if (whence != SEEK_SET && whence != SEEK_CUR)
+        throw Exception("MMapReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+
+    const off_t target = (whence == SEEK_SET) ? offset : static_cast<off_t>(count() + offset);
+
+    /// Restore the full buffer before validating and applying the new position.
+    working_buffer = internal_buffer;
+
+    const bool in_bounds = target >= 0 && target <= off_t(working_buffer.size());
+    if (!in_bounds)
+        throw Exception("Cannot seek through file " + getFileName()
+            + " because seek position (" + toString(target) + ") is out of bounds [0, " + toString(working_buffer.size()) + "]",
+            ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+    position() = working_buffer.begin() + target;
+    return target;
+}
+
 }
--- a/dbms/src/IO/MMapReadBufferFromFileDescriptor.h
+++ b/dbms/src/IO/MMapReadBufferFromFileDescriptor.h
@ -1,6 +1,6 @@
 #pragma once

-#include <IO/ReadBuffer.h>
+#include <IO/ReadBufferFromFileBase.h>


 namespace DB
@ -11,14 +11,16 @@ namespace DB
  * Also you cannot control whether and how long actual IO take place,
  *  so this method is not manageable and not recommended for anything except benchmarks.
  */
-class MMapReadBufferFromFileDescriptor : public ReadBuffer
+class MMapReadBufferFromFileDescriptor : public ReadBufferFromFileBase
 {
 protected:
-    MMapReadBufferFromFileDescriptor() : ReadBuffer(nullptr, 0) {}
+    MMapReadBufferFromFileDescriptor() {}

    void init(int fd_, size_t offset, size_t length_);
    void init(int fd_, size_t offset);

+    off_t doSeek(off_t off, int whence) override;
+
 public:
    MMapReadBufferFromFileDescriptor(int fd_, size_t offset_, size_t length_);

@ -30,6 +32,10 @@ public:
    /// unmap memory before call to destructor
    void finish();

+    off_t getPositionInFile() override;
+    std::string getFileName() const override;
+    int getFD() const override;
+
 private:
    size_t length = 0;
    int fd = -1;
--- a/dbms/src/IO/ReadBufferFromFileBase.cpp
+++ b/dbms/src/IO/ReadBufferFromFileBase.cpp
@ -3,6 +3,11 @@
 namespace DB
 {

+/// Default constructor: initializes the BufferWithOwnMemory base with size 0.
+/// Intended for subclasses that set up the buffer themselves afterwards
+/// (MMapReadBufferFromFileDescriptor default-constructs this base and assigns the buffer in init()).
+ReadBufferFromFileBase::ReadBufferFromFileBase()
+    : BufferWithOwnMemory<ReadBuffer>(0)
+{
+}
+
 ReadBufferFromFileBase::ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment)
    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
 {
--- a/dbms/src/IO/ReadBufferFromFileBase.h
+++ b/dbms/src/IO/ReadBufferFromFileBase.h
@ -14,6 +14,7 @@ namespace DB
 class ReadBufferFromFileBase : public BufferWithOwnMemory<ReadBuffer>
 {
 public:
+    ReadBufferFromFileBase();
    ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
    ReadBufferFromFileBase(ReadBufferFromFileBase &&) = default;
    ~ReadBufferFromFileBase() override;
--- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp
@ -101,10 +101,12 @@ bool ReadBufferFromFileDescriptor::nextImpl()
 /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen.
 off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence)
 {
-    off_t new_pos = offset;
-    if (whence == SEEK_CUR)
+    off_t new_pos;
+    if (whence == SEEK_SET)
+        new_pos = offset;
+    else if (whence == SEEK_CUR)
        new_pos = pos_in_file - (working_buffer.end() - pos) + offset;
-    else if (whence != SEEK_SET)
+    else
        throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

    /// Position is unchanged.
--- a/dbms/src/IO/ReadHelpers.cpp
+++ b/dbms/src/IO/ReadHelpers.cpp
@ -965,7 +965,7 @@ void readException(Exception & e, ReadBuffer & buf, const String & additional_me
    String name;
    String message;
    String stack_trace;
-    bool has_nested = false;
+    bool has_nested = false;    /// Obsolete

    readBinary(code, buf);
    readBinary(name, buf);
@ -986,14 +986,7 @@ void readException(Exception & e, ReadBuffer & buf, const String & additional_me
    if (!stack_trace.empty())
        out << " Stack trace:\n\n" << stack_trace;

-    if (has_nested)
-    {
-        Exception nested;
-        readException(nested, buf);
-        e = Exception(out.str(), nested, code);
-    }
-    else
-        e = Exception(out.str(), code);
+    e = Exception(out.str(), code);
 }

 void readAndThrowException(ReadBuffer & buf, const String & additional_message)
--- a/dbms/src/IO/ReadHelpers.h
+++ b/dbms/src/IO/ReadHelpers.h
@ -29,22 +29,13 @@
 #include <IO/CompressionMethod.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadBufferFromMemory.h>
+#include <IO/BufferWithOwnMemory.h>
 #include <IO/VarInt.h>
-#include <IO/ZlibInflatingReadBuffer.h>

 #include <DataTypes/DataTypeDateTime.h>

-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wdouble-promotion"
-#endif
-
 #include <double-conversion/double-conversion.h>

-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-

 /// 1 GiB
 #define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
@ -1024,21 +1015,11 @@ void skipToNextLineOrEOF(ReadBuffer & buf);
 /// Skip to next character after next unescaped \n. If no \n in stream, skip to end. Does not throw on invalid escape sequences.
 void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);

-template <class TReadBuffer, class... Types>
-std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args)
-{
-    if (method == DB::CompressionMethod::Gzip)
-    {
-        auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...);
-        return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
-    }
-    return std::make_unique<TReadBuffer>(args...);
-}

 /** This function just copies the data from buffer's internal position (in.position())
  * to current position (from arguments) into memory.
  */
-void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
+void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current);

 /** This function is negative to eof().
  * In fact it returns whether the data was loaded to internal ReadBuffers's buffer or not.
@ -1047,6 +1028,6 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current);
  * of our buffer and the current cursor in the end of the buffer. When we call eof() it calls next().
  * And this function can fill the buffer with new data, so we will lose the data from previous buffer state.
  */
-bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current);
+bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);

 }
--- a/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
+++ b/dbms/src/IO/WriteBufferFromHTTPServerResponse.cpp
@ -105,67 +105,41 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
        {
            if (compress)
            {
-                if (compression_method == CompressionMethod::Gzip)
-                {
-#if defined(POCO_CLICKHOUSE_PATCH)
-                    *response_header_ostr << "Content-Encoding: gzip\r\n";
-#else
-                    response.set("Content-Encoding", "gzip");
-                    response_body_ostr = &(response.send());
-#endif
-                    out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
-                    deflating_buf.emplace(std::move(out_raw), compression_method, compression_level, working_buffer.size(), working_buffer.begin());
-                    out = &*deflating_buf;
-                }
-                else if (compression_method == CompressionMethod::Zlib)
-                {
-#if defined(POCO_CLICKHOUSE_PATCH)
-                    *response_header_ostr << "Content-Encoding: deflate\r\n";
-#else
-                    response.set("Content-Encoding", "deflate");
-                    response_body_ostr = &(response.send());
-#endif
-                    out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
-                    deflating_buf.emplace(std::move(out_raw), compression_method, compression_level, working_buffer.size(), working_buffer.begin());
-                    out = &*deflating_buf;
-                }
-#if USE_BROTLI
-                else if (compression_method == CompressionMethod::Brotli)
-                {
-#if defined(POCO_CLICKHOUSE_PATCH)
-                    *response_header_ostr << "Content-Encoding: br\r\n";
-#else
-                    response.set("Content-Encoding", "br");
-                    response_body_ostr = &(response.send());
-#endif
-                    out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
-                    brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin());
-                    out = &*brotli_buf;
-                }
-#endif
+                auto content_encoding_name = toContentEncodingName(compression_method);

-                else
-                    throw Exception("Logical error: unknown compression method passed to WriteBufferFromHTTPServerResponse",
-                                    ErrorCodes::LOGICAL_ERROR);
-                /// Use memory allocated for the outer buffer in the buffer pointed to by out. This avoids extra allocation and copy.
+#if defined(POCO_CLICKHOUSE_PATCH)
+                *response_header_ostr << "Content-Encoding: " << content_encoding_name << "\r\n";
+#else
+                response.set("Content-Encoding", content_encoding_name);
+#endif
            }
-            else
-            {
+
 #if !defined(POCO_CLICKHOUSE_PATCH)
-                response_body_ostr = &(response.send());
+            response_body_ostr = &(response.send());
 #endif

-                out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr, working_buffer.size(), working_buffer.begin());
-                out = &*out_raw;
-            }
+            /// We reuse our buffer in "out" to avoid extra allocations and copies.
+
+            if (compress)
+                out = wrapWriteBufferWithCompressionMethod(
+                    std::make_unique<WriteBufferFromOStream>(*response_body_ostr),
+                    compress ? compression_method : CompressionMethod::None,
+                    compression_level,
+                    working_buffer.size(),
+                    working_buffer.begin());
+            else
+                out = std::make_unique<WriteBufferFromOStream>(
+                    *response_body_ostr,
+                    working_buffer.size(),
+                    working_buffer.begin());
        }

        finishSendHeaders();
-
    }

    if (out)
    {
+        out->buffer() = buffer();
        out->position() = position();
        out->next();
    }
@ -177,9 +151,8 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
    Poco::Net::HTTPServerResponse & response_,
    unsigned keep_alive_timeout_,
    bool compress_,
-    CompressionMethod compression_method_,
-    size_t size)
-    : BufferWithOwnMemory<WriteBuffer>(size)
+    CompressionMethod compression_method_)
+    : BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE)
    , request(request_)
    , response(response_)
    , keep_alive_timeout(keep_alive_timeout_)
@ -215,6 +188,9 @@ void WriteBufferFromHTTPServerResponse::finalize()
    if (offset())
    {
        next();
+
+        if (out)
+            out.reset();
    }
    else
    {
--- a/dbms/src/IO/WriteBufferFromHTTPServerResponse.h
+++ b/dbms/src/IO/WriteBufferFromHTTPServerResponse.h
@ -8,8 +8,6 @@
 #include <IO/WriteBuffer.h>
 #include <IO/BufferWithOwnMemory.h>
 #include <IO/WriteBufferFromOStream.h>
-#include <IO/ZlibDeflatingWriteBuffer.h>
-#include <IO/BrotliWriteBuffer.h>
 #include <IO/HTTPCommon.h>
 #include <IO/Progress.h>
 #include <Common/NetException.h>
@ -52,7 +50,7 @@ private:
    unsigned keep_alive_timeout = 0;
    bool compress = false;
    CompressionMethod compression_method;
-    int compression_level = Z_DEFAULT_COMPRESSION;
+    int compression_level = 1;

    std::ostream * response_body_ostr = nullptr;

@ -60,13 +58,7 @@ private:
    std::ostream * response_header_ostr = nullptr;
 #endif

-    std::unique_ptr<WriteBufferFromOStream> out_raw;
-    std::optional<ZlibDeflatingWriteBuffer> deflating_buf;
-#if USE_BROTLI
-    std::optional<BrotliWriteBuffer> brotli_buf;
-#endif
-
-    WriteBuffer * out = nullptr;     /// Uncompressed HTTP body is written to this buffer. Points to out_raw or possibly to deflating_buf.
+    std::unique_ptr<WriteBuffer> out;

    bool headers_started_sending = false;
    bool headers_finished_sending = false;    /// If true, you could not add any headers.
@ -99,8 +91,7 @@ public:
        Poco::Net::HTTPServerResponse & response_,
        unsigned keep_alive_timeout_,
        bool compress_ = false,        /// If true - set Content-Encoding header and compress the result.
-        CompressionMethod compression_method_ = CompressionMethod::Gzip,
-        size_t size = DBMS_DEFAULT_BUFFER_SIZE);
+        CompressionMethod compression_method_ = CompressionMethod::None);

    /// Writes progess in repeating HTTP headers.
    void onProgress(const Progress & progress);
--- a/dbms/src/IO/WriteHelpers.cpp
+++ b/dbms/src/IO/WriteHelpers.cpp
@ -48,7 +48,6 @@ void formatUUID(std::reverse_iterator<const UInt8 *> src16, UInt8 * dst36)
 }


-
 void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trace)
 {
    writeBinary(e.code(), buf);
@ -56,14 +55,11 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac
    writeBinary(e.displayText(), buf);

    if (with_stack_trace)
-        writeBinary(e.getStackTrace().toString(), buf);
+        writeBinary(e.getStackTraceString(), buf);
    else
        writeBinary(String(), buf);

-    bool has_nested = e.nested() != nullptr;
+    bool has_nested = false;
    writeBinary(has_nested, buf);
-
-    if (has_nested)
-        writeException(Exception(Exception::CreateFromPoco, *e.nested()), buf, with_stack_trace);
 }
 }
--- a/dbms/src/IO/WriteHelpers.h
+++ b/dbms/src/IO/WriteHelpers.h
@ -26,10 +26,12 @@
 #include <IO/VarInt.h>
 #include <IO/DoubleConverter.h>
 #include <IO/WriteBufferFromString.h>
-#include <IO/ZlibDeflatingWriteBuffer.h>
+
+#include <ryu/ryu.h>

 #include <Formats/FormatSettings.h>

+
 namespace DB
 {

@ -115,21 +117,108 @@ inline void writeBoolText(bool x, WriteBuffer & buf)
    writeChar(x ? '1' : '0', buf);
 }

-template <typename T>
-inline size_t writeFloatTextFastPath(T x, char * buffer, int len)
+
+struct DecomposedFloat64
 {
-    using Converter = DoubleConverter<false>;
-    double_conversion::StringBuilder builder{buffer, len};
+    DecomposedFloat64(double x)
+    {
+        memcpy(&x_uint, &x, sizeof(x));
+    }
+
+    uint64_t x_uint;
+
+    bool sign() const
+    {
+        return x_uint >> 63;
+    }
+
+    uint16_t exponent() const
+    {
+        return (x_uint >> 52) & 0x7FF;
+    }
+
+    int16_t normalized_exponent() const
+    {
+        return int16_t(exponent()) - 1023;
+    }
+
+    /// Returns the stored fraction field: bits 0..51 of the representation, without the implicit leading one.
+    /// The mask is exactly 52 bits wide so that nothing from the exponent field (which starts at bit 52) leaks in.
+    uint64_t mantissa() const
+    {
+        return x_uint & ((1ULL << 52) - 1);
+    }
+
+    /// NOTE Probably floating point instructions can be better.
+    bool is_inside_int64() const
+    {
+        return x_uint == 0
+            || (normalized_exponent() >= 0 && normalized_exponent() <= 52
+                && ((mantissa() & ((1ULL << (52 - normalized_exponent())) - 1)) == 0));
+    }
+};
+
+/// Bit-level view of a 32-bit float: sign in bit 31, biased exponent in bits 23..30, fraction in bits 0..22.
+struct DecomposedFloat32
+{
+    uint32_t x_uint;
+
+    DecomposedFloat32(float x)
+    {
+        /// Copy the raw bytes of the float into the integer representation.
+        memcpy(&x_uint, &x, sizeof(x));
+    }
+
+    /// True when the sign bit is set.
+    bool sign() const
+    {
+        return (x_uint >> 31) != 0;
+    }
+
+    /// Biased exponent field.
+    uint16_t exponent() const
+    {
+        return static_cast<uint16_t>((x_uint >> 23) & 0xFF);
+    }
+
+    /// Exponent with the bias of 127 removed.
+    int16_t normalized_exponent() const
+    {
+        return static_cast<int16_t>(int16_t(exponent()) - 127);
+    }
+
+    /// Fraction field (23 low bits).
+    uint32_t mantissa() const
+    {
+        return x_uint & ((1U << 23) - 1);
+    }
+
+    /// True for +0 and for values whose unbiased exponent is within [0, 23]
+    /// and whose fraction bits below the binary point are all zero (the sign is not examined).
+    bool is_inside_int32() const
+    {
+        if (x_uint == 0)
+            return true;
+
+        const int16_t unbiased = normalized_exponent();
+        if (unbiased < 0 || unbiased > 23)
+            return false;
+
+        const uint64_t fraction_bits = (1ULL << (23 - unbiased)) - 1;
+        return (mantissa() & fraction_bits) == 0;
+    }
+};
+
+template <typename T>
+inline size_t writeFloatTextFastPath(T x, char * buffer)
+{
+    int result = 0;

-    bool result = false;
    if constexpr (std::is_same_v<T, double>)
-        result = Converter::instance().ToShortest(x, &builder);
-    else
-        result = Converter::instance().ToShortestSingle(x, &builder);
+    {
+        /// The library Ryu has low performance on integers.
+        /// This workaround improves performance 6..10 times.

-    if (!result)
+        if (DecomposedFloat64(x).is_inside_int64())
+            result = itoa(Int64(x), buffer) - buffer;
+        else
+            result = d2s_buffered_n(x, buffer);
+    }
+    else
+    {
+        if (DecomposedFloat32(x).is_inside_int32())
+            result = itoa(Int32(x), buffer) - buffer;
+        else
+            result = f2s_buffered_n(x, buffer);
+    }
+
+    if (result <= 0)
        throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
-    return builder.position();
+    return result;
 }

 template <typename T>
@ -140,23 +229,13 @@ inline void writeFloatText(T x, WriteBuffer & buf)
    using Converter = DoubleConverter<false>;
    if (likely(buf.available() >= Converter::MAX_REPRESENTATION_LENGTH))
    {
-        buf.position() += writeFloatTextFastPath(x, buf.position(), Converter::MAX_REPRESENTATION_LENGTH);
+        buf.position() += writeFloatTextFastPath(x, buf.position());
        return;
    }

    Converter::BufferType buffer;
-    double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
-
-    bool result = false;
-    if constexpr (std::is_same_v<T, double>)
-        result = Converter::instance().ToShortest(x, &builder);
-    else
-        result = Converter::instance().ToShortestSingle(x, &builder);
-
-    if (!result)
-        throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
-
-    buf.write(buffer, builder.position());
+    size_t result = writeFloatTextFastPath(x, buffer);
+    buf.write(buffer, result);
 }


@ -955,15 +1034,4 @@ inline String toString(const T & x)
    return buf.str();
 }

-template <class TWriteBuffer, class... Types>
-std::unique_ptr<WriteBuffer> getWriteBuffer(const DB::CompressionMethod method, Types&&... args)
-{
-    if (method == DB::CompressionMethod::Gzip)
-    {
-        auto write_buf = std::make_unique<TWriteBuffer>(std::forward<Types>(args)...);
-        return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(write_buf), method, 1 /* compression level */);
-    }
-    return std::make_unique<TWriteBuffer>(args...);
-}
-
 }
--- a/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp
+++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp
@ -5,6 +5,12 @@
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int ZLIB_DEFLATE_FAILED;
+}
+
+
 ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
        std::unique_ptr<WriteBuffer> out_,
        CompressionMethod compression_method,
@ -84,6 +90,21 @@ void ZlibDeflatingWriteBuffer::finish()

    next();

+    /// https://github.com/zlib-ng/zlib-ng/issues/494
+    do
+    {
+        out->nextIfAtEnd();
+        zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
+        zstr.avail_out = out->buffer().end() - out->position();
+
+        int rc = deflate(&zstr, Z_FULL_FLUSH);
+        out->position() = out->buffer().end() - zstr.avail_out;
+
+        if (rc != Z_OK)
+            throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
+    }
+    while (zstr.avail_out == 0);
+
    while (true)
    {
        out->nextIfAtEnd();
--- a/dbms/src/IO/ZlibDeflatingWriteBuffer.h
+++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.h
@ -10,11 +10,6 @@
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int ZLIB_DEFLATE_FAILED;
-}
-
 /// Performs compression using zlib library and writes compressed data to out_ WriteBuffer.
 class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
 {
--- a/dbms/src/IO/createReadBufferFromFileBase.cpp
+++ b/dbms/src/IO/createReadBufferFromFileBase.cpp
@ -3,6 +3,7 @@
 #if defined(__linux__) || defined(__FreeBSD__)
 #include <IO/ReadBufferAIO.h>
 #endif
+#include <IO/MMapReadBufferFromFile.h>
 #include <Common/ProfileEvents.h>


@ -11,13 +12,17 @@ namespace ProfileEvents
    extern const Event CreatedReadBufferOrdinary;
    extern const Event CreatedReadBufferAIO;
    extern const Event CreatedReadBufferAIOFailed;
+    extern const Event CreatedReadBufferMMap;
+    extern const Event CreatedReadBufferMMapFailed;
 }

 namespace DB
 {

-std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(const std::string & filename_, size_t estimated_size,
-        size_t aio_threshold, size_t buffer_size_, int flags_, char * existing_memory_, size_t alignment)
+std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
+    const std::string & filename_,
+    size_t estimated_size, size_t aio_threshold, size_t mmap_threshold,
+    size_t buffer_size_, int flags_, char * existing_memory_, size_t alignment)
 {
 #if defined(__linux__) || defined(__FreeBSD__)
    if (aio_threshold && estimated_size >= aio_threshold)
@ -40,6 +45,21 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(const std::
    (void)estimated_size;
 #endif

+    if (!existing_memory_ && mmap_threshold && estimated_size >= mmap_threshold)
+    {
+        try
+        {
+            auto res = std::make_unique<MMapReadBufferFromFile>(filename_, 0);
+            ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap);
+            return res;
+        }
+        catch (const ErrnoException &)
+        {
+            /// Fallback if mmap is not supported (example: pipe).
+            ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed);
+        }
+    }
+
    ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary);
    return std::make_unique<ReadBufferFromFile>(filename_, buffer_size_, flags_, existing_memory_, alignment);
 }
--- a/dbms/src/IO/createReadBufferFromFileBase.h
+++ b/dbms/src/IO/createReadBufferFromFileBase.h
@ -19,6 +19,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
    const std::string & filename_,
    size_t estimated_size,
    size_t aio_threshold,
+    size_t mmap_threshold,
    size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
    int flags_ = -1,
    char * existing_memory_ = nullptr,
--- a/dbms/src/IO/tests/CMakeLists.txt
+++ b/dbms/src/IO/tests/CMakeLists.txt
@ -78,7 +78,7 @@ add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp)
 target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)

 add_executable (zlib_ng_bug zlib_ng_bug.cpp)
-target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY})
-if(NOT USE_INTERNAL_POCO_LIBRARY)
-    target_include_directories(zlib_ng_bug SYSTEM BEFORE PRIVATE ${Poco_INCLUDE_DIRS})
-endif()
+target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY} ${ZLIB_LIBRARY})
+
+add_executable (ryu_test ryu_test.cpp)
+target_link_libraries (ryu_test PRIVATE ryu)
--- a/dbms/src/IO/tests/gtest_bit_io.cpp
+++ b/dbms/src/IO/tests/gtest_bit_io.cpp
@ -36,11 +36,11 @@ std::string bin(const T & value, size_t bits = sizeof(T)*8)
            .to_string().substr(MAX_BITS - bits, bits);
 }

+// gets N low bits of value
 template <typename T>
 T getBits(UInt8 bits, const T & value)
 {
-    const T mask = ((static_cast<T>(1) << static_cast<T>(bits)) - 1);
-    return value & mask;
+    return value & maskLowBits<T>(bits);
 }

 template <typename T>
@ -83,12 +83,36 @@ std::string dumpContents(const T& container,
    return sstr.str();
 }

+/// Compares two values byte-by-byte and produces a gtest assertion result.
+/// Fails if the operands differ in size or in any byte; on a byte mismatch the message shows both values
+/// in binary (via bin()) and in hex. Intended for non-negative integral operands (std::log2 is applied to them).
+template <typename ValueLeft, typename ValueRight>
+::testing::AssertionResult BinaryEqual(const ValueLeft & left, const ValueRight & right)
+{
+    if (sizeof(left) != sizeof(right))
+        return ::testing::AssertionFailure()
+                << "Sizes do not match, expected: " << sizeof(left) << " actual: " << sizeof(right);
+
+    /// Sizes are equal at this point, so either one can be used for the comparison.
+    if (memcmp(&left, &right, sizeof(left)) != 0)
+    {
+        const auto l_bits = left ? static_cast<size_t>(std::log2(left)) : 0;
+        const auto r_bits = right ? static_cast<size_t>(std::log2(right)) : 0;
+
+        /// Clamp to the width of the type: log2 of a value close to 2^64 rounds up to 64 in double precision,
+        /// which would request 65 bits from bin() and make its substr() offset go out of range.
+        const size_t bits = std::min(std::max(l_bits, r_bits) + 1, sizeof(left) * 8);
+
+        return ::testing::AssertionFailure()
+                << "Values are binary different,\n"
+                << "\texpected: 0b" << bin(left, bits) << " (" << std::hex << left << "),\n"
+                << "\tactual  : 0b" << bin(right, bits) << " (" <<std::hex << right << ").";
+    }
+
+    return ::testing::AssertionSuccess();
+}
+
 struct TestCaseParameter
 {
    std::vector<std::pair<UInt8, UInt64>> bits_and_vals;
    std::string expected_buffer_binary;

-    explicit TestCaseParameter(std::vector<std::pair<UInt8, UInt64>> vals, std::string binary = std::string{})
+    TestCaseParameter(std::vector<std::pair<UInt8, UInt64>> vals, std::string binary = std::string{})
        : bits_and_vals(std::move(vals)),
          expected_buffer_binary(binary)
    {}
@ -114,8 +138,7 @@ TEST_P(BitIO, WriteAndRead)
    PODArray<char> data(max_buffer_size);

    {
-        WriteBuffer write_buffer(data.data(), data.size());
-        BitWriter writer(write_buffer);
+        BitWriter writer(data.data(), data.size());
        for (const auto & bv : bits_and_vals)
        {
            writer.writeBits(bv.first, bv.second);
@ -133,38 +156,73 @@ TEST_P(BitIO, WriteAndRead)
            ASSERT_EQ(expected_buffer_binary, actual_buffer_binary);
        }

-        BitReader reader(read_buffer);
+        BitReader reader(data.data(), data.size());

+        int bitpos = 0;
        int item = 0;
        for (const auto & bv : bits_and_vals)
        {
            SCOPED_TRACE(::testing::Message()
-                         << "item #" << item << ", width: " << static_cast<UInt32>(bv.first)
-                         << ", value: " << bin(bv.second)
-                         << ".\n\n\nBuffer memory:\n" << dumpContents(data));
+                         << "item #" << item << " of " << bits_and_vals.size() << ", width: " << static_cast<UInt32>(bv.first)
+                         << ", value: " << bv.second << "(" << bin(bv.second) << ")"
+                         << ", at bit position: " << std::dec << reader.count()
+                         << ".\nBuffer memory:\n" << dumpContents(data));

-            //EXPECT_EQ(getBits(bv.first, bv.second), reader.peekBits(bv.first));
-            EXPECT_EQ(getBits(bv.first, bv.second), reader.readBits(bv.first));
+//            const UInt8 next_byte = getBits(bv.first, bv.second) &
+            ASSERT_TRUE(BinaryEqual(getBits(bv.first, bv.second), reader.readBits(bv.first)));

            ++item;
+            bitpos += bv.first;
        }
    }
 }

 INSTANTIATE_TEST_CASE_P(Simple,
-        BitIO,
-        ::testing::Values(
-            TestCaseParameter(
-                {{9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}},
-                "11111111 10000000 00111111 11100000 00001111 11111000 "),
-            TestCaseParameter(
-                {{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
-                "01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "),
-            TestCaseParameter({{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}),
-            TestCaseParameter({{33, BIT_PATTERN}, {33, BIT_PATTERN}}),
-            TestCaseParameter({{24, 0xFFFFFFFF}},
-                "11111111 11111111 11111111 ")
-),);
+    BitIO,
+    ::testing::ValuesIn(std::initializer_list<TestCaseParameter>{
+        {
+            {{9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}, {9, 0x00}, {9, 0xFFFFFFFF}},
+            "11111111 10000000 00111111 11100000 00001111 11111000 "
+        },
+        {
+            {{7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {7, 0x3f}, {3, 0xFFFF}},
+            "01111110 11111101 11111011 11110111 11101111 11011111 10111111 01111111 11000000 "
+        },
+        {
+            {{33, 0xFF110d0b07050300}, {33, 0xAAEE29251f1d1713}}
+        },
+        {
+            {{33, BIT_PATTERN}, {33, BIT_PATTERN}}
+        },
+        {
+            {{24, 0xFFFFFFFF}},
+            "11111111 11111111 11111111 "
+        },
+        {
+            // Note that we take only N lower bits of the number: {3, 0b01011} => 011
+            {{5, 0b01010}, {3, 0b111}, {7, 0b11001100}, {6, 0}, {5, 0b11111111}, {4, 0}, {3, 0b101}, {2, 0}, {1, 0b11111111}},
+            "01010111 10011000 00000111 11000010 10010000 "
+        },
+        {
+            {{64, BIT_PATTERN}, {56, BIT_PATTERN} , {4, 0b1111}, {4, 0}, // 128
+             {8, 0b11111111}, {64, BIT_PATTERN}, {48, BIT_PATTERN}, {8, 0}}, // 256
+            "11101011 11101111 10111010 11101111 10101111 10111010 11101011 10101001 " // 64
+            "11101111 10111010 11101111 10101111 10111010 11101011 10101001 11110000 " // 128
+            "11111111 11101011 11101111 10111010 11101111 10101111 10111010 11101011 " // 192
+            "10101001 10111010 11101111 10101111 10111010 11101011 10101001 00000000 " // 256
+        },
+        {
+            {{64, BIT_PATTERN}, {56, BIT_PATTERN} , {5, 0b11111}, {3, 0}, // 128
+             {8, 0b11111111}, {64, BIT_PATTERN}, {48, BIT_PATTERN}, {8, 0}, //256
+             {32, BIT_PATTERN}, {12, 0xff}, {8, 0}, {12, 0xAEff}},
+            "11101011 11101111 10111010 11101111 10101111 10111010 11101011 10101001 " // 64
+            "11101111 10111010 11101111 10101111 10111010 11101011 10101001 11111000 " // 128
+            "11111111 11101011 11101111 10111010 11101111 10101111 10111010 11101011 " // 192
+            "10101001 10111010 11101111 10101111 10111010 11101011 10101001 00000000 " // 256
+            "10101111 10111010 11101011 10101001 00001111 11110000 00001110 11111111 " // 320
+        }
+    }),
+);

 TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
 {
--- a/dbms/src/IO/tests/ryu_test.cpp
+++ b/dbms/src/IO/tests/ryu_test.cpp
@ -0,0 +1,115 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <ryu/ryu.h>
+
+
+/// Bit-level view of a double: bit 63 is the sign, bits 52..62 the exponent, bits 0..51 the mantissa.
+struct DecomposedFloat64
+{
+    DecomposedFloat64(double x)
+    {
+        /// Copy the object representation instead of casting pointers (no aliasing violation).
+        std::memcpy(&x_uint, &x, sizeof(x));
+    }
+
+    uint64_t x_uint;
+
+    /// True when the sign bit (bit 63) is set.
+    bool sign() const
+    {
+        return x_uint >> 63;
+    }
+
+    /// Raw (biased) 11-bit exponent field.
+    uint16_t exponent() const
+    {
+        return (x_uint >> 52) & 0x7FF;
+    }
+
+    /// Exponent field with the bias of 1023 subtracted.
+    int16_t normalized_exponent() const
+    {
+        return int16_t(exponent()) - 1023;
+    }
+
+    /// Stored fraction: exactly the low 52 bits, with no exponent or sign bits mixed in.
+    uint64_t mantissa() const
+    {
+        return x_uint & ((1ULL << 52) - 1);
+    }
+
+    /** True for positive zero, and for values whose unbiased exponent is in [0, 52]
+      * while all mantissa bits below the binary point are zero (i.e. no fractional part).
+      * The sign bit is not examined, and exponents above 52 are reported as false.
+      */
+    bool is_inside_int64() const
+    {
+        return x_uint == 0
+            || (normalized_exponent() >= 0 && normalized_exponent() <= 52
+                && ((mantissa() & ((1ULL << (52 - normalized_exponent())) - 1)) == 0));
+    }
+};
+
+/// Bit-level view of a float: bit 31 is the sign, bits 23..30 the exponent, bits 0..22 the mantissa.
+struct DecomposedFloat32
+{
+    DecomposedFloat32(float x)
+    {
+        std::memcpy(&x_uint, &x, sizeof(x));
+    }
+
+    uint32_t x_uint;
+
+    /// True when the sign bit (bit 31) is set.
+    bool sign() const
+    {
+        return x_uint >> 31;
+    }
+
+    /// Raw (biased) 8-bit exponent field.
+    uint16_t exponent() const
+    {
+        return (x_uint >> 23) & 0xFF;
+    }
+
+    /// Exponent field with the bias of 127 subtracted.
+    int16_t normalized_exponent() const
+    {
+        return int16_t(exponent()) - 127;
+    }
+
+    /// Stored fraction: the low 23 bits.
+    uint32_t mantissa() const
+    {
+        return x_uint & 0x7fffff;
+    }
+
+    /** True for positive zero, and for values whose unbiased exponent is in [0, 23]
+      * while all mantissa bits below the binary point are zero (i.e. no fractional part).
+      * The sign bit is not examined, and exponents above 23 are reported as false.
+      */
+    bool is_inside_int32() const
+    {
+        return x_uint == 0
+            || (normalized_exponent() >= 0 && normalized_exponent() <= 23
+                && ((mantissa() & ((1ULL << (23 - normalized_exponent())) - 1)) == 0));
+    }
+};
+
+
+/// Parses argv[1] as a double (0 when no argument is given), prints the string produced for it
+/// by ryu's d2s_buffered, then prints the result of DecomposedFloat64::is_inside_int64 on its own line.
+int main(int argc, char ** argv)
+{
+    double x = argc > 1 ? std::stod(argv[1]) : 0;
+    char buf[32];
+
+    d2s_buffered(x, buf);
+    std::cout << buf << "\n";
+
+    std::cout << DecomposedFloat64(x).is_inside_int64() << "\n";
+
+    return 0;
+}
--- a/dbms/src/IO/tests/zlib_ng_bug.cpp
+++ b/dbms/src/IO/tests/zlib_ng_bug.cpp
@ -1,32 +1,50 @@
-#include <Poco/FileStream.h>
-#include <Poco/NullStream.h>
-#include <Poco/StreamCopier.h>
-#include <Poco/DeflatingStream.h>
+#include <unistd.h>
+#include <vector>
+#include <stdexcept>
+#include <zlib.h>

-/** This script reproduces the bug in zlib-ng library.
-  * Put the following content to "data.bin" file:
-abcdefghijklmn!@Aab#AAabcdefghijklmn$%
-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-  * There are two lines. First line make sense. Second line contains padding to make file size large enough.
-  * Compile with
-  *  cmake -D SANITIZE=address
-  * and run:
+#pragma GCC diagnostic ignored "-Wold-style-cast"

-./zlib_ng_bug data2.bin
-=================================================================
-==204952==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6310000147ff at pc 0x000000596d7a bp 0x7ffd139edd50 sp 0x7ffd139edd48
-READ of size 1 at 0x6310000147ff thread T0
-  */

-int main(int argc, char ** argv)
+/// https://github.com/zlib-ng/zlib-ng/issues/494
+int main(int, char **)
 {
-    using namespace Poco;
+    std::vector<unsigned char> in(1048576);
+    std::vector<unsigned char> out(1048576);

-    std::string filename(argc >= 2 ? argv[1] : "data.bin");
-    FileInputStream istr(filename);
-    NullOutputStream ostr;
-    DeflatingOutputStream deflater(ostr, DeflatingStreamBuf::STREAM_GZIP);
-    StreamCopier::copyStream(istr, deflater);
+    ssize_t in_size = read(STDIN_FILENO, in.data(), 1048576);
+    if (in_size < 0)
+        throw std::runtime_error("Cannot read");
+    in.resize(in_size);
+
+    z_stream zstr{};
+    if (Z_OK != deflateInit2(&zstr, 1, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY))
+        throw std::runtime_error("Cannot deflateInit2");
+
+    zstr.next_in = in.data();
+    zstr.avail_in = in.size();
+    zstr.next_out = out.data();
+    zstr.avail_out = out.size();
+
+    while (zstr.avail_in > 0)
+        if (Z_OK != deflate(&zstr, Z_NO_FLUSH))
+            throw std::runtime_error("Cannot deflate");
+
+    while (true)
+    {
+        int rc = deflate(&zstr, Z_FINISH);
+
+        if (rc == Z_STREAM_END)
+            break;
+
+        if (rc != Z_OK)
+            throw std::runtime_error("Cannot finish deflate");
+    }
+
+    deflateEnd(&zstr);
+
+    if (ssize_t(zstr.total_out) != write(STDOUT_FILENO, out.data(), zstr.total_out))
+        throw std::runtime_error("Cannot write");

    return 0;
 }
--- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp
@ -514,14 +514,21 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
        ASTPtr as_create_ptr = context.getDatabase(as_database_name)->getCreateTableQuery(context, as_table_name);
        const auto & as_create = as_create_ptr->as<ASTCreateQuery &>();

+        const String qualified_name = backQuoteIfNeed(as_database_name) + "." + backQuoteIfNeed(as_table_name);
+
        if (as_create.is_view)
            throw Exception(
-                "Cannot CREATE a table AS " + as_database_name + "." + as_table_name + ", it is a View",
+                "Cannot CREATE a table AS " + qualified_name + ", it is a View",
                ErrorCodes::INCORRECT_QUERY);

        if (as_create.is_live_view)
            throw Exception(
-                "Cannot CREATE a table AS " + as_database_name + "." + as_table_name + ", it is a Live View",
+                "Cannot CREATE a table AS " + qualified_name + ", it is a Live View",
+                ErrorCodes::INCORRECT_QUERY);
+
+        if (as_create.is_dictionary)
+            throw Exception(
+                "Cannot CREATE a table AS " + qualified_name + ", it is a Dictionary",
                ErrorCodes::INCORRECT_QUERY);

        create.set(create.storage, as_create.storage->ptr());
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@ -2267,17 +2267,17 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, InputSortingInfoP
            limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
            sorting_stream->setLimits(limits);

-            stream = sorting_stream;
+            auto merging_stream = std::make_shared<MergeSortingBlockInputStream>(
+                sorting_stream, output_order_descr, settings.max_block_size, limit,
+                settings.max_bytes_before_remerge_sort,
+                settings.max_bytes_before_external_sort / pipeline.streams.size(),
+                context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
+
+            stream = merging_stream;
        });

        /// If there are several streams, we merge them into one
-        executeUnion(pipeline, {});
-
-        /// Merge the sorted blocks.
-        pipeline.firstStream() = std::make_shared<MergeSortingBlockInputStream>(
-            pipeline.firstStream(), output_order_descr, settings.max_block_size, limit,
-            settings.max_bytes_before_remerge_sort,
-            settings.max_bytes_before_external_sort, context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
+        executeMergeSorted(pipeline, output_order_descr, limit);
    }
 }

--- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp
+++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp
@ -7,6 +7,7 @@
 #include <DataTypes/DataTypeFactory.h>
 #include <Formats/FormatSettings.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/WriteHelpers.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTQueryParameter.h>
 #include <Interpreters/ReplaceQueryParameterVisitor.h>
@ -54,10 +55,12 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast)
    IColumn & temp_column = *temp_column_ptr;
    ReadBufferFromString read_buffer{value};
    FormatSettings format_settings;
-    data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings);
+    data_type->deserializeAsTextEscaped(temp_column, read_buffer, format_settings);

    if (!read_buffer.eof())
-        throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '"  + ast_param.name + "'", ErrorCodes::BAD_QUERY_PARAMETER);
+        throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '"  + ast_param.name + "'"
+            " because it isn't parsed completely: only " + toString(read_buffer.count()) + " of " + toString(value.size()) + " bytes was parsed: "
+            + value.substr(0, read_buffer.count()), ErrorCodes::BAD_QUERY_PARAMETER);

    ast = addTypeConversionToAST(std::make_shared<ASTLiteral>(temp_column[0]), type_name);
 }
--- a/dbms/src/Interpreters/executeQuery.cpp
+++ b/dbms/src/Interpreters/executeQuery.cpp
@ -129,9 +129,9 @@ static void setExceptionStackTrace(QueryLogElement & elem)
    {
        throw;
    }
-    catch (const Exception & e)
+    catch (const std::exception & e)
    {
-        elem.stack_trace = e.getStackTrace().toString();
+        elem.stack_trace = getExceptionStackTraceString(e);
    }
    catch (...) {}
 }
--- a/dbms/src/Interpreters/tests/create_query.cpp
+++ b/dbms/src/Interpreters/tests/create_query.cpp
@ -97,6 +97,6 @@ catch (const Exception & e)
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
-        << e.getStackTrace().toString();
+        << e.getStackTraceString();
    return 1;
 }
--- a/dbms/src/Interpreters/tests/select_query.cpp
+++ b/dbms/src/Interpreters/tests/select_query.cpp
@ -55,6 +55,6 @@ catch (const Exception & e)
    std::cerr << e.what() << ", " << e.displayText() << std::endl
        << std::endl
        << "Stack trace:" << std::endl
-        << e.getStackTrace().toString();
+        << e.getStackTraceString();
    return 1;
 }
--- a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp
@ -1,11 +1,10 @@
-#include <Poco/Version.h>
 #include <Processors/Transforms/MergeSortingTransform.h>
 #include <Processors/IAccumulatingTransform.h>
 #include <Processors/Transforms/MergingSortedTransform.h>
-#include <Common/formatReadable.h>
 #include <Common/ProfileEvents.h>
-#include <common/config_common.h>
 #include <IO/WriteBufferFromFile.h>
+#include <IO/ReadBufferFromFile.h>
+#include <Compression/CompressedReadBuffer.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <DataStreams/NativeBlockInputStream.h>
 #include <DataStreams/NativeBlockOutputStream.h>
@ -21,6 +20,13 @@ namespace ProfileEvents
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int NOT_ENOUGH_SPACE;
+}
+class MergeSorter;
+
+
 class BufferingToFileTransform : public IAccumulatingTransform
 {
 public:
--- a/dbms/src/Processors/Transforms/MergeSortingTransform.h
+++ b/dbms/src/Processors/Transforms/MergeSortingTransform.h
@ -1,25 +1,14 @@
 #pragma once
+
 #include <Processors/Transforms/SortingTransform.h>
 #include <Core/SortDescription.h>
 #include <Common/filesystemHelpers.h>
-#include <IO/ReadBufferFromFile.h>
-#include <Compression/CompressedReadBuffer.h>
-#include <DataStreams/IBlockInputStream.h>
-#include <DataStreams/NativeBlockInputStream.h>
-
 #include <common/logger_useful.h>

-#include <queue>

 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int NOT_ENOUGH_SPACE;
-}
-class MergeSorter;
-
 class MergeSortingTransform : public SortingTransform
 {
 public:
--- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp
+++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp
@ -148,9 +148,9 @@ IProcessor::Status MergingSortedTransform::prepare()
            return Status::NeedData;

        if (has_collation)
-            initQueue(queue_with_collation);
+            queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
        else
-            initQueue(queue_without_collation);
+            queue_without_collation = SortingHeap<SortCursor>(cursors);

        is_initialized = true;
        return Status::Ready;
@ -169,7 +169,6 @@ IProcessor::Status MergingSortedTransform::prepare()

        if (need_data)
        {
-
            auto & input = *std::next(inputs.begin(), next_input_to_read);
            if (!input.isFinished())
            {
@ -183,7 +182,11 @@ IProcessor::Status MergingSortedTransform::prepare()
                    return Status::NeedData;

                updateCursor(std::move(chunk), next_input_to_read);
-                pushToQueue(next_input_to_read);
+
+                if (has_collation)
+                    queue_with_collation.push(cursors[next_input_to_read]);
+                else
+                    queue_without_collation.push(cursors[next_input_to_read]);
            }

            need_data = false;
@ -201,8 +204,8 @@ void MergingSortedTransform::work()
        merge(queue_without_collation);
 }

-template <typename TSortCursor>
-void MergingSortedTransform::merge(std::priority_queue<TSortCursor> & queue)
+template <typename TSortingHeap>
+void MergingSortedTransform::merge(TSortingHeap & queue)
 {
    /// Returns MergeStatus which we should return if we are going to finish now.
    auto can_read_another_row = [&, this]()
@ -224,77 +227,66 @@ void MergingSortedTransform::merge(std::priority_queue<TSortCursor> & queue)
    };

    /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size`
-    while (!queue.empty())
+    while (queue.isValid())
    {
        /// Shouldn't happen at first iteration, but check just in case.
        if (!can_read_another_row())
            return;

-        TSortCursor current = queue.top();
-        queue.pop();
-        bool first_iteration = true;
+        auto current = queue.current();

-        while (true)
+        /** And what if the block is totally less or equal than the rest for the current cursor?
+            * Or is there only one data source left in the queue? Then you can take the entire block on current cursor.
+            */
+        if (current.impl->isFirst()
+            && (queue.size() == 1
+                || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild()))))
        {
-            if (!first_iteration && !can_read_another_row())
+            //std::cerr << "current block is totally less or equals\n";
+
+            /// If there are already data in the current block, we first return it. We'll get here again the next time we call the merge function.
+            if (merged_data.mergedRows() != 0)
            {
-                queue.push(current);
-                return;
-            }
-            first_iteration = false;
-
-            /** And what if the block is totally less or equal than the rest for the current cursor?
-              * Or is there only one data source left in the queue? Then you can take the entire block on current cursor.
-              */
-            if (current.impl->isFirst() && (queue.empty() || current.totallyLessOrEquals(queue.top())))
-            {
-                //std::cerr << "current block is totally less or equals\n";
-
-                /// If there are already data in the current block, we first return it. We'll get here again the next time we call the merge function.
-                if (merged_data.mergedRows() != 0)
-                {
-                    //std::cerr << "merged rows is non-zero\n";
-                    queue.push(current);
-                    return;
-                }
-
-                /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
-                size_t source_num = current.impl->order;
-                insertFromChunk(source_num);
+                //std::cerr << "merged rows is non-zero\n";
                return;
            }

-            //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n";
-            //std::cerr << "Inserting row\n";
-            merged_data.insertRow(current->all_columns, current->pos);
+            /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
+            size_t source_num = current.impl->order;
+            insertFromChunk(source_num);
+            queue.removeTop();
+            return;
+        }

-            if (out_row_sources_buf)
-            {
-                /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
-                RowSourcePart row_source(current.impl->order);
-                out_row_sources_buf->write(row_source.data);
-            }
+        //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n";
+        //std::cerr << "Inserting row\n";
+        merged_data.insertRow(current->all_columns, current->pos);

-            if (current->isLast())
-            {
-                need_data = true;
-                next_input_to_read = current.impl->order;
+        if (out_row_sources_buf)
+        {
+            /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl)
+            RowSourcePart row_source(current.impl->order);
+            out_row_sources_buf->write(row_source.data);
+        }

-                if (limit && merged_data.totalMergedRows() >= limit)
-                    is_finished = true;
+        if (!current->isLast())
+        {
+//            std::cerr << "moving to next row\n";
+            queue.next();
+        }
+        else
+        {
+            /// We will get the next block from the corresponding source, if there is one.
+            queue.removeTop();

-                return;
-            }
+//            std::cerr << "It was last row, fetching next block\n";
+            need_data = true;
+            next_input_to_read = current.impl->order;

-            //std::cerr << "moving to next row\n";
-            current->next();
+            if (limit && merged_data.totalMergedRows() >= limit)
+                is_finished = true;

-            if (!queue.empty() && current.greater(queue.top()))
-            {
-                //std::cerr << "next row is not least, pushing back to queue\n";
-                queue.push(current);
-                break;
-            }
+            return;
        }
    }
    is_finished = true;
--- a/dbms/src/Processors/Transforms/MergingSortedTransform.h
+++ b/dbms/src/Processors/Transforms/MergingSortedTransform.h
@ -1,10 +1,10 @@
 #pragma once
+
 #include <Processors/IProcessor.h>
 #include <Core/SortDescription.h>
 #include <Core/SortCursor.h>
 #include <Processors/SharedChunk.h>

-#include <queue>

 namespace DB
 {
@ -111,14 +111,10 @@ protected:
    /// Chunks currently being merged.
    std::vector<SharedChunkPtr> source_chunks;

-    using CursorImpls = std::vector<SortCursorImpl>;
-    CursorImpls cursors;
+    SortCursorImpls cursors;

-    using Queue = std::priority_queue<SortCursor>;
-    Queue queue_without_collation;
-
-    using QueueWithCollation = std::priority_queue<SortCursorWithCollation>;
-    QueueWithCollation queue_with_collation;
+    SortingHeap<SortCursor> queue_without_collation;
+    SortingHeap<SortCursorWithCollation> queue_with_collation;

 private:

@ -128,8 +124,8 @@ private:
    bool need_data = false;
    size_t next_input_to_read = 0;

-    template <typename TSortCursor>
-    void merge(std::priority_queue<TSortCursor> & queue);
+    template <typename TSortingHeap>
+    void merge(TSortingHeap & queue);

    void insertFromChunk(size_t source_num);

@ -159,22 +155,6 @@ private:
        shared_chunk_ptr->all_columns = cursors[source_num].all_columns;
        shared_chunk_ptr->sort_columns = cursors[source_num].sort_columns;
    }
-
-    void pushToQueue(size_t source_num)
-    {
-        if (has_collation)
-            queue_with_collation.push(SortCursorWithCollation(&cursors[source_num]));
-        else
-            queue_without_collation.push(SortCursor(&cursors[source_num]));
-    }
-
-    template <typename TSortCursor>
-    void initQueue(std::priority_queue<TSortCursor> & queue)
-    {
-        for (auto & cursor : cursors)
-            if (!cursor.empty())
-                queue.push(TSortCursor(&cursor));
-    }
 };

 }
--- a/dbms/src/Processors/Transforms/SortingTransform.cpp
+++ b/dbms/src/Processors/Transforms/SortingTransform.cpp
@ -40,16 +40,12 @@ MergeSorter::MergeSorter(Chunks chunks_, SortDescription & description_, size_t

    chunks.swap(nonempty_chunks);

-    if (!has_collation)
-    {
-        for (auto & cursor : cursors)
-            queue_without_collation.push(SortCursor(&cursor));
-    }
+    if (has_collation)
+        queue_with_collation = SortingHeap<SortCursorWithCollation>(cursors);
+    else if (description.size() > 1)
+        queue_without_collation = SortingHeap<SortCursor>(cursors);
    else
-    {
-        for (auto & cursor : cursors)
-            queue_with_collation.push(SortCursorWithCollation(&cursor));
-    }
+        queue_simple = SortingHeap<SimpleSortCursor>(cursors);
 }


@ -65,50 +61,61 @@ Chunk MergeSorter::read()
        return res;
    }

-    return !has_collation
-           ? mergeImpl<SortCursor>(queue_without_collation)
-           : mergeImpl<SortCursorWithCollation>(queue_with_collation);
+    if (has_collation)
+        return mergeImpl(queue_with_collation);
+    else if (description.size() > 1)
+        return mergeImpl(queue_without_collation);
+    else
+        return mergeImpl(queue_simple);
 }


-template <typename TSortCursor>
-Chunk MergeSorter::mergeImpl(std::priority_queue<TSortCursor> & queue)
+template <typename TSortingHeap>
+Chunk MergeSorter::mergeImpl(TSortingHeap & queue)
 {
    size_t num_columns = chunks[0].getNumColumns();
-
    MutableColumns merged_columns = chunks[0].cloneEmptyColumns();
-    /// TODO: reserve (in each column)
+
+    /// Reserve
+    if (queue.isValid())
+    {
+        /// The expected size of output block is the same as input block
+        size_t size_to_reserve = chunks[0].getNumRows();
+        for (auto & column : merged_columns)
+            column->reserve(size_to_reserve);
+    }
+
+    /// TODO: Optimization when a single block left.

    /// Take rows from queue in right order and push to 'merged'.
    size_t merged_rows = 0;
-    while (!queue.empty())
+    while (queue.isValid())
    {
-        TSortCursor current = queue.top();
-        queue.pop();
+        auto current = queue.current();

+        /// Append a row from queue.
        for (size_t i = 0; i < num_columns; ++i)
            merged_columns[i]->insertFrom(*current->all_columns[i], current->pos);

        ++total_merged_rows;
        ++merged_rows;

-        if (!current->isLast())
-        {
-            current->next();
-            queue.push(current);
-        }
-
+        /// We don't need more rows because of limit has reached.
        if (limit && total_merged_rows == limit)
        {
            chunks.clear();
-            return Chunk(std::move(merged_columns), merged_rows);
+            break;
        }

+        queue.next();
+
+        /// It's enough for current output block but we will continue.
        if (merged_rows == max_merged_block_size)
-            return Chunk(std::move(merged_columns), merged_rows);
+            break;
    }

-    chunks.clear();
+    if (!queue.isValid())
+        chunks.clear();

    if (merged_rows == 0)
        return {};
--- a/dbms/src/Processors/Transforms/SortingTransform.h
+++ b/dbms/src/Processors/Transforms/SortingTransform.h
@ -1,10 +1,10 @@
 #pragma once
+
 #include <Processors/IProcessor.h>
 #include <Core/SortDescription.h>
 #include <Core/SortCursor.h>
 #include <DataStreams/IBlockInputStream.h>
 #include <Processors/ISource.h>
-#include <queue>


 namespace DB
@ -27,19 +27,19 @@ private:
    UInt64 limit;
    size_t total_merged_rows = 0;

-    using CursorImpls = std::vector<SortCursorImpl>;
-    CursorImpls cursors;
+    SortCursorImpls cursors;

    bool has_collation = false;

-    std::priority_queue<SortCursor> queue_without_collation;
-    std::priority_queue<SortCursorWithCollation> queue_with_collation;
+    SortingHeap<SortCursor> queue_without_collation;
+    SortingHeap<SimpleSortCursor> queue_simple;
+    SortingHeap<SortCursorWithCollation> queue_with_collation;

    /** Two different cursors are supported - with and without Collation.
      *  Templates are used (instead of virtual functions in SortCursor) for zero-overhead.
      */
-    template <typename TSortCursor>
-    Chunk mergeImpl(std::priority_queue<TSortCursor> & queue);
+    template <typename TSortingHeap>
+    Chunk mergeImpl(TSortingHeap & queue);
 };


--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`Subproject commit 5b4a853534b47438b4d97935370f6b2397137c2b`