From afa35d55d9573dcc3c544c08163962010677c5e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 19:35:43 +0300 Subject: [PATCH 001/191] Fixed linking order of glibc-compatibility library --- CMakeLists.txt | 1 - dbms/CMakeLists.txt | 2 +- libs/libglibc-compatibility/CMakeLists.txt | 12 +----------- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..a29dc66d491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) - message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 900b1e0a650..51a88fb05a0 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -342,7 +342,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS AND USE_GTEST) +if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 3477e474c7c..2ff3729d673 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,16 +34,6 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) -# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. 
-# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") -# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. -install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT SnappyTargets ARCHIVE DESTINATION "/tmp") - -if(ENABLE_TESTS) - add_subdirectory(tests) -endif() +add_subdirectory (tests) From 3681c982f6b40340b86d6dd3ea5b3b1d191f5dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 21:20:07 +0300 Subject: [PATCH 002/191] Link all libraries to "glibc-compatibility" --- CMakeLists.txt | 1 + libs/libglibc-compatibility/CMakeLists.txt | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a29dc66d491..8bbc7ca40ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." 
ON) if (GLIBC_COMPATIBILITY) + message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 2ff3729d673..c967f075ea9 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,6 +34,13 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) +# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. +# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") +# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. +install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") + add_subdirectory (tests) From 8047b4907a6fa27a6aa7dc8bf10dc999e33b87b7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 18 Jan 2019 21:27:46 +0300 Subject: [PATCH 003/191] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..9782bbf91a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." 
ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + if (OS_LINUX AND NOT UNBUNDLED) option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From ebac45420b05093ffc47e5c442a744d7d7b48986 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 22:42:29 +0300 Subject: [PATCH 004/191] Removed useless code in CMakeLists for "ssl" --- contrib/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fe95dcad041..cba20303218 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,12 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - - # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. - # We need to use system wide ssl directory. 
- set (OPENSSLDIR "/etc/ssl") - - set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") + set (LIBRESSL_SKIP_INSTALL 1) add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From f53cdce65580fec3e04064baf0b2c587c8e3eb4b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 19 Jan 2019 02:52:21 +0300 Subject: [PATCH 005/191] Removed useless install --- contrib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cba20303218..8d1d0ecd150 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,7 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - set (LIBRESSL_SKIP_INSTALL 1) + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From 2fb5addc4095a5093846732076ae61a5b3947fbd Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Mon, 25 Mar 2019 17:34:52 +0300 Subject: [PATCH 006/191] H3 library integration --- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/find_h3.cmake | 17 ++ contrib/CMakeLists.txt | 5 + dbms/src/Functions/CMakeLists.txt | 6 + dbms/src/Functions/geoToH3.cpp | 171 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + .../queries/0_stateless/00746_sql_fuzzy.pl | 2 +- .../0_stateless/00926_geo_to_h3.reference | 20 ++ .../queries/0_stateless/00926_geo_to_h3.sql | 19 ++ docs/ru/query_language/functions/geo.md | 33 ++++ 11 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 cmake/find_h3.cmake create mode 100644 dbms/src/Functions/geoToH3.cpp create mode 100644 dbms/tests/queries/0_stateless/00926_geo_to_h3.reference create mode 100644 
dbms/tests/queries/0_stateless/00926_geo_to_h3.sql diff --git a/.gitmodules b/.gitmodules index 6ad948c9a0a..f2520eb22ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -76,3 +76,6 @@ [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git +[submodule "contrib/h3"] + path = contrib/h3 + url = https://github.com/uber/h3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9782bbf91a5..cf08ce4cfe6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,7 @@ include (cmake/find_re2.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) +include (cmake/find_h3.cmake) include (cmake/find_cpuid.cmake) # Freebsd, bundled if (NOT USE_CPUID) include (cmake/find_cpuinfo.cmake) # Debian diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake new file mode 100644 index 00000000000..7f19157f978 --- /dev/null +++ b/cmake/find_h3.cmake @@ -0,0 +1,17 @@ +option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) + +if (USE_INTERNAL_H3_LIBRARY) + set (H3_LIBRARY h3) + set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) +else () + find_library (H3_LIBRARY h3) + find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) +endif () + +if (H3_LIBRARY AND H3_INCLUDE_DIR) + set (USE_H3 1) +else () + set (USE_H3 0) +endif () + +message (STATUS "Using h3=${USE_H3}: ${H3_INCLUDE_DIR} : ${H3_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8d1d0ecd150..7861940412c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,11 @@ if (USE_INTERNAL_CPUID_LIBRARY) add_subdirectory (libcpuid) endif () +if (USE_INTERNAL_H3_LIBRARY) + add_subdirectory(h3) +endif () + + if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 6b4cfab15c1..80eb62b86d0 100644 --- 
a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -19,6 +19,7 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash + m ${BASE64_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) @@ -60,3 +61,8 @@ if (USE_XXHASH) target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) endif() + +if (USE_H3) + target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) +endif() diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp new file mode 100644 index 00000000000..a4394e8940c --- /dev/null +++ b/dbms/src/Functions/geoToH3.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern "C" { +#include +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +/// Implements the function geoToH3 which takes 3 arguments (latitude, longitude and h3 resolution) +/// and returns h3 index of this point +class FunctionGeoToH3 : public IFunction +{ +public: + static constexpr auto name = "geoToH3"; + + FunctionGeoToH3(const Context & context) : context(context) {} + + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[2].get(); + if (!WhichDataType(arg).isUInt8()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + int const_cnt = 0; + const auto size = input_rows_count; + + for (const auto idx : ext::range(0, 2)) + { + const auto column = block.getByPosition(arguments[idx]).column.get(); + if (typeid_cast(column)) + { + ++const_cnt; + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + } + + double resolution = 0; + bool is_const_resulution = false; + { + const auto column = block.getByPosition(arguments[2]).column.get(); + if (typeid_cast(column)) + { + is_const_resulution = true; + const auto col_const_res = static_cast(column); + resolution = col_const_res->getValue(); + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + else if (const_cnt == 2) + { + throw Exception( + "Illegal type " + column->getName() + " of arguments 3 of function " + getName() + + ". 
It must be const if arguments 1 and 2 are consts.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + + const auto col_lat = block.getByPosition(arguments[0]).column.get(); + const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) + { + const auto col_vec_lat = static_cast *>(col_lat); + const auto col_vec_lon = static_cast *>(col_lon); + const auto col_vec_res = static_cast *>(col_res); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + + for (const auto row : ext::range(0, size)) + { + const double lat = col_vec_lat->getData()[row]; + const double lon = col_vec_lon->getData()[row]; + if (!is_const_resulution) + { + resolution = col_vec_res->getData()[row]; + } + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + dst_data[row] = hindex; + } + + block.getByPosition(result).column = std::move(dst); + } + else if (const_cnt == 2) + { + const auto col_const_lat = static_cast(col_lat); + const auto col_const_lon = static_cast(col_lon); + + const double lat = col_const_lat->getValue(); + const double lon = col_const_lon->getValue(); + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); + } + else + { + throw Exception( + "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() + + ". 
All must be either const or vector", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + +private: + const Context & context; +}; + + +void registerFunctionGeoToH3(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 86d630260ec..6de25cf733c 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,6 +42,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); +void registerFunctionGeoToH3(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -83,6 +84,7 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionTransform(factory); + registerFunctionGeoToH3(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index 72572b775a5..28ae90ec139 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -133,7 +133,7 @@ sub main { split /[\s;,]+/, $ENV{SQL_FUZZY_FUNCTIONS} || file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') - || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW 
domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 
intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy 
CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest 
quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' + || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses geoToH3 transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense 
toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths 
arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString 
toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference new file mode 100644 index 00000000000..ad594f0e81f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference @@ -0,0 +1,20 @@ +644325529094369568 +639821928864584823 
+644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +644325528491955313 +644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +55.720762 37.598135 644325528491955313 +55.720762 37.598135 644325528491955313 +55.72076201 37.598135 644325528491955313 +55.763241 37.660183 644325528627451570 +55.77922738 37.63098076 644325529094369568 +639821928864584823 1 +644325528491955313 2 +644325528627451570 1 +644325529094369568 1 diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql new file mode 100644 index 00000000000..38a60c0061e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; + +CREATE TABLE table1 (lat Float64, lon Float64, resolution UInt8) ENGINE = Memory; + +INSERT INTO table1 VALUES(55.77922738, 37.63098076, 15); +INSERT INTO table1 VALUES(55.76324100, 37.66018300, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); + +select geoToH3(55.77922738, 37.63098076, 15); +select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); + +DROP TABLE table1 diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 4423a167e2e..ec1033eb49b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -99,4 +99,37 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res └─────┘ ``` +## geoToH3 + +Получает H3 индекс точки (lat, lon) 
с заданным разрешением + +``` +pointInPolygon(lat, lon, resolution) +``` + +**Входные значения** + +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). +- `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. + +Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. + +**Возвращаемые значения** + +Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). +`0` в случае ошибки. +Иначе возвращается индексный номер шестиугольника. + +**Пример** + +``` sql +SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +``` +``` +┌────────────h3Index─┐ +│ 644325524701193974 │ +└────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/geo/) From 879d746abd95c4ccd7744833606254f13d9b4eea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Apr 2019 01:20:09 +0300 Subject: [PATCH 007/191] Miscellaneous changes --- dbms/src/Core/Defines.h | 4 +++- dbms/src/DataStreams/ParallelInputsProcessor.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 0a3b384797d..a59c7ddd01e 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -86,7 +86,7 @@ #define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)" #if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) -// #error PLATFORM_NOT_SUPPORTED + #error PLATFORM_NOT_SUPPORTED #endif /// Check for presence of address sanitizer @@ -112,10 +112,12 @@ #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) #define NO_SANITIZE_ADDRESS 
__attribute__((__no_sanitize__("address"))) + #define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. #define NO_SANITIZE_UNDEFINED #define NO_SANITIZE_ADDRESS + #define NO_SANITIZE_THREAD #endif #if defined __GNUC__ && !defined __clang__ diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 9c7a1fc6928..43e66f4a894 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -100,7 +100,7 @@ public: try { for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back([=] () { thread(thread_group, i); }); + threads.emplace_back(&ParallelInputsProcessor::thread, this, std::move(thread_group), i); } catch (...) { From 6df315a9859a5b5456d3269933a6de7101d58b7c Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 16 Apr 2019 11:57:46 +0700 Subject: [PATCH 008/191] Add a way to accept URL without scheme for domain and topLevelDomain --- dbms/src/Functions/domain.h | 30 +++++++++++-------- .../0_stateless/00398_url_functions.reference | 2 ++ .../0_stateless/00398_url_functions.sql | 2 ++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 53a6d7ed4da..e96c37783f8 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -9,27 +9,31 @@ namespace DB { /// Extracts host from given url. +template inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - if (end == (pos = find_first_symbols<'/'>(pos, end))) - return {}; - - if (pos != data) + if (!ignore_scheme || strncmp("www.", data, 4)) { - StringRef scheme = getURLScheme(data, size); - Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. 
- if (pos - scheme_end != 1 || *scheme_end != ':') + if (end == (pos = find_first_symbols<'/'>(pos, end))) return {}; - } - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - return {}; - pos += 2; + if (pos != data) + { + StringRef scheme = getURLScheme(data, size); + Pos scheme_end = data + scheme.size; + + // Colon must follows after scheme. + if (pos - scheme_end != 1 || *scheme_end != ':') + return {}; + } + + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') + return {}; + pos += 2; + } const char * start_of_host = pos; for (; pos < end; ++pos) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index e4a31f0654a..bb56b61ea2d 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -12,6 +12,7 @@ www.example.com 127.0.0.1 www.example.com www.example.com +www.example.com example.com example.com ====DOMAIN==== @@ -20,6 +21,7 @@ com ru ru com +com ====PATH==== П %D%9 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 16425dae46d..1358852a25c 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -13,6 +13,7 @@ SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; SELECT domain('//www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host; +SELECT domain('www.example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; @@ -23,6 +24,7 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT 
topLevelDomain('//www.example.com') AS Domain; +SELECT topLevelDomain('www.google.com') as Domain; SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); From 593dcbb33ff3455c2b0424ba393016883e389135 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Thu, 18 Apr 2019 10:32:42 +0700 Subject: [PATCH 009/191] Handle URL without www and scheme for domain and topleveldomain function --- dbms/src/Functions/domain.h | 38 +++++++++++-------- .../0_stateless/00398_url_functions.reference | 2 + .../0_stateless/00398_url_functions.sql | 4 +- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index e96c37783f8..9ae9393405e 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -15,26 +15,32 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - if (!ignore_scheme || strncmp("www.", data, 4)) + if (end == (pos = find_first_symbols<'/'>(pos, end))) { - if (end == (pos = find_first_symbols<'/'>(pos, end))) + if (ignore_scheme) + pos = data; + else return {}; - - if (pos != data) - { - StringRef scheme = getURLScheme(data, size); - Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. - if (pos - scheme_end != 1 || *scheme_end != ':') - return {}; - } - - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - return {}; - pos += 2; } + if (pos != data) + { + StringRef scheme = getURLScheme(data, size); + Pos scheme_end = data + scheme.size; + + // Colon must follows after scheme. 
+ if (pos - scheme_end != 1 || *scheme_end != ':') + return {}; + } + + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') + { + if (!ignore_scheme) + return {}; + } + else + pos += 2; + const char * start_of_host = pos; for (; pos < end; ++pos) { diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index bb56b61ea2d..e5cead07b9c 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -15,6 +15,7 @@ www.example.com www.example.com example.com example.com +example.com ====DOMAIN==== com @@ -22,6 +23,7 @@ ru ru com com +com ====PATH==== П %D%9 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 1358852a25c..cfe419f6dc3 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -14,6 +14,7 @@ SELECT domain('http://127.0.0.1:443/') AS Host; SELECT domain('//www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host; SELECT domain('www.example.com') as Host; +SELECT domain('example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; @@ -24,7 +25,8 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT topLevelDomain('//www.example.com') AS Domain; -SELECT topLevelDomain('www.google.com') as Domain; +SELECT topLevelDomain('www.example.com') as Domain; +SELECT topLevelDomain('example.com') as Domain; SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); From 596464697173ef3f6f76074e421bb3a7e4494743 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 19 Apr 2019 11:02:25 
+0700 Subject: [PATCH 010/191] Remove template for getURLHost function for skip scheme --- dbms/src/Functions/domain.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 9ae9393405e..a1949b44d9c 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -9,18 +9,19 @@ namespace DB { /// Extracts host from given url. -template inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - if (end == (pos = find_first_symbols<'/'>(pos, end))) + Pos slash_pos = find_first_symbols<'/'>(pos, end); + if (slash_pos != end) { - if (ignore_scheme) - pos = data; - else - return {}; + pos = slash_pos; + } + else + { + pos = data; } if (pos != data) @@ -33,12 +34,8 @@ inline StringRef getURLHost(const char * data, size_t size) return {}; } - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - { - if (!ignore_scheme) - return {}; - } - else + // Check with we still have // character from the scheme + if (!(end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')) pos += 2; const char * start_of_host = pos; From b567127f4988f7edc23c8dedc98dc360482efe90 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:23:59 +0700 Subject: [PATCH 011/191] Doing more URL check on domain and topLevelDomain function --- dbms/src/Functions/domain.h | 50 ++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index a1949b44d9c..3c16e02b595 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -8,6 +8,42 @@ namespace DB { +static inline bool isUnsafeCharUrl(char c) +{ + switch (c) + { + case ' ': + case '\t': + case '<': + case '>': + case '#': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + return true; + } + return false; +} + 
+static inline bool isEndOfUrl(char c) +{ + switch (c) + { + case ':': + case '/': + case '?': + case '#': + return true; + } + return false; +} + /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -39,14 +75,26 @@ inline StringRef getURLHost(const char * data, size_t size) pos += 2; const char * start_of_host = pos; + bool has_dot_delimiter = false; for (; pos < end; ++pos) { if (*pos == '@') start_of_host = pos + 1; - else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + else if (*pos == '.') + { + if (pos + 1 == end || isEndOfUrl(*(pos + 1))) + return StringRef{}; + has_dot_delimiter = true; + } + else if (isEndOfUrl(*pos)) break; + else if (isUnsafeCharUrl(*pos)) + return StringRef{}; } + if (!has_dot_delimiter) + return StringRef{}; + return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); } From c93893576667ab9d42f048d8efd8a9e7027d78fc Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:35:12 +0700 Subject: [PATCH 012/191] Update tests for domain and top level domain scheme removal --- ...0381_first_significant_subdomain.reference | 2 +- .../00037_uniq_state_merge1.reference | 76 ++++---- .../00038_uniq_state_merge2.reference | 180 +++++++++--------- .../00044_any_left_join_string.reference | 12 +- ..._functions_with_non_constant_arg.reference | 7 +- 5 files changed, 137 insertions(+), 140 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 7f8c9ba186c..37a82987f34 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena yandex yandex yandex yandex яндекс яндекс yandex -canada hello hello hello hello hello canada canada +canada hello hello canada diff --git 
a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference index d9ca7e3be21..f0a48df0f91 100644 --- a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference +++ b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference @@ -1,24 +1,16 @@ -yandex.ru 25107 25107 - 21999 21999 -public_search 16749 16749 + 85185 85185 +yandex.ru 25106 25106 avito.ru 16523 16523 -public 15429 15429 -mail.yandex.ru 13663 13663 -yandsearch 10039 10039 -news 8827 8827 +mail.yandex.ru 13662 13662 mail.ru 7643 7643 -doc 7537 7537 auto.ru 7350 7350 hurpass.com 6395 6395 best.ru 5477 5477 tv.yandex.ru 5341 5341 korer.ru 4967 4967 -mail.yandsearch 4246 4246 -cars 4077 4077 -publ 3970 3970 -yandex 3845 3845 +mail.yandsearch 4244 4244 main=hurriyet.com 3806 3806 -yandex.ua 3803 3803 +yandex.ua 3802 3802 korablitz.ru 3717 3717 uyelik.hurriyet.com 3584 3584 e.mail.ru 3508 3508 @@ -28,46 +20,32 @@ coccoc.com 2707 2707 rutube.ru 2699 2699 rbc.ru 2644 2644 mamba.ru 2598 2598 -video 2558 2558 -mail.yandex 2447 2447 -wot 2253 2253 +mail.yandex 2443 2443 pikabu.ru 2130 2130 yandex.php 2057 2057 e.mail.yandex.ru 1971 1971 brandex.ru 1969 1969 -bravoslava-230v 1942 1942 -search 1933 1933 market.ru 1913 1913 mynet.ru 1881 1881 -mail 1845 1845 -mail.yandex.ua 1825 1825 +mail.yandex.ua 1824 1824 rutube.com 1821 1821 -images 1812 1812 news.rambler.com 1787 1787 hurpass.com.tr 1763 1763 ads.search 1742 1742 -marina_2_sezon 1680 1680 cars.auto.ru 1628 1628 cian.ru 1620 1620 ivi.ru 1617 1617 av.by 1598 1598 -world 1596 1596 news.yandex.ru 1495 1495 vk.com 1474 1474 -pub 1469 1469 -forum 1414 1414 wow-girls.ru 1399 1399 -kinogo-dhpWXEdIcgoxWUZ6fgdTWw.. 
1338 1338 uyelik.hurriyet.com.tr 1330 1330 aukro.ua 1314 1314 -plugins 1244 1244 images.yandsearch 1235 1235 ondom.ru 1221 1221 korablitz.com 1189 1189 -videovol-9-sezon 1187 1187 kerl.org 1155 1155 mail.yandex.php 1148 1148 -file 1147 1147 love.mail.yandex.ru 1136 1136 yandex.kz 1124 1124 coccoc.com.tr 1113 1113 @@ -77,24 +55,46 @@ sprashivai.ru 1072 1072 market.yandex.ru 1064 1064 spb-n.ru 1056 1056 sz.spaces.ru 1055 1055 -xofx.net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 marinance.ua 1050 1050 tube.ru 1044 1044 haber.com 1043 1043 -image&img_url=http 1042 1042 -sport 1040 1040 megogo.net 993 993 sozcu.com 991 991 yandex.by 938 938 -image&uinfo 936 936 fast-golove.mail.ru_Mobile=0&at=35&text=производств 927 927 -linka 901 901 gazeta.ru 892 892 -yandex.ru;yandex.ru 892 892 -kinogo-dhpWXEdIcgoxWUZ6fgdTXA.. 890 890 +yandex.ru;yandex.ru 891 891 fotki.yandex.ru 875 875 fast-golove.mail.yandex.php 842 842 -news=previews 839 839 -faber 833 833 lenta.ru 820 820 publicdaroglundai_anketa.ru 813 813 +mail.yandex.kz 810 810 +censor.net 807 807 +mail.yandex.by 805 805 +nnn.ru 796 796 +maxi.su 788 788 +rambler.ru 755 755 +hurpass.com.ua 729 729 +g1.botva.lv 728 728 +m.sport.airway 724 724 +tvizle.com 723 723 +fast-golove.mail.yandex.ru 712 712 +spb.ru 693 693 +eksisozluk.com 689 689 +uyelik.hurriyet 666 666 +rst.ua 650 650 +deko.ru 647 647 +my.mail.yandex.ru 647 647 +astrov.pro 625 625 +yandsearch.php 624 624 +kinogo.net 617 617 +fanati-avtomobile.jsp 611 611 +tv.yandsearch 605 605 +soft.ru 603 603 +pluginplus.ru 601 601 +images.yandex 595 595 +1tv.rbc.ru 592 592 +ria.ru 591 591 +marina_prezideniz.hurriyet.com 578 578 +youtube.ru 575 575 +cars.autochno.ru 570 570 diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 926cb1911ba..0ea3994f453 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ 
b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,100 +1,100 @@ - 582035 80248 -ru 299420 71339 -com 78253 34500 -html 40288 19569 -ua 33160 18847 -tr 19570 13117 -net 19003 12908 -php 17817 12011 -yandsearch 13598 10329 -by 9349 7695 -yandex 8946 7282 -org 5897 5320 -tv 5371 4660 -kz 5175 4588 -aspx 3084 2800 -phtml 3012 2725 -xml 2993 2726 -tr&callback_url=http 2897 2681 -su 2833 2587 -shtml 2442 2218 -hurriyet 2030 1907 -search 1915 1904 -tr&user 1556 1494 -jpg 1531 1427 -tr&users 1449 1373 -tr&callback 1294 1244 -jsp 1083 1048 -net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 -htm 957 921 +ru 282356 70396 + 96315 85424 +com 72459 33535 +html 33195 17665 +ua 31469 18290 +tr 18197 12501 +net 17633 12318 +php 16416 11358 +yandsearch 12917 9992 +by 8690 7254 +yandex 8004 6661 +org 5397 4920 +tv 4957 4360 +kz 4915 4388 +phtml 2754 2529 +tr&callback_url=http 2742 2556 +su 2731 2505 +xml 2731 2520 +aspx 2593 2394 +search 1854 1844 +shtml 1788 1688 +hurriyet 1517 1468 +tr&user 1469 1419 +tr&users 1361 1299 +tr&callback 1197 1157 +jpg 1129 1094 +jsp 1028 1000 ru_Mobile=0&at=35&text=производств 927 927 -lv 916 910 -tr&user_page 916 885 -exe 911 891 -me 911 864 -tr&user_page=http 900 868 -do 864 838 -tr&used 782 768 -pro 778 772 +lv 897 892 +tr&user_page=http 859 831 +tr&user_page 858 834 +exe 853 837 +me 791 754 +tr&used 761 747 +pro 757 753 +htm 756 736 +do 747 730 airway 724 724 -biz 685 672 -mail 677 660 -info 593 575 -tr&callback_url=https 534 526 -tr%2Fgaleri 533 522 +mail 632 618 +biz 623 612 +info 525 515 bstatistik_dlja-dlya-naches 521 521 -sx 498 496 -ru%2Fupload 497 492 -news 492 487 -hu 486 479 -aspx&referer 473 459 +tr&callback_url=https 508 501 +news 469 464 +sx 465 464 pogoda 460 460 -auto 438 429 -az 434 425 -net%2F63857&secret=506d9e3dfbd268e6b6630e58 432 432 +hu 432 429 sportlibrary 431 431 -jpg,http 411 397 -tr&callbusiness 410 407 -fm 405 400 -online 401 399 -tr&callbusines 388 384 -ru%2Fnews 387 382 +aspx&referer 407 395 
+auto 406 398 +tr&callbusiness 398 395 +az 393 385 +fm 387 386 +online 382 380 bstatistic 366 366 -wbp 346 346 -am 336 333 -ru;yandsearch 330 328 -tr&user_page=https 330 328 -tr&callback_url 329 319 -html&lang=ru&lr=110&category=dressages%2Fcs306755 328 328 -pl 328 326 -blog 327 326 -jpg&pos 307 302 -bstana 305 305 -ru;yandex 287 284 -im 283 278 -diary 277 275 -slando 276 274 -eu 274 269 -to 271 269 -asp 253 250 -html&lang 253 248 -mynet 253 251 -tj 242 241 -sberbank 241 238 -haber 234 227 -jpg,https 232 232 -cc 226 221 +tr&callbusines 365 362 +wbp 344 344 +tr&user_page=https 326 325 +ru;yandsearch 322 321 +tr&callback_url 307 297 +bstana 304 304 +am 292 290 +ru;yandex 279 276 +blog 274 274 +pl 273 271 +diary 267 265 +jpg,http 267 261 +slando 260 258 +eu 256 253 +im 248 245 +to 242 240 +tj 231 231 +mynet 226 225 +sberbank 224 224 _2544 222 222 -ws 221 219 -mamba 220 220 liveinteria 218 218 -tr%2Fanasayfa 215 210 +mamba 217 217 +jpg,https 215 215 tr&user_pts=&states 213 213 -yandsearchplus 212 211 -jpg","photo 211 209 -ru%2Fwww 211 211 -com&callback_url=http 209 208 +jpg&pos 212 210 +html&lang 211 207 auto-supers 208 208 -co 206 205 -kg 206 205 -ru%2Fuploads 206 205 +yandsearchplus 205 204 +asp 201 198 +aspx&referera 201 201 +bstatistik_dlja-dlya_avia 201 201 +bstanii_otryasam 200 200 +wroad_5d 200 200 +com&callback_url=http 198 198 +ru&pos=3_0 198 198 +haber 196 191 +ws 194 193 +kg 191 190 +video 190 190 +co 188 188 +bstan 187 187 +swf 186 186 +cc 185 182 +turkasovki 183 183 +wssp 176 176 diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index a96e3c9f457..f5ef97e01d7 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,10 +1,10 @@ + 3959563 550936 auto.ru 576845 8935 -yandex.ru 410788 111278 -public 328528 23 - 313516 26015 -public_search 311125 0 +yandex.ru 
410783 111278 korer.ru 277987 0 avito.ru 163820 15556 -mail.yandex.ru 152469 1046 +mail.yandex.ru 152468 1046 main=hurriyet.com 152096 259 -wot 116912 6682 +mail.ru 87949 22225 +best.ru 58537 55 +korablitz.ru 51844 0 diff --git a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference index ad9a93d1113..530d00668a4 100644 --- a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference +++ b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference @@ -2,8 +2,5 @@ 0 0 0 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://ru slovari 15 -https://ru spb.rabota 15 -https://e yandex 12 +http://topicId=323145-EXC=1-PG=10&from=distriruyu-redakciy-lakovora-dalgames.mail@mail.yandex.ru/yandex.ru/news.mail +http://plugin_sd=1&ie=UTF-8&l=vi&p=AhY_cQZSQQ5JBlUEZVcJG1F4XldSeWNjVEdhen83@mail.yandsearch From 3639f03bc607f65f57504c3819cb7edff3e6eaa1 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:56:24 +0700 Subject: [PATCH 013/191] Check if we got a reserved char on url for domain and top level domain --- dbms/src/Functions/domain.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 3c16e02b595..10251f355b6 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -31,7 +31,7 @@ static inline bool isUnsafeCharUrl(char c) return false; } -static inline bool isEndOfUrl(char c) +static inline bool isCharEndOfUrl(char c) { switch (c) { @@ -44,6 +44,22 @@ static inline bool isEndOfUrl(char c) return false; } 
+static inline bool isReservedCharUrl(char c) +{ + switch (c) + { + case ';': + case '/': + case '?': + case ':': + case '@': + case '=': + case '&': + return true; + } + return false; +} + /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -82,13 +98,13 @@ inline StringRef getURLHost(const char * data, size_t size) start_of_host = pos + 1; else if (*pos == '.') { - if (pos + 1 == end || isEndOfUrl(*(pos + 1))) + if (pos + 1 == end || isCharEndOfUrl(*(pos + 1))) return StringRef{}; has_dot_delimiter = true; } else if (isEndOfUrl(*pos)) break; - else if (isUnsafeCharUrl(*pos)) + else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) return StringRef{}; } From 8b0dda39e4e268af282e77c2d24f89f04b3f64c6 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 13:08:09 +0700 Subject: [PATCH 014/191] Handle more test and update tests --- dbms/src/Functions/domain.h | 2 +- dbms/src/Functions/tl.sql | 26 +++ .../0_stateless/00398_url_functions.reference | 2 +- .../00037_uniq_state_merge1.reference | 20 +- .../00038_uniq_state_merge2.reference | 176 +++++++++--------- .../00044_any_left_join_string.reference | 8 +- ..._functions_with_non_constant_arg.reference | 4 +- 7 files changed, 132 insertions(+), 106 deletions(-) create mode 100644 dbms/src/Functions/tl.sql diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 10251f355b6..37c5a6fe5cd 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -102,7 +102,7 @@ inline StringRef getURLHost(const char * data, size_t size) return StringRef{}; has_dot_delimiter = true; } - else if (isEndOfUrl(*pos)) + else if (isCharEndOfUrl(*pos)) break; else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) return StringRef{}; diff --git a/dbms/src/Functions/tl.sql b/dbms/src/Functions/tl.sql new file mode 100644 index 00000000000..3f9d241d654 --- /dev/null +++ b/dbms/src/Functions/tl.sql @@ -0,0 +1,26 @@ +CREATE TABLE ip( + a 
FixedString(16) +) ENGINE = Memory; + + + +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); + +WITH IPv6CIDRtoIPv6Range(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'), 40) as test SELECT IPv6NumToString(a) FROM ip WHERE a BETWEEN tupleElement(test, 1) AND tupleElement(test, 2) ; diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index e5cead07b9c..23390c199f0 100644 --- 
a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -20,7 +20,7 @@ example.com com ru -ru + com com com diff --git a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference index f0a48df0f91..3bedecd267b 100644 --- a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference +++ b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference @@ -1,15 +1,14 @@ - 85185 85185 -yandex.ru 25106 25106 + 89348 89348 +yandex.ru 25105 25105 avito.ru 16523 16523 -mail.yandex.ru 13662 13662 +mail.yandex.ru 13659 13659 mail.ru 7643 7643 auto.ru 7350 7350 hurpass.com 6395 6395 best.ru 5477 5477 tv.yandex.ru 5341 5341 korer.ru 4967 4967 -mail.yandsearch 4244 4244 -main=hurriyet.com 3806 3806 +mail.yandsearch 4237 4237 yandex.ua 3802 3802 korablitz.ru 3717 3717 uyelik.hurriyet.com 3584 3584 @@ -20,14 +19,14 @@ coccoc.com 2707 2707 rutube.ru 2699 2699 rbc.ru 2644 2644 mamba.ru 2598 2598 -mail.yandex 2443 2443 +mail.yandex 2441 2441 pikabu.ru 2130 2130 yandex.php 2057 2057 e.mail.yandex.ru 1971 1971 brandex.ru 1969 1969 market.ru 1913 1913 mynet.ru 1881 1881 -mail.yandex.ua 1824 1824 +mail.yandex.ua 1823 1823 rutube.com 1821 1821 news.rambler.com 1787 1787 hurpass.com.tr 1763 1763 @@ -61,16 +60,14 @@ haber.com 1043 1043 megogo.net 993 993 sozcu.com 991 991 yandex.by 938 938 -fast-golove.mail.ru_Mobile=0&at=35&text=производств 927 927 gazeta.ru 892 892 -yandex.ru;yandex.ru 891 891 fotki.yandex.ru 875 875 fast-golove.mail.yandex.php 842 842 lenta.ru 820 820 publicdaroglundai_anketa.ru 813 813 mail.yandex.kz 810 810 censor.net 807 807 -mail.yandex.by 805 805 +mail.yandex.by 804 804 nnn.ru 796 796 maxi.su 788 788 rambler.ru 755 755 @@ -98,3 +95,6 @@ ria.ru 591 591 marina_prezideniz.hurriyet.com 578 578 youtube.ru 575 575 cars.autochno.ru 570 570 +a2.stars.auto.yandsearch 566 566 +love.mail.ru 560 560 +mail.rambler.ru 553 553 
diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 0ea3994f453..575d19b2ebf 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,100 +1,100 @@ -ru 282356 70396 - 96315 85424 -com 72459 33535 -html 33195 17665 -ua 31469 18290 -tr 18197 12501 -net 17633 12318 -php 16416 11358 -yandsearch 12917 9992 -by 8690 7254 -yandex 8004 6661 -org 5397 4920 -tv 4957 4360 -kz 4915 4388 -phtml 2754 2529 -tr&callback_url=http 2742 2556 -su 2731 2505 -xml 2731 2520 -aspx 2593 2394 -search 1854 1844 -shtml 1788 1688 -hurriyet 1517 1468 -tr&user 1469 1419 -tr&users 1361 1299 -tr&callback 1197 1157 -jpg 1129 1094 -jsp 1028 1000 -ru_Mobile=0&at=35&text=производств 927 927 -lv 897 892 -tr&user_page=http 859 831 -tr&user_page 858 834 -exe 853 837 -me 791 754 -tr&used 761 747 -pro 757 753 -htm 756 736 -do 747 730 +ru 262911 69218 + 92101 89421 +com 63297 30285 +ua 29037 17475 +html 25077 15037 +tr 16770 11857 +net 16387 11686 +php 14373 10307 +yandsearch 12024 9484 +by 8192 6915 +yandex 7211 6124 +org 4890 4514 +kz 4677 4209 +tv 4400 3928 +su 2602 2396 +phtml 2409 2226 +xml 2322 2182 +aspx 1959 1848 +search 1835 1827 +hurriyet 1385 1345 +shtml 995 966 +lv 879 875 +jsp 855 845 +exe 814 798 +pro 737 734 airway 724 724 -mail 632 618 -biz 623 612 -info 525 515 +me 675 647 +jpg 662 647 +do 625 611 +mail 593 581 +biz 537 530 bstatistik_dlja-dlya-naches 521 521 -tr&callback_url=https 508 501 -news 469 464 -sx 465 464 -pogoda 460 460 -hu 432 429 +info 461 453 +pogoda 459 459 +sx 450 449 +news 448 444 sportlibrary 431 431 -aspx&referer 407 395 -auto 406 398 -tr&callbusiness 398 395 -az 393 385 -fm 387 386 -online 382 380 +hu 396 393 +htm 393 385 +fm 379 378 +online 374 372 bstatistic 366 366 -tr&callbusines 365 362 -wbp 344 344 -tr&user_page=https 326 325 -ru;yandsearch 322 321 -tr&callback_url 307 297 
+auto 363 355 +az 356 350 +wbp 343 343 bstana 304 304 -am 292 290 -ru;yandex 279 276 -blog 274 274 -pl 273 271 -diary 267 265 -jpg,http 267 261 -slando 260 258 -eu 256 253 -im 248 245 -to 242 240 -tj 231 231 -mynet 226 225 -sberbank 224 224 -_2544 222 222 +blog 268 268 +diary 262 261 +am 260 258 +slando 254 252 +im 238 235 +eu 237 234 liveinteria 218 218 -mamba 217 217 -jpg,https 215 215 -tr&user_pts=&states 213 213 -jpg&pos 212 210 -html&lang 211 207 +to 215 213 +mamba 214 214 auto-supers 208 208 -yandsearchplus 205 204 -asp 201 198 -aspx&referera 201 201 +sberbank 207 207 +tj 205 205 bstatistik_dlja-dlya_avia 201 201 bstanii_otryasam 200 200 +pl 200 198 wroad_5d 200 200 -com&callback_url=http 198 198 -ru&pos=3_0 198 198 -haber 196 191 -ws 194 193 -kg 191 190 -video 190 190 -co 188 188 +mynet 191 190 bstan 187 187 -swf 186 186 -cc 185 182 +yandsearchplus 186 186 +haber 184 179 +jpg,https 184 184 turkasovki 183 183 -wssp 176 176 +co 177 177 +video 177 177 +gif","photos 175 175 +mgshared_zone 172 172 +wssp 172 172 +jpg,http 170 168 +swf 167 167 +cc 166 164 +ws 164 164 +kg 157 156 +mobili_s_probegom 154 153 +cgi 153 152 +yandsearcher 152 151 +uz 150 150 +nsf 149 149 +adriver 147 144 +slandsearch 143 142 +korrez 140 140 +bstatistik_dlja-dlja-putin 139 139 +rambler 133 132 +mvideo 132 132 +asp 129 128 +vc 127 127 +md 121 121 +jpg","photo 119 119 +mp4 118 117 +ee 116 115 +loveplaceOfSearchplus 111 111 +nl 111 111 +bstatistika 107 107 +br 102 102 +sport 99 99 diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index f5ef97e01d7..05e97417263 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,10 +1,10 @@ - 3959563 550936 + 4508175 712434 auto.ru 576845 8935 -yandex.ru 410783 111278 +yandex.ru 410776 111278 korer.ru 277987 0 avito.ru 163820 15556 -mail.yandex.ru 152468 
1046 -main=hurriyet.com 152096 259 +mail.yandex.ru 152447 1046 mail.ru 87949 22225 best.ru 58537 55 korablitz.ru 51844 0 +hurpass.com 49671 1251 diff --git a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference index 530d00668a4..4d0ba2b70f3 100644 --- a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference +++ b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference @@ -2,5 +2,5 @@ 0 0 0 -http://topicId=323145-EXC=1-PG=10&from=distriruyu-redakciy-lakovora-dalgames.mail@mail.yandex.ru/yandex.ru/news.mail -http://plugin_sd=1&ie=UTF-8&l=vi&p=AhY_cQZSQQ5JBlUEZVcJG1F4XldSeWNjVEdhen83@mail.yandsearch +https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri +https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri From d1bca5b6a49ee7aa1383627c0410ed753c97e2e2 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 26 Apr 2019 10:59:20 +0700 Subject: [PATCH 015/191] Remove test file --- dbms/src/Functions/tl.sql | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 dbms/src/Functions/tl.sql diff --git a/dbms/src/Functions/tl.sql b/dbms/src/Functions/tl.sql deleted file mode 100644 index 3f9d241d654..00000000000 --- a/dbms/src/Functions/tl.sql +++ /dev/null @@ -1,26 +0,0 @@ -CREATE TABLE ip( - a FixedString(16) -) ENGINE = Memory; - - - -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) 
VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); - -WITH IPv6CIDRtoIPv6Range(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'), 40) as test SELECT IPv6NumToString(a) FROM ip WHERE a BETWEEN tupleElement(test, 1) AND tupleElement(test, 2) ; From b54855addbc1ff0a5aa7898a6784319bc747a238 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 26 Apr 2019 11:26:48 +0700 Subject: [PATCH 016/191] Add tests for cutToFirstSignificantSubdomain when we don't send a scheme --- dbms/tests/queries/0_stateless/00398_url_functions.reference | 2 ++ dbms/tests/queries/0_stateless/00398_url_functions.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 23390c199f0..acb605597d3 100644 --- 
a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -65,6 +65,8 @@ example.com example.com example.com example.com +example.com +example.com ====CUT WWW==== http://example.com http://example.com:1234 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index cfe419f6dc3..d301cac5b15 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -73,6 +73,8 @@ SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('www.example.com'); +SELECT cutToFirstSignificantSubdomain('example.com'); SELECT '====CUT WWW===='; SELECT cutWWW('http://www.example.com'); From c661f5c0a10e2aeaee71fd129870d32b5e4e24bc Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 19 May 2019 00:07:23 +0300 Subject: [PATCH 017/191] new --- dbms/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 68 ++++++++++++++++++- dbms/programs/server/HTTPHandler.cpp | 6 ++ .../DataTypes/DataTypeAggregateFunction.cpp | 8 +++ .../src/DataTypes/DataTypeAggregateFunction.h | 2 + dbms/src/DataTypes/DataTypeCustom.h | 4 ++ .../DataTypeCustomSimpleTextSerialization.cpp | 7 ++ .../DataTypeCustomSimpleTextSerialization.h | 4 ++ dbms/src/DataTypes/DataTypeEnum.cpp | 8 +++ dbms/src/DataTypes/DataTypeEnum.h | 2 + dbms/src/DataTypes/DataTypeFixedString.cpp | 6 ++ dbms/src/DataTypes/DataTypeFixedString.h | 2 + dbms/src/DataTypes/DataTypeLowCardinality.h | 5 ++ dbms/src/DataTypes/DataTypeNullable.cpp | 9 +++ dbms/src/DataTypes/DataTypeNullable.h | 1 + dbms/src/DataTypes/DataTypeString.cpp | 6 ++ 
dbms/src/DataTypes/DataTypeString.h | 1 + .../DataTypeWithSimpleSerialization.h | 5 ++ dbms/src/DataTypes/IDataType.cpp | 12 ++++ dbms/src/DataTypes/IDataType.h | 4 ++ dbms/src/Interpreters/Context.cpp | 23 +++++++ dbms/src/Interpreters/Context.h | 8 +++ .../ReplaceQueryParameterVisitor.cpp | 60 ++++++++++++++++ .../ReplaceQueryParameterVisitor.h | 27 ++++++++ dbms/src/Interpreters/executeQuery.cpp | 8 +++ dbms/src/Parsers/ASTQueryParameter.cpp | 19 ++++++ dbms/src/Parsers/ASTQueryParameter.h | 27 ++++++++ dbms/src/Parsers/ExpressionElementParsers.cpp | 40 ++++++++++- dbms/src/Parsers/ExpressionElementParsers.h | 11 +++ dbms/src/Parsers/Lexer.cpp | 5 +- dbms/src/Parsers/Lexer.h | 3 + dbms/src/Parsers/tests/lexer.cpp | 1 - 32 files changed, 387 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp create mode 100644 dbms/src/Interpreters/ReplaceQueryParameterVisitor.h create mode 100644 dbms/src/Parsers/ASTQueryParameter.cpp create mode 100644 dbms/src/Parsers/ASTQueryParameter.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d0ca68543f0..542d79dd69b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,7 +87,7 @@ endif () add_subdirectory (src) set(dbms_headers) -set(dbms_sources) +set(dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp src/Interpreters/ReplaceQueryParameterVisitor.h) include(../cmake/dbms_glob_sources.cmake) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 349f6690cbe..ba603b77d23 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -201,6 +202,9 @@ private: /// External tables info. std::list external_tables; + /// Dictionary with query parameters for prepared statements. 
+ NameToNameMap params_substitution; + ConnectionParameters connection_parameters; @@ -793,7 +797,6 @@ private: /// Some parts of a query (result output and formatting) are executed client-side. /// Thus we need to parse the query. parsed_query = parsed_query_; - if (!parsed_query) { const char * begin = query.data(); @@ -803,6 +806,16 @@ private: if (!parsed_query) return true; + if (!params_substitution.empty()) + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(params_substitution); + visitor.visit(parsed_query); + + /// Get new query after substitutions. + query = serializeAST(*parsed_query); + } + processed_rows = 0; progress.reset(); show_progress_bar = false; @@ -1537,6 +1550,13 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } + static std::pair parseParam(const String & s) + { + size_t pos = s.find('_') + 1; + /// Cut two first dash "--" and divide arg from name and value + return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); + } + public: void init(int argc, char ** argv) { @@ -1546,13 +1566,15 @@ public: /** We allow different groups of arguments: * - common arguments; * - arguments for any number of external tables each in form "--external args...", - * where possible args are file, name, format, structure, types. + * where possible args are file, name, format, structure, types; + * - param arguments for prepared statements. * Split these groups before processing. */ using Arguments = std::vector; Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; + std::vector param_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1595,7 +1617,15 @@ public: else { in_external_group = false; - common_arguments.emplace_back(arg); + + /// Parameter arg after underline. 
+ if (startsWith(arg, "--param_")) + { + param_arguments.emplace_back(Arguments{""}); + param_arguments.back().emplace_back(arg); + } + else + common_arguments.emplace_back(arg); } } @@ -1670,6 +1700,38 @@ public: ("structure", po::value(), "structure") ("types", po::value(), "types") ; + + /// Parse commandline options related to prepared statements. + po::options_description param_description("Query parameters options"); + param_description.add_options() + ("param_", po::value(), "name and value of substitution") + ; + + for (size_t i = 0; i < param_arguments.size(); ++i) { + po::parsed_options parsed_param = po::command_line_parser( + param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( + parseParam).run(); + po::variables_map param_options; + po::store(parsed_param, param_options); + + /// Save name and values of substitution in dictionary. + try { + String param = param_options["param_"].as(); + size_t pos = param.find('='); + if (pos != String::npos && pos + 1 != param.size()) { + if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) + throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + } else + throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + } + catch (const Exception & e) + { + std::string text = e.displayText(); + std::cerr << "Code: " << e.code() << ". " << text << std::endl; + exit(e.code()); + } + } + /// Parse main commandline options. 
po::parsed_options parsed = po::command_line_parser( common_arguments.size(), common_arguments.data()).options(main_description).run(); diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index ee8a50662c9..04a3e25a1c5 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -511,6 +511,12 @@ void HTTPHandler::processQuery( else if (param_could_be_skipped(it->first)) { } + else if (startsWith(it->first, "param_")) + { + /// Save name and values of substitution in dictionary. + String param_name = it->first.substr(strlen("param_")); + context.setParamSubstitution(param_name, it->second); + } else { /// All other query parameters are treated as settings. diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index a5dd5f8be62..683ff60df56 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -216,6 +216,14 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } +void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + String s; + readString(s, istr); + deserializeFromString(function, column, s); +} + + void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeJSONString(serializeToString(function, column, row_num), ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index 51de109865b..ebf4444503b 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -51,6 +51,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, 
WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeCustom.h b/dbms/src/DataTypes/DataTypeCustom.h index 93882361e20..316d56c1f5c 100644 --- a/dbms/src/DataTypes/DataTypeCustom.h +++ b/dbms/src/DataTypes/DataTypeCustom.h @@ -33,6 +33,10 @@ public: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text serialization with escaping but without quoting. 
*/ virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp index 44ce27a6e88..e1848fff2a0 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp @@ -32,6 +32,13 @@ DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization() { } +void DataTypeCustomSimpleTextSerialization::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String str; + readString(str, istr); + deserializeFromString(*this, column, str, settings); +} + void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeEscapedString(serializeToString(*this, column, row_num, settings), ostr); diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h index fb9be86d95f..cf1590c62e5 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h @@ -21,6 +21,10 @@ public: virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + /** Text serialization with escaping but without quoting. 
*/ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 24f760a1800..a784f1502e4 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -166,6 +166,14 @@ void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & is static_cast(column).getData().push_back(getValue(StringRef(field_name))); } +template +void DataTypeEnum::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + std::string field_name; + readString(field_name, istr); + static_cast(column).getData().push_back(getValue(StringRef(field_name))); +} + template void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index 19d4ad691dc..b99e2383860 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -96,6 +96,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp 
b/dbms/src/DataTypes/DataTypeFixedString.cpp index d1a007e16d2..34970fdaae9 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -176,6 +176,12 @@ void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & i } +void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const char * pos = reinterpret_cast(&static_cast(column).getChars()[n * row_num]); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 3019b6d225d..1a8a33d95c6 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -50,6 +50,8 @@ public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 1742c1cb2e9..8e6e12fadba 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -81,6 +81,11 @@ public: deserializeImpl(column, &IDataType::deserializeAsTextQuoted, istr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings); + } + 
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override { serializeImpl(column, row_num, &IDataType::serializeAsTextCSV, ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 0bfe8a157d6..c56d8616be2 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -251,6 +251,15 @@ void DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); }); } + +void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + safeDeserialize(column, + [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); }, + [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); }); +} + + void DataTypeNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const ColumnNullable & col = static_cast(column); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 1081f84dd11..2b098ea0476 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -53,6 +53,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const 
FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index d3334ef93bf..5d104c76fef 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -244,6 +244,12 @@ static inline void read(IColumn & column, Reader && reader) } +void DataTypeString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(column, [&](ColumnString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 0a3d2277e79..3cf85f69a1f 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -30,6 +30,7 @@ public: void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h index 8f897153fd0..6f6120deb4f 100644 --- a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h +++ b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h @@ -32,6 +32,11 @@ protected: serializeText(column, row_num, ostr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & 
settings) const override + { + deserializeText(column, istr, settings); + } + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override { deserializeText(column, istr, settings); diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 09c080f56cc..83b62a425ae 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -223,6 +223,18 @@ void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuf } } +void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + if (custom_text_serialization) + { + custom_text_serialization->deserializeWholeText(column, istr, settings); + } + else + { + deserializeWholeText(column, istr, settings); + } +} + void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index b55065e4c34..d96698f9b4c 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -244,6 +244,8 @@ public: */ virtual void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + virtual void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + /** Text serialization intended for using in JSON format. */ virtual void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; @@ -284,6 +286,8 @@ protected: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text serialization intended for using in JSON format. 
* force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. */ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5e429c6ce06..d5ff05b1a3b 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1866,6 +1866,29 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const } +bool Context::checkEmptyParamSubstitution() const +{ + return params_substitution.empty(); +} + + +void Context::setParamSubstitution(const String & name, const String & value) +{ + auto lock = getLock(); + if (!params_substitution.insert({name, value}).second) { + throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + }; +} + + +NameToNameMap Context::getParamSubstitution() const +{ + if (!params_substitution.empty()) + return params_substitution; + throw Exception("Context haven't query parameters", ErrorCodes::LOGICAL_ERROR); +} + + #if USE_EMBEDDED_COMPILER std::shared_ptr Context::getCompiledExpressionCache() const diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 73b280072bd..13079b37c62 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,6 +145,9 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; + NameToNameMap params_substitution; /// Dictionary with query parameters for prepared statements. + /// (key=name, value) + IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, /// when using ClickHouse as a library in some project. For example, it may contain host /// logger, some query identification information, profiling guards, etc. This field is @@ -467,6 +470,11 @@ public: SampleBlockCache & getSampleBlockCache() const; + /// Query parameters for prepared statements. 
+ bool checkEmptyParamSubstitution() const; + NameToNameMap getParamSubstitution() const; + void setParamSubstitution(const String & name, const String & value); + #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; void setCompiledExpressionCache(size_t cache_size); diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp new file mode 100644 index 00000000000..87379f351e6 --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -0,0 +1,60 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes { + extern const int UNKNOWN_IDENTIFIER; + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) +{ + for (auto & child : ast->children) + { + if (child->as()) + visitQP(child); + else + visit(child); + } +} + +String ReplaceQueryParameterVisitor::getParamValue(const String & name) +{ + auto search = params_substitution.find(name); + if (search != params_substitution.end()) + return search->second; + else + throw Exception("Expected same names in parameter field --param_{name}={value} and in query {name:type}", ErrorCodes::BAD_ARGUMENTS); +} + +void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) +{ + auto ast_param = ast->as(); + String value = getParamValue(ast_param->name); + const auto data_type = DataTypeFactory::instance().get(ast_param->type); + + auto temp_column_ptr = data_type->createColumn(); + IColumn &temp_column = *temp_column_ptr; + ReadBufferFromString read_buffer{value}; + FormatSettings format_settings; + data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); + + Field field = temp_column[0]; + ast = std::make_shared(field); +} + +} diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h 
b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h new file mode 100644 index 00000000000..df97a408d6f --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTQueryParameter; + +/// Get prepared statements in query, replace ASTQueryParameter with ASTLiteral. +class ReplaceQueryParameterVisitor +{ +public: + ReplaceQueryParameterVisitor(const NameToNameMap & params) + : params_substitution(params) + {} + + void visit(ASTPtr & ast); + +private: + const NameToNameMap params_substitution; + void visitQP(ASTPtr & ast); + String getParamValue(const String & name); +}; + +} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 93f6415d054..1ccde2bebb6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "DNSCacheUpdater.h" @@ -169,6 +170,13 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); + if (!context.checkEmptyParamSubstitution()) /// Avoid change from TCPHandler. + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
+ ReplaceQueryParameterVisitor visitor(context.getParamSubstitution()); + visitor.visit(ast); + } + auto * insert_query = ast->as(); if (insert_query && insert_query->settings_ast) diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp new file mode 100644 index 00000000000..559dbe8802d --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -0,0 +1,19 @@ +#include +#include + + +namespace DB +{ + +void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + String name_type = name + type; + settings.ostr << name_type; +} + +void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const +{ + writeString(name, ostr); +} + +} diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h new file mode 100644 index 00000000000..ac2a005f30d --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -0,0 +1,27 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Query parameter: name and type. +class ASTQueryParameter : public ASTWithAlias +{ +public: + String name, type; + + ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} + + /** Get the text that identifies this element. 
*/ + String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } + + ASTPtr clone() const override { return std::make_shared(*this); }; + +protected: + void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void appendColumnNameImpl(WriteBuffer & ostr) const override; +}; + +} diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 2741aa0d491..f14f37802c2 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1199,6 +1200,42 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } +bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningFiguredBracket) + return false; + + auto old_pos = ++pos; + String s_name, s_type; + + while (pos.isValid() && pos->type != TokenType::Colon) + ++pos; + + if (pos->type != TokenType::Colon) + { + expected.add(pos, "colon between name and type"); + return false; + } + + s_name = String(old_pos->begin, pos->begin); + old_pos = ++pos; + + while (pos.isValid() && pos->type != TokenType::ClosingFiguredBracket) + ++pos; + + if (pos->type != TokenType::ClosingFiguredBracket) + { + expected.add(pos, "closing figured bracket"); + return false; + } + + s_type = String(old_pos->begin, pos->begin); + ++pos; + node = std::make_shared(s_name, s_type); + return true; +} + + bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserSubquery().parse(pos, node, expected) @@ -1218,7 +1255,8 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) 
- || ParserCompoundIdentifier().parse(pos, node, expected); + || ParserCompoundIdentifier().parse(pos, node, expected) + || ParserSubstitutionExpression().parse(pos, node, expected); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index e3dc5ae44d0..d10670ec888 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -242,6 +242,17 @@ private: }; +/** A substitution expression. + * Parse query with parameter expression {name:type}. + */ +class ParserSubstitutionExpression : public IParserBase +{ +protected: + const char * getName() const { return "substitution"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + + /** The expression element is one of: an expression in parentheses, an array, a literal, a function, an identifier, an asterisk. */ class ParserExpressionElement : public IParserBase diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 0494eacd490..3e33759440d 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -173,7 +173,10 @@ Token Lexer::nextTokenImpl() return Token(TokenType::OpeningSquareBracket, token_begin, ++pos); case ']': return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); - + case '{': + return Token(TokenType::OpeningFiguredBracket, token_begin, ++pos); + case '}': + return Token(TokenType::ClosingFiguredBracket, token_begin, ++pos); case ',': return Token(TokenType::Comma, token_begin, ++pos); case ';': diff --git a/dbms/src/Parsers/Lexer.h b/dbms/src/Parsers/Lexer.h index 13cd00e3dd3..021b6ae7ed3 100644 --- a/dbms/src/Parsers/Lexer.h +++ b/dbms/src/Parsers/Lexer.h @@ -23,6 +23,9 @@ namespace DB M(OpeningSquareBracket) \ M(ClosingSquareBracket) \ \ + M(OpeningFiguredBracket) \ + M(ClosingFiguredBracket) \ + \ M(Comma) \ M(Semicolon) \ M(Dot) /** Compound identifiers, like a.b or tuple access operator a.1, (x, y).2. 
*/ \ diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp index dca93b469bd..ccc97298ed8 100644 --- a/dbms/src/Parsers/tests/lexer.cpp +++ b/dbms/src/Parsers/tests/lexer.cpp @@ -76,7 +76,6 @@ int main(int, char **) if (token.isEnd()) break; - writeChar(' ', out); auto it = hilite.find(token.type); From f6812bbb1a98db7b378626dce1849d582ef9b0d8 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 19 May 2019 02:57:26 +0300 Subject: [PATCH 018/191] fix style --- dbms/programs/client/Client.cpp | 8 +++++--- dbms/src/Interpreters/Context.cpp | 3 +-- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 10 ++++------ dbms/src/Parsers/ASTQueryParameter.h | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index ba603b77d23..0fee78ddb21 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1555,7 +1555,7 @@ private: size_t pos = s.find('_') + 1; /// Cut two first dash "--" and divide arg from name and value return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); - } + } public: void init(int argc, char ** argv) @@ -1707,7 +1707,8 @@ public: ("param_", po::value(), "name and value of substitution") ; - for (size_t i = 0; i < param_arguments.size(); ++i) { + for (size_t i = 0; i < param_arguments.size(); ++i) + { po::parsed_options parsed_param = po::command_line_parser( param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( parseParam).run(); @@ -1718,7 +1719,8 @@ public: try { String param = param_options["param_"].as(); size_t pos = param.find('='); - if (pos != String::npos && pos + 1 != param.size()) { + if (pos != String::npos && pos + 1 != param.size()) + { if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); } else diff --git 
a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d5ff05b1a3b..32bca217ef1 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1875,9 +1875,8 @@ bool Context::checkEmptyParamSubstitution() const void Context::setParamSubstitution(const String & name, const String & value) { auto lock = getLock(); - if (!params_substitution.insert({name, value}).second) { + if (!params_substitution.insert({name, value}).second) throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - }; } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 87379f351e6..c60706cd1ef 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,11 +1,8 @@ -#include - #include -#include +#include #include #include #include -#include #include #include #include @@ -15,7 +12,8 @@ namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int UNKNOWN_IDENTIFIER; extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -48,7 +46,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) const auto data_type = DataTypeFactory::instance().get(ast_param->type); auto temp_column_ptr = data_type->createColumn(); - IColumn &temp_column = *temp_column_ptr; + IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; FormatSettings format_settings; data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index ac2a005f30d..f6645b4876a 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: /** Get the text that identifies this element. 
*/ String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } - ASTPtr clone() const override { return std::make_shared(*this); }; + ASTPtr clone() const override { return std::make_shared(*this); } protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; From 11a5a6db70d67df014942a662050b5c96c5f1ab1 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 20 May 2019 15:16:51 +0300 Subject: [PATCH 019/191] Remove unused functions --- dbms/src/Storages/VirtualColumnUtils.cpp | 85 ++++++------------------ dbms/src/Storages/VirtualColumnUtils.h | 17 ++--- 2 files changed, 26 insertions(+), 76 deletions(-) diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 3ac32ce0f5b..cbb1feef3af 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -23,71 +23,11 @@ namespace DB { -namespace VirtualColumnUtils +namespace { -String chooseSuffix(const NamesAndTypesList & columns, const String & name) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - if (it.name == name + current_suffix) - { - done = false; - break; - } - if (done) break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector & names) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - { - for (size_t i = 0; i < names.size(); ++i) - { - if (it.name == names[i] + current_suffix) - { - done = false; - break; - } - } - if (!done) - break; - } - if (done) - break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) -{ - auto & select = ast->as(); - if (!select.with()) - 
select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); - - auto literal = std::make_shared(value); - literal->alias = column_name; - literal->prefer_alias_to_column_name = true; - select.with()->children.push_back(literal); -} - /// Verifying that the function depends only on the specified columns -static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) +bool isValidFunction(const ASTPtr & expression, const NameSet & columns) { for (size_t i = 0; i < expression->children.size(); ++i) if (!isValidFunction(expression->children[i], columns)) @@ -100,7 +40,7 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -static void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) +void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) { const auto * function = expression->as(); if (function && function->name == "and") @@ -115,7 +55,7 @@ static void extractFunctions(const ASTPtr & expression, const NameSet & columns, } /// Construct a conjunction from given functions -static ASTPtr buildWhereExpression(const ASTs & functions) +ASTPtr buildWhereExpression(const ASTs & functions) { if (functions.size() == 0) return nullptr; @@ -130,6 +70,23 @@ static ASTPtr buildWhereExpression(const ASTs & functions) return new_query; } +} + +namespace VirtualColumnUtils +{ + +void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) +{ + auto & select = ast->as(); + if (!select.with()) + select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); + + auto literal = std::make_shared(value); + literal->alias = column_name; + literal->prefer_alias_to_column_name = true; + select.with()->children.push_back(literal); +} + void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & 
context) { const auto & select = query->as(); diff --git a/dbms/src/Storages/VirtualColumnUtils.h b/dbms/src/Storages/VirtualColumnUtils.h index a1e1db4f04c..4976deaa4c9 100644 --- a/dbms/src/Storages/VirtualColumnUtils.h +++ b/dbms/src/Storages/VirtualColumnUtils.h @@ -3,7 +3,7 @@ #include #include -#include +#include namespace DB @@ -16,13 +16,6 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set -String chooseSuffix(const NamesAndTypesList & columns, const String & name); - -/// Calculate the minimum total numeric suffix to add to each string, -/// so that none is present in the set. -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector & names); - /// Adds to the select query section `select column_name as value` /// For example select _port as 9000. void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value); @@ -33,14 +26,14 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); /// Extract from the input stream a set of `name` column values -template -std::multiset extractSingleValueFromBlock(const Block & block, const String & name) +template +std::multiset extractSingleValueFromBlock(const Block & block, const String & name) { - std::multiset res; + std::multiset res; const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get()); + res.insert((*data.column)[i].get()); return res; } From 5e683180e80f6e4213d849e95429c550233bb866 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 20 May 2019 15:23:07 +0300 Subject: [PATCH 020/191] Update EN documentation on Merge Engine. 
--- docs/en/operations/table_engines/merge.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/table_engines/merge.md b/docs/en/operations/table_engines/merge.md index 366a5459bf8..f29075ec973 100644 --- a/docs/en/operations/table_engines/merge.md +++ b/docs/en/operations/table_engines/merge.md @@ -27,11 +27,11 @@ Example 2: Let's say you have a old table (WatchLog_old) and decided to change partitioning without moving data to a new table (WatchLog_new) and you need to see data from both tables. ``` -CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree(date, (UserId, EventType), 8192); INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); -CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); @@ -61,7 +61,9 @@ Virtual columns differ from normal columns in the following ways: - They are not selected when using the asterisk (`SELECT *`). - Virtual columns are not shown in `SHOW CREATE TABLE` and `DESC TABLE` queries. -The `Merge` type table contains a virtual `_table` column of the `String` type. (If the table already has a `_table` column, the virtual column is called `_table1`; if you already have `_table1`, it's called `_table2`, and so on.) It contains the name of the table that data was read from. +The `Merge` type table contains the virtual column `_table` of the type `String`. It contains the name of the table that data was read from. If any underlying table already has the column `_table`, then the virtual column is shadowed and is not accessible. 
+ + If the `WHERE/PREWHERE` clause contains conditions for the `_table` column that do not depend on other table columns (as one of the conjunction elements, or as an entire expression), these conditions are used as an index. The conditions are performed on a data set of table names to read data from, and the read operation will be performed from only those tables that the condition was triggered on. From cf39c4cc473487650c5848004f2ab41aaacd5fc7 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 21 May 2019 14:24:32 +0300 Subject: [PATCH 021/191] Embed virtual columns into IStorage --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Storages/AlterCommands.cpp | 6 +- dbms/src/Storages/ColumnsDescription.cpp | 23 ++++--- dbms/src/Storages/ColumnsDescription.h | 9 +-- dbms/src/Storages/IStorage.cpp | 40 +++++++---- dbms/src/Storages/IStorage.h | 24 +++---- dbms/src/Storages/Kafka/StorageKafka.cpp | 66 ++++++++++++------- dbms/src/Storages/Kafka/StorageKafka.h | 21 ++---- dbms/src/Storages/StorageCatBoostPool.cpp | 8 +-- dbms/src/Storages/StorageMerge.cpp | 55 ++++++---------- dbms/src/Storages/StorageMerge.h | 3 +- .../System/StorageSystemPartsBase.cpp | 2 +- 12 files changed, 142 insertions(+), 117 deletions(-) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 198ea0f39db..a6f91356dbe 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -75,7 +75,7 @@ void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storag if (select_query) { - const auto & storage_aliases = storage->getColumns().getAliases(); + const auto & storage_aliases = storage->getColumns().getAliasesAndVirtuals(); source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); } } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 88f3e909f49..7a1aebe2580 100644 --- 
a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -182,7 +182,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri { if (type == ADD_COLUMN) { - ColumnDescription column(column_name, data_type); + ColumnDescription column(column_name, data_type, false); if (default_expression) { column.default_desc.kind = default_kind; @@ -384,8 +384,8 @@ void AlterCommands::validate(const IStorage & table, const Context & context) column_to_command_idx[column_name] = i; /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions - columns.add(ColumnDescription( - column_name, command.data_type ? command.data_type : std::make_shared())); + columns.add( + ColumnDescription(column_name, command.data_type ? command.data_type : std::make_shared(), false)); if (command.default_expression) { diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 55eaf1b5022..c51807c2679 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -32,6 +32,11 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; } +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) + : name(std::move(name_)), type(std::move(type_)), is_virtual(is_virtual_) +{ +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ? 
codec_ptr->getCodecDesc() : String(); }; @@ -115,10 +120,10 @@ void ColumnDescription::readText(ReadBuffer & buf) } -ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary) +ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, bool all_virtuals) { for (auto & elem : ordinary) - add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + add(ColumnDescription(std::move(elem.name), std::move(elem.type), all_virtuals)); } @@ -227,7 +232,7 @@ NamesAndTypesList ColumnsDescription::getOrdinary() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Default) + if (col.default_desc.kind == ColumnDefaultKind::Default && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -241,11 +246,11 @@ NamesAndTypesList ColumnsDescription::getMaterialized() const return ret; } -NamesAndTypesList ColumnsDescription::getAliases() const +NamesAndTypesList ColumnsDescription::getAliasesAndVirtuals() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Alias) + if (col.default_desc.kind == ColumnDefaultKind::Alias || col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -285,7 +290,7 @@ NamesAndTypesList ColumnsDescription::getAllPhysical() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -294,7 +299,7 @@ Names ColumnsDescription::getNamesOfPhysical() const { Names ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name); return ret; } @@ -302,7 +307,7 @@ Names ColumnsDescription::getNamesOfPhysical() const NameAndTypePair ColumnsDescription::getPhysical(const 
String & column_name) const { auto it = columns.get<1>().find(column_name); - if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias) + if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias || it->is_virtual) throw Exception("There is no physical column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); return NameAndTypePair(it->name, it->type); } @@ -310,7 +315,7 @@ NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) cons bool ColumnsDescription::hasPhysical(const String & column_name) const { auto it = columns.get<1>().find(column_name); - return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias; + return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias && !it->is_virtual; } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 7ec8ed2c44f..44a60d2dc7e 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -32,9 +32,10 @@ struct ColumnDescription String comment; CompressionCodecPtr codec; ASTPtr ttl; + bool is_virtual = false; ColumnDescription() = default; - ColumnDescription(String name_, DataTypePtr type_) : name(std::move(name_)), type(std::move(type_)) {} + ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } @@ -49,7 +50,7 @@ class ColumnsDescription { public: ColumnsDescription() = default; - explicit ColumnsDescription(NamesAndTypesList ordinary_); + explicit ColumnsDescription(NamesAndTypesList ordinary_, bool all_virtuals = false); /// `after_column` can be a Nested column name; void add(ColumnDescription column, const String & after_column = String()); @@ -66,8 +67,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList 
getMaterialized() const; - NamesAndTypesList getAliases() const; - /// ordinary + materialized + aliases. + NamesAndTypesList getAliasesAndVirtuals() const; + /// ordinary + materialized + aliases + virtuals. NamesAndTypesList getAll() const; using ColumnTTLs = std::unordered_map; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 06320cc1f30..ad8130474a1 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -25,28 +25,21 @@ IStorage::IStorage(ColumnsDescription columns_) setColumns(std::move(columns_)); } +IStorage::IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_) : virtuals(std::move(virtuals_)) +{ + setColumns(std::move(columns_)); +} + const ColumnsDescription & IStorage::getColumns() const { return columns; } -void IStorage::setColumns(ColumnsDescription columns_) -{ - if (columns_.getOrdinary().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - columns = std::move(columns_); -} - const IndicesDescription & IStorage::getIndices() const { return indices; } -void IStorage::setIndices(IndicesDescription indices_) -{ - indices = std::move(indices_); -} - NameAndTypePair IStorage::getColumn(const String & column_name) const { /// By default, we assume that there are no virtual columns in the storage. 
@@ -266,6 +259,29 @@ void IStorage::check(const Block & block, bool need_all) const } } +void IStorage::setColumns(ColumnsDescription columns_) +{ + if (columns_.getOrdinary().empty()) + throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + columns = std::move(columns_); + + for (const auto & column : virtuals) + { + if (!columns.has(column.name)) + columns.add(column); + } +} + +void IStorage::setIndices(IndicesDescription indices_) +{ + indices = std::move(indices_); +} + +bool IStorage::isVirtualColumn(const String & column_name) const +{ + return getColumns().get(column_name).is_virtual; +} + TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) { TableStructureReadLockHolder result; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index f18592ebce5..913b97a445b 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -50,6 +50,7 @@ class IStorage : public std::enable_shared_from_this public: IStorage() = default; explicit IStorage(ColumnsDescription columns_); + IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_); virtual ~IStorage() = default; IStorage(const IStorage &) = delete; @@ -82,11 +83,8 @@ public: public: /// thread-unsafe part. lockStructure must be acquired - const ColumnsDescription & getColumns() const; - void setColumns(ColumnsDescription columns_); - + const ColumnsDescription & getColumns() const; /// returns combined set of columns const IndicesDescription & getIndices() const; - void setIndices(IndicesDescription indices_); /// NOTE: these methods should include virtual columns, /// but should NOT include ALIAS columns (they are treated separately). @@ -112,8 +110,18 @@ public: /// thread-unsafe part. lockStructure must be acquired /// If |need_all| is set, then checks that all the columns of the table are in the block. 
void check(const Block & block, bool need_all = false) const; +protected: /// still thread-unsafe part. + void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. + void setIndices(IndicesDescription indices_); + + /// Returns whether the column is virtual - by default all columns are real. + /// Initially reserved virtual column name may be shadowed by real column. + /// Returns false even for non-existent non-virtual columns. + virtual bool isVirtualColumn(const String & column_name) const; + private: - ColumnsDescription columns; + ColumnsDescription columns; /// combined real and virtual columns + const ColumnsDescription virtuals = {}; IndicesDescription indices; public: @@ -322,12 +330,6 @@ public: /// Returns additional columns that need to be read for FINAL to work. virtual Names getColumnsRequiredForFinal() const { return {}; } -protected: - /// Returns whether the column is virtual - by default all columns are real. - /// Initially reserved virtual column name may be shadowed by real column. - /// Returns false even for non-existent non-virtual columns. - virtual bool isVirtualColumn(const String & /* column_name */) const { return false; } - private: /// You always need to take the next three locks in this order. 
diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e3340a2c573..b7bd6607836 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -69,21 +71,36 @@ StorageKafka::StorageKafka( const std::string & database_name_, Context & context_, const ColumnsDescription & columns_, - const String & brokers_, const String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken_, + const String & brokers_, + const String & group_, + const Names & topics_, + const String & format_name_, + char row_delimiter_, + const String & schema_name_, + size_t num_consumers_, + UInt64 max_block_size_, + size_t skip_broken_, bool intermediate_commit_) - : IStorage{columns_}, - table_name(table_name_), database_name(database_name_), global_context(context_), - topics(global_context.getMacros()->expand(topics_)), - brokers(global_context.getMacros()->expand(brokers_)), - group(global_context.getMacros()->expand(group_)), - format_name(global_context.getMacros()->expand(format_name_)), - row_delimiter(row_delimiter_), - schema_name(global_context.getMacros()->expand(schema_name_)), - num_consumers(num_consumers_), max_block_size(max_block_size_), log(&Logger::get("StorageKafka (" + table_name_ + ")")), - semaphore(0, num_consumers_), - skip_broken(skip_broken_), intermediate_commit(intermediate_commit_) + : IStorage( + columns_, + ColumnsDescription({{"_topic", std::make_shared()}, + {"_key", std::make_shared()}, + {"_offset", std::make_shared()}}, true)) + , table_name(table_name_) + , database_name(database_name_) + , global_context(context_) + , topics(global_context.getMacros()->expand(topics_)) + , brokers(global_context.getMacros()->expand(brokers_)) + , 
group(global_context.getMacros()->expand(group_)) + , format_name(global_context.getMacros()->expand(format_name_)) + , row_delimiter(row_delimiter_) + , schema_name(global_context.getMacros()->expand(schema_name_)) + , num_consumers(num_consumers_) + , max_block_size(max_block_size_) + , log(&Logger::get("StorageKafka (" + table_name_ + ")")) + , semaphore(0, num_consumers_) + , skip_broken(skip_broken_) + , intermediate_commit(intermediate_commit_) { task = global_context.getSchedulePool().createTask(log->name(), [this]{ streamThread(); }); task->deactivate(); @@ -91,15 +108,13 @@ StorageKafka::StorageKafka( BlockInputStreams StorageKafka::read( - const Names & column_names, - const SelectQueryInfo & /*query_info*/, + const Names & /* column_names */, + const SelectQueryInfo & /* query_info */, const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t /*max_block_size*/, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, unsigned num_streams) { - check(column_names); - if (num_created_consumers == 0) return BlockInputStreams(); @@ -111,8 +126,8 @@ BlockInputStreams StorageKafka::read( // Claim as many consumers as requested, but don't block for (size_t i = 0; i < stream_count; ++i) { - // Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block - // TODO That leads to awful performance. + /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block + /// TODO: that leads to awful performance. 
streams.emplace_back(std::make_shared(*this, context, schema_name, 1)); } @@ -154,6 +169,13 @@ void StorageKafka::shutdown() } +void StorageKafka::rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) +{ + table_name = new_table_name; + database_name = new_database_name; +} + + void StorageKafka::updateDependencies() { task->activateAndSchedule(); diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index 3a40e29a03e..ae9e9baa724 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -39,11 +39,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override - { - table_name = new_table_name; - database_name = new_database_name; - } + void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override; void updateDependencies() override; @@ -56,18 +52,15 @@ private: const String brokers; const String group; const String format_name; - // Optional row delimiter for generating char delimited stream - // in order to make various input stream parsers happy. - char row_delimiter; + char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. const String schema_name; - /// Total number of consumers - size_t num_consumers; - /// Maximum block size for insertion into this table - UInt64 max_block_size; - /// Number of actually created consumers. + size_t num_consumers; /// total number of consumers + UInt64 max_block_size; /// maximum block size for insertion into this table + /// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called). /// In this case we still need to be able to shutdown() properly. 
- size_t num_created_consumers = 0; + size_t num_created_consumers = 0; /// number of actually created consumers. + Poco::Logger * log; // Consumer list diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index 1258ebec7e2..b76150611c4 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -254,12 +254,12 @@ void StorageCatBoostPool::createSampleBlockAndColumns() /// Order is important: first numeric columns, then categorial, then all others. for (const auto & column : num_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : cat_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : other_columns) { - DB::ColumnDescription column_desc(column.name, column.type); + DB::ColumnDescription column_desc(column.name, column.type, false); /// We assign Materialized kind to the column so that it doesn't show in SELECT *. /// Because the table is readonly, we do not need default expression. 
column_desc.default_desc.kind = ColumnDefaultKind::Materialized; @@ -270,7 +270,7 @@ void StorageCatBoostPool::createSampleBlockAndColumns() { if (!desc.alias.empty()) { - DB::ColumnDescription column(desc.alias, get_type(desc.column_type)); + DB::ColumnDescription column(desc.alias, get_type(desc.column_type), false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(desc.column_name); columns.add(std::move(column)); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index c70f52b9dd2..7146fc32487 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -50,9 +50,11 @@ StorageMerge::StorageMerge( const String & source_database_, const String & table_name_regexp_, const Context & context_) - : IStorage{columns_}, - name(name_), source_database(source_database_), - table_name_regexp(table_name_regexp_), global_context(context_) + : IStorage(columns_, ColumnsDescription({{"_table", std::make_shared()}}, true)) + , name(name_) + , source_database(source_database_) + , table_name_regexp(table_name_regexp_) + , global_context(context_) { } @@ -60,44 +62,29 @@ StorageMerge::StorageMerge( /// NOTE: structure of underlying tables as well as their set are not constant, /// so the results of these methods may become obsolete after the call. 
-bool StorageMerge::isVirtualColumn(const String & column_name) const -{ - if (column_name != "_table") - return false; - - return !IStorage::hasColumn(column_name); -} - NameAndTypePair StorageMerge::getColumn(const String & column_name) const { - if (IStorage::hasColumn(column_name)) - return IStorage::getColumn(column_name); + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->getColumn(column_name); + } - /// virtual column of the Merge table itself - if (column_name == "_table") - return { column_name, std::make_shared() }; - - /// virtual (and real) columns of the underlying tables - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->getColumn(column_name); - - throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + return IStorage::getColumn(column_name); } + bool StorageMerge::hasColumn(const String & column_name) const { - if (column_name == "_table") - return true; + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->hasColumn(column_name); + } - if (IStorage::hasColumn(column_name)) - return true; - - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->hasColumn(column_name); - - return false; + return true; } @@ -196,7 +183,7 @@ BlockInputStreams StorageMerge::read( for (const auto & column_name : column_names) { - if (isVirtualColumn(column_name)) + if (column_name == "_table" && isVirtualColumn(column_name)) has_table_virtual_column = true; else real_column_names.push_back(column_name); diff --git a/dbms/src/Storages/StorageMerge.h b/dbms/src/Storages/StorageMerge.h index 477da9829b7..4253256abf9 100644 --- a/dbms/src/Storages/StorageMerge.h +++ b/dbms/src/Storages/StorageMerge.h @@ -26,6 +26,7 @@ 
public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } + /// Consider columns coming from the underlying tables NameAndTypePair getColumn(const String & column_name) const override; bool hasColumn(const String & column_name) const override; @@ -84,8 +85,6 @@ protected: void convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage); - - bool isVirtualColumn(const String & column_name) const override; }; } diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index 85fd64195ca..190dbda7e68 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -277,7 +277,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL auto add_alias = [&](const String & alias_name, const String & column_name) { - ColumnDescription column(alias_name, columns.get(column_name).type); + ColumnDescription column(alias_name, columns.get(column_name).type, false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(column_name); columns.add(column); From dd906eabdc029135c19d122315cc90a74605cda9 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 22 May 2019 22:38:43 +0300 Subject: [PATCH 022/191] [WIP] refactoring --- dbms/src/Core/Types.h | 2 +- dbms/src/Storages/ColumnsDescription.h | 6 +- dbms/src/Storages/IStorage.h | 6 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 29 ++++-- .../Storages/Kafka/KafkaBlockInputStream.h | 7 +- .../Kafka/ReadBufferFromKafkaConsumer.h | 6 ++ dbms/src/Storages/Kafka/StorageKafka.cpp | 96 ++++++++++--------- dbms/src/Storages/Kafka/StorageKafka.h | 39 ++++---- 8 files changed, 108 insertions(+), 83 deletions(-) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index 
1209b1b1d72..61216a637f3 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include namespace DB diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 44a60d2dc7e..e7f2919c3bd 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -68,8 +68,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; NamesAndTypesList getAliasesAndVirtuals() const; - /// ordinary + materialized + aliases + virtuals. - NamesAndTypesList getAll() const; + NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. + NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + virtuals. using ColumnTTLs = std::unordered_map; ColumnTTLs getColumnTTLs() const; @@ -88,8 +88,6 @@ public: throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); } - /// ordinary + materialized. - NamesAndTypesList getAllPhysical() const; Names getNamesOfPhysical() const; bool hasPhysical(const String & column_name) const; NameAndTypePair getPhysical(const String & column_name) const; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 913b97a445b..8f1a7b06d9e 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -91,9 +91,9 @@ public: /// thread-unsafe part. lockStructure must be acquired virtual NameAndTypePair getColumn(const String & column_name) const; virtual bool hasColumn(const String & column_name) const; - Block getSampleBlock() const; - Block getSampleBlockNonMaterialized() const; - Block getSampleBlockForColumns(const Names & column_names) const; /// including virtual and alias columns. + Block getSampleBlock() const; /// ordinary + materialized. + Block getSampleBlockNonMaterialized() const; /// ordinary. 
+ Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 56b1db85a3f..abc4e702a6e 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -7,15 +7,15 @@ namespace DB { KafkaBlockInputStream::KafkaBlockInputStream( - StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_) - : storage(storage_), context(context_), max_block_size(max_block_size_) + StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_) + : storage(storage_), context(context_), column_names(columns), max_block_size(max_block_size_) { context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV) context.setSetting("input_format_allow_errors_ratio", 0.); - context.setSetting("input_format_allow_errors_num", storage.skip_broken); + context.setSetting("input_format_allow_errors_num", storage.skipBroken()); - if (!schema.empty()) - context.setSetting("format_schema", schema); + if (!storage.getSchemaName().empty()) + context.setSetting("format_schema", storage.getSchemaName()); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -29,6 +29,11 @@ KafkaBlockInputStream::~KafkaBlockInputStream() storage.pushBuffer(buffer); } +Block KafkaBlockInputStream::getHeader() const +{ + return storage.getSampleBlockForColumns(column_names); +} + void KafkaBlockInputStream::readPrefixImpl() { buffer = storage.tryClaimBuffer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds()); @@ -37,20 +42,30 @@ void KafkaBlockInputStream::readPrefixImpl() if (!buffer) buffer = 
storage.createBuffer(); - buffer->subBufferAs()->subscribe(storage.topics); + buffer->subBufferAs()->subscribe(storage.getTopics()); const auto & limits = getLimits(); const size_t poll_timeout = buffer->subBufferAs()->pollTimeout(); size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; rows_portion_size = std::max(rows_portion_size, 1ul); - auto child = FormatFactory::instance().getInput(storage.format_name, *buffer, storage.getSampleBlock(), context, max_block_size, rows_portion_size); + auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support + auto child = FormatFactory::instance().getInput( + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size); child->setLimits(limits); addChild(child); broken = true; } +Block KafkaBlockInputStream::readImpl() +{ + /// FIXME: materialize MATERIALIZED columns here. 
+ Block block = children.back()->read(); + /// TODO: add virtual columns here + return block; +} + void KafkaBlockInputStream::readSuffixImpl() { buffer->subBufferAs()->commit(); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 1b6c8b8ae25..dcaec1f5066 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -11,19 +11,20 @@ namespace DB class KafkaBlockInputStream : public IBlockInputStream { public: - KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_); + KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_); ~KafkaBlockInputStream() override; String getName() const override { return storage.getName(); } - Block readImpl() override { return children.back()->read(); } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override; void readPrefixImpl() override; + Block readImpl() override; void readSuffixImpl() override; private: StorageKafka & storage; Context context; + Names column_names; UInt64 max_block_size; BufferPtr buffer; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 20a1c5830d7..9bb3fd473ab 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -33,6 +34,11 @@ public: auto pollTimeout() { return poll_timeout; } + // Return values for the message that's being read. 
+ String currentTopic() { return current[-1].get_topic(); } + String currentKey() { return current[-1].get_key(); } + auto currentOffset() { return current[-1].get_offset(); } + private: using Messages = std::vector; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index b7bd6607836..79622b79856 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -108,7 +108,7 @@ StorageKafka::StorageKafka( BlockInputStreams StorageKafka::read( - const Names & /* column_names */, + const Names & column_names, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -127,8 +127,8 @@ BlockInputStreams StorageKafka::read( for (size_t i = 0; i < stream_count; ++i) { /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block - /// TODO: that leads to awful performance. - streams.emplace_back(std::make_shared(*this, context, schema_name, 1)); + /// TODO: probably that leads to awful performance. 
+ streams.emplace_back(std::make_shared(*this, context, column_names, 1)); } LOG_DEBUG(log, "Starting reading " << streams.size() << " streams"); @@ -182,46 +182,6 @@ void StorageKafka::updateDependencies() } -cppkafka::Configuration StorageKafka::createConsumerConfiguration() -{ - cppkafka::Configuration conf; - - LOG_TRACE(log, "Setting brokers: " << brokers); - conf.set("metadata.broker.list", brokers); - - LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); - conf.set("group.id", group); - - conf.set("client.id", VERSION_FULL); - - // If no offset stored for this group, read all messages from the start - conf.set("auto.offset.reset", "smallest"); - - // We manually commit offsets after a stream successfully finished - conf.set("enable.auto.commit", "false"); - - // Ignore EOF messages - conf.set("enable.partition.eof", "false"); - - // for debug logs inside rdkafka - // conf.set("debug", "consumer,cgrp,topic,fetch"); - - // Update consumer configuration from the configuration - const auto & config = global_context.getConfigRef(); - if (config.has(CONFIG_PREFIX)) - loadFromConfig(conf, config, CONFIG_PREFIX); - - // Update consumer topic-specific configuration - for (const auto & topic : topics) - { - const auto topic_config_key = CONFIG_PREFIX + "_" + topic; - if (config.has(topic_config_key)) - loadFromConfig(conf, config, topic_config_key); - } - - return conf; -} - BufferPtr StorageKafka::createBuffer() { // Create a consumer and subscribe to topics @@ -269,6 +229,47 @@ void StorageKafka::pushBuffer(BufferPtr buffer) semaphore.set(); } + +cppkafka::Configuration StorageKafka::createConsumerConfiguration() +{ + cppkafka::Configuration conf; + + LOG_TRACE(log, "Setting brokers: " << brokers); + conf.set("metadata.broker.list", brokers); + + LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); + conf.set("group.id", group); + + conf.set("client.id", VERSION_FULL); + + // If no offset stored for this group, read all 
messages from the start + conf.set("auto.offset.reset", "smallest"); + + // We manually commit offsets after a stream successfully finished + conf.set("enable.auto.commit", "false"); + + // Ignore EOF messages + conf.set("enable.partition.eof", "false"); + + // for debug logs inside rdkafka + // conf.set("debug", "consumer,cgrp,topic,fetch"); + + // Update consumer configuration from the configuration + const auto & config = global_context.getConfigRef(); + if (config.has(CONFIG_PREFIX)) + loadFromConfig(conf, config, CONFIG_PREFIX); + + // Update consumer topic-specific configuration + for (const auto & topic : topics) + { + const auto topic_config_key = CONFIG_PREFIX + "_" + topic; + if (config.has(topic_config_key)) + loadFromConfig(conf, config, topic_config_key); + } + + return conf; +} + bool StorageKafka::checkDependencies(const String & current_database_name, const String & current_table_name) { // Check if all dependencies are attached @@ -344,12 +345,16 @@ bool StorageKafka::streamToViews() if (block_size == 0) block_size = settings.max_block_size.value; + // Execute the query + InterpreterInsertQuery interpreter{insert, global_context}; + auto block_io = interpreter.execute(); + // Create a stream for each consumer and join them in a union stream BlockInputStreams streams; streams.reserve(num_created_consumers); for (size_t i = 0; i < num_created_consumers; ++i) { - auto stream = std::make_shared(*this, global_context, schema_name, block_size); + auto stream = std::make_shared(*this, global_context, block_io.out->getHeader().getNames(), block_size); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL @@ -366,9 +371,6 @@ bool StorageKafka::streamToViews() else in = streams[0]; - // Execute the query - InterpreterInsertQuery interpreter{insert, global_context}; - auto block_io = interpreter.execute(); copyData(*in, *block_io.out, &stream_cancelled); // Check whether the limits were applied during query execution diff --git 
a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index ae9e9baa724..f9b6609def5 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -20,9 +20,6 @@ namespace DB */ class StorageKafka : public ext::shared_ptr_helper, public IStorage { - friend class KafkaBlockInputStream; - friend class KafkaBlockOutputStream; - public: std::string getName() const override { return "Kafka"; } std::string getTableName() const override { return table_name; } @@ -43,6 +40,27 @@ public: void updateDependencies() override; + BufferPtr createBuffer(); + BufferPtr claimBuffer(); + BufferPtr tryClaimBuffer(long wait_ms); + void pushBuffer(BufferPtr buf); + + const auto & getTopics() const { return topics; } + const auto & getFormatName() const { return format_name; } + const auto & getSchemaName() const { return schema_name; } + const auto & skipBroken() const { return skip_broken; } + +protected: + StorageKafka( + const std::string & table_name_, + const std::string & database_name_, + Context & context_, + const ColumnsDescription & columns_, + const String & brokers_, const String & group_, const Names & topics_, + const String & format_name_, char row_delimiter_, const String & schema_name_, + size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, + bool intermediate_commit_); + private: // Configuration and state String table_name; @@ -77,25 +95,10 @@ private: std::atomic stream_cancelled{false}; cppkafka::Configuration createConsumerConfiguration(); - BufferPtr createBuffer(); - BufferPtr claimBuffer(); - BufferPtr tryClaimBuffer(long wait_ms); - void pushBuffer(BufferPtr buf); void streamThread(); bool streamToViews(); bool checkDependencies(const String & database_name, const String & table_name); - -protected: - StorageKafka( - const std::string & table_name_, - const std::string & database_name_, - Context & context_, - const ColumnsDescription & columns_, - const String & brokers_, const 
String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, - bool intermediate_commit_); }; } From 800854119e059b7213d2a3fb561458651593e4af Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 14:15:18 +0300 Subject: [PATCH 023/191] Add buffer callback to fill in virtual columns --- dbms/src/DataStreams/OneBlockInputStream.h | 2 +- dbms/src/Formats/BinaryRowInputStream.cpp | 2 ++ dbms/src/Formats/CSVRowInputStream.cpp | 1 + dbms/src/Formats/CapnProtoRowInputStream.cpp | 1 + dbms/src/Formats/FormatFactory.cpp | 12 ++++++++++-- dbms/src/Formats/FormatFactory.h | 14 ++++++++++++-- dbms/src/Formats/JSONEachRowRowInputStream.cpp | 1 + dbms/src/Formats/NativeFormat.cpp | 1 + dbms/src/Formats/ParquetBlockInputStream.cpp | 1 + dbms/src/Formats/ProtobufRowInputStream.cpp | 1 + dbms/src/Formats/TSKVRowInputStream.cpp | 1 + dbms/src/Formats/TabSeparatedRowInputStream.cpp | 3 +++ dbms/src/Formats/ValuesRowInputStream.cpp | 1 + dbms/src/Interpreters/SyntaxAnalyzer.cpp | 4 +++- dbms/src/Storages/ColumnsDescription.cpp | 13 +++++++++++-- dbms/src/Storages/ColumnsDescription.h | 3 ++- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 12 +++++++++++- dbms/src/Storages/Kafka/KafkaBlockInputStream.h | 1 + .../Storages/Kafka/ReadBufferFromKafkaConsumer.h | 6 +++--- 19 files changed, 67 insertions(+), 13 deletions(-) diff --git a/dbms/src/DataStreams/OneBlockInputStream.h b/dbms/src/DataStreams/OneBlockInputStream.h index 3f1da34fcd8..168053b4fb3 100644 --- a/dbms/src/DataStreams/OneBlockInputStream.h +++ b/dbms/src/DataStreams/OneBlockInputStream.h @@ -12,7 +12,7 @@ namespace DB class OneBlockInputStream : public IBlockInputStream { public: - OneBlockInputStream(const Block & block_) : block(block_) {} + explicit OneBlockInputStream(const Block & block_) : block(block_) {} String getName() const override { return "One"; } diff --git 
a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index c710b17ee9e..37b405c18df 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -65,6 +65,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -78,6 +79,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index bb348faa96d..6c118f73f01 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -479,6 +479,7 @@ void registerInputFormatCSV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 414a25cf39c..e83de3f676e 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -307,6 +307,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 08f0355064b..f9454ab7f65 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -27,7 +27,14 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & 
name) } -BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf, const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size) const +BlockInputStreamPtr FormatFactory::getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size, + BufferCallback callback) const { const auto & input_getter = getCreators(name).first; if (!input_getter) @@ -47,7 +54,8 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; - return input_getter(buf, sample, context, max_block_size, rows_portion_size, format_settings); + return input_getter( + buf, sample, context, max_block_size, rows_portion_size, callback ? callback : [] {}, format_settings); } diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 843d866301d..79e3d98659d 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -24,6 +24,9 @@ class WriteBuffer; */ class FormatFactory final : public ext::singleton { +public: + using BufferCallback = std::function; + private: using InputCreator = std::function; using OutputCreator = std::function; public: - BlockInputStreamPtr getInput(const String & name, ReadBuffer & buf, - const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size = 0) const; + BlockInputStreamPtr getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size = 0, + BufferCallback callback = {}) const; BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const; diff --git 
a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index 5055d6c0c7d..30a140edace 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -260,6 +260,7 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 88e727fdd3f..06cce134e57 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -14,6 +14,7 @@ void registerInputFormatNative(FormatFactory & factory) const Context &, UInt64 /* max_block_size */, UInt64 /* min_read_rows */, + FormatFactory::BufferCallback /* callback */, const FormatSettings &) { return std::make_shared(buf, sample, 0); diff --git a/dbms/src/Formats/ParquetBlockInputStream.cpp b/dbms/src/Formats/ParquetBlockInputStream.cpp index a573969b65f..1cd1ca4ae40 100644 --- a/dbms/src/Formats/ParquetBlockInputStream.cpp +++ b/dbms/src/Formats/ParquetBlockInputStream.cpp @@ -477,6 +477,7 @@ void registerInputFormatParquet(FormatFactory & factory) const Context & context, UInt64 /* max_block_size */, UInt64 /* rows_portion_size */, + FormatFactory::BufferCallback /* callback */, const FormatSettings & /* settings */) { return std::make_shared(buf, sample, context); }); } diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index 44d830f56ea..dc658401de4 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -73,6 +73,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return 
std::make_shared( diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index ac89d5ec1c5..17038dc36ad 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -199,6 +199,7 @@ void registerInputFormatTSKV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index 884bc49454f..f7fd7783725 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -457,6 +457,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -473,6 +474,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -489,6 +491,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index b2d972d678b..ba2a34a84ef 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -156,6 +156,7 @@ void registerInputFormatValues(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return 
std::make_shared( diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index a6f91356dbe..1fa874f3be5 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -75,8 +75,10 @@ void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storag if (select_query) { - const auto & storage_aliases = storage->getColumns().getAliasesAndVirtuals(); + const auto & storage_aliases = storage->getColumns().getAliases(); + const auto & storage_virtuals = storage->getColumns().getVirtuals(); source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); + source_columns.insert(source_columns.end(), storage_virtuals.begin(), storage_virtuals.end()); } } } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index c51807c2679..2dbe308ea57 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -246,15 +246,24 @@ NamesAndTypesList ColumnsDescription::getMaterialized() const return ret; } -NamesAndTypesList ColumnsDescription::getAliasesAndVirtuals() const +NamesAndTypesList ColumnsDescription::getAliases() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Alias || col.is_virtual) + if (col.default_desc.kind == ColumnDefaultKind::Alias) ret.emplace_back(col.name, col.type); return ret; } +NamesAndTypesList ColumnsDescription::getVirtuals() const +{ + NamesAndTypesList result; + for (const auto & column : columns) + if (column.is_virtual) + result.emplace_back(column.name, column.type); + return result; +} + NamesAndTypesList ColumnsDescription::getAll() const { NamesAndTypesList ret; diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index e7f2919c3bd..d0d042498fa 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ 
b/dbms/src/Storages/ColumnsDescription.h @@ -67,7 +67,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; - NamesAndTypesList getAliasesAndVirtuals() const; + NamesAndTypesList getAliases() const; + NamesAndTypesList getVirtuals() const; NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + virtuals. diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index abc4e702a6e..c5bff5f2b1a 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -16,6 +16,8 @@ KafkaBlockInputStream::KafkaBlockInputStream( if (!storage.getSchemaName().empty()) context.setSetting("format_schema", storage.getSchemaName()); + + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -50,8 +52,16 @@ void KafkaBlockInputStream::readPrefixImpl() rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support + auto buffer_callback = [this] + { + const auto * sub_buffer = buffer->subBufferAs(); + virtual_columns[0]->insert(sub_buffer->currentTopic()); // "topic" + virtual_columns[1]->insert(sub_buffer->currentKey()); // "key" + virtual_columns[2]->insert(sub_buffer->currentOffset()); // "offset" + }; + auto child = FormatFactory::instance().getInput( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size); + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, buffer_callback); child->setLimits(limits); addChild(child); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 
dcaec1f5066..d51100ce938 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -28,6 +28,7 @@ private: UInt64 max_block_size; BufferPtr buffer; + MutableColumns virtual_columns; bool broken = true, claimed = false; }; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 9bb3fd473ab..acfb88d3160 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -35,9 +35,9 @@ public: auto pollTimeout() { return poll_timeout; } // Return values for the message that's being read. - String currentTopic() { return current[-1].get_topic(); } - String currentKey() { return current[-1].get_key(); } - auto currentOffset() { return current[-1].get_offset(); } + String currentTopic() const { return current[-1].get_topic(); } + String currentKey() const { return current[-1].get_key(); } + auto currentOffset() const { return current[-1].get_offset(); } private: using Messages = std::vector; From 19a850ad7501c9f94b39b14c1bad319ecc35a41b Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 16:20:25 +0300 Subject: [PATCH 024/191] Use read callback to populate virtual columns in Kafka Engine --- dbms/src/Core/Block.cpp | 2 ++ dbms/src/Formats/BinaryRowInputStream.cpp | 8 ++++---- .../BlockInputStreamFromRowInputStream.cpp | 11 ++++++++-- .../BlockInputStreamFromRowInputStream.h | 3 +++ dbms/src/Formats/CSVRowInputStream.cpp | 4 ++-- dbms/src/Formats/CapnProtoRowInputStream.cpp | 3 ++- dbms/src/Formats/FormatFactory.cpp | 2 +- dbms/src/Formats/FormatFactory.h | 6 +++--- .../src/Formats/JSONEachRowRowInputStream.cpp | 4 ++-- dbms/src/Formats/NativeFormat.cpp | 2 +- dbms/src/Formats/ParquetBlockInputStream.cpp | 2 +- dbms/src/Formats/ProtobufRowInputStream.cpp | 4 ++-- dbms/src/Formats/TSKVRowInputStream.cpp | 4 ++-- .../Formats/TabSeparatedRowInputStream.cpp | 12 
+++++------ dbms/src/Formats/ValuesRowInputStream.cpp | 4 ++-- .../Storages/Kafka/KafkaBlockInputStream.cpp | 20 ++++++++++++++----- 16 files changed, 57 insertions(+), 34 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 27b2cb81b09..e156d7f69f6 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -336,6 +336,7 @@ MutableColumns Block::mutateColumns() void Block::setColumns(MutableColumns && columns) { + /// TODO: assert if |columns| doesn't match |data|! size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = std::move(columns[i]); @@ -344,6 +345,7 @@ void Block::setColumns(MutableColumns && columns) void Block::setColumns(const Columns & columns) { + /// TODO: assert if |columns| doesn't match |data|! size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = columns[i]; diff --git a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index 37b405c18df..9177a70bb18 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -65,12 +65,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); factory.registerInputFormat("RowBinaryWithNamesAndTypes", []( @@ -79,12 +79,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true), - 
sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index b67ce9b28cd..2c693d6ae32 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -28,9 +28,15 @@ BlockInputStreamFromRowInputStream::BlockInputStreamFromRowInputStream( const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings) - : row_input(row_input_), sample(sample_), max_block_size(max_block_size_), rows_portion_size(rows_portion_size_), - allow_errors_num(settings.input_allow_errors_num), allow_errors_ratio(settings.input_allow_errors_ratio) + : row_input(row_input_) + , sample(sample_) + , max_block_size(max_block_size_) + , rows_portion_size(rows_portion_size_) + , read_callback(callback) + , allow_errors_num(settings.input_allow_errors_num) + , allow_errors_ratio(settings.input_allow_errors_ratio) { } @@ -73,6 +79,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() RowReadExtension info; if (!row_input->read(columns, info)) break; + read_callback(); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 2f91aa2ecb2..98dd954fef7 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -24,6 +25,7 @@ public: const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings); void readPrefix() override { row_input->readPrefix(); } @@ -45,6 +47,7 @@ private: Block sample; 
UInt64 max_block_size; UInt64 rows_portion_size; + FormatFactory::ReadCallback read_callback; BlockMissingValues block_missing_values; UInt64 allow_errors_num; diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index 6c118f73f01..b3731902c31 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -479,12 +479,12 @@ void registerInputFormatCSV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, with_names, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index e83de3f676e..c567430e44e 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -307,7 +307,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( @@ -315,6 +315,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) sample, max_block_size, rows_portion_size, + callback, settings); }); } diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index f9454ab7f65..fe34d621530 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -34,7 +34,7 @@ BlockInputStreamPtr FormatFactory::getInput( const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - BufferCallback callback) const + ReadCallback callback) const { const auto & input_getter = 
getCreators(name).first; if (!input_getter) diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 79e3d98659d..accc493fe30 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -25,7 +25,7 @@ class WriteBuffer; class FormatFactory final : public ext::singleton { public: - using BufferCallback = std::function; + using ReadCallback = std::function; private: using InputCreator = std::function; using OutputCreator = std::function( std::make_shared(buf, sample, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 06cce134e57..f324879608b 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -14,7 +14,7 @@ void registerInputFormatNative(FormatFactory & factory) const Context &, UInt64 /* max_block_size */, UInt64 /* min_read_rows */, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback /* callback */, const FormatSettings &) { return std::make_shared(buf, sample, 0); diff --git a/dbms/src/Formats/ParquetBlockInputStream.cpp b/dbms/src/Formats/ParquetBlockInputStream.cpp index 1cd1ca4ae40..19ffa7a63f0 100644 --- a/dbms/src/Formats/ParquetBlockInputStream.cpp +++ b/dbms/src/Formats/ParquetBlockInputStream.cpp @@ -477,7 +477,7 @@ void registerInputFormatParquet(FormatFactory & factory) const Context & context, UInt64 /* max_block_size */, UInt64 /* rows_portion_size */, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback /* callback */, const FormatSettings & /* settings */) { return std::make_shared(buf, sample, context); }); } diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index dc658401de4..98ed513eb90 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ 
b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -73,12 +73,12 @@ void registerInputFormatProtobuf(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, FormatSchemaInfo(context, "Protobuf")), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index 17038dc36ad..d86ee22bc4b 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -199,12 +199,12 @@ void registerInputFormatTSKV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index f7fd7783725..c30749a792b 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -457,12 +457,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -474,12 +474,12 @@ void 
registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -491,12 +491,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index ba2a34a84ef..33799a95549 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -156,12 +156,12 @@ void registerInputFormatValues(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, context, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index c5bff5f2b1a..23a3f4fe9c3 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include @@ -52,7 +54,7 @@ void 
KafkaBlockInputStream::readPrefixImpl() rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support - auto buffer_callback = [this] + auto read_callback = [this] { const auto * sub_buffer = buffer->subBufferAs(); virtual_columns[0]->insert(sub_buffer->currentTopic()); // "topic" @@ -61,7 +63,7 @@ void KafkaBlockInputStream::readPrefixImpl() }; auto child = FormatFactory::instance().getInput( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, buffer_callback); + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, read_callback); child->setLimits(limits); addChild(child); @@ -70,10 +72,18 @@ void KafkaBlockInputStream::readPrefixImpl() Block KafkaBlockInputStream::readImpl() { - /// FIXME: materialize MATERIALIZED columns here. Block block = children.back()->read(); - /// TODO: add virtual columns here - return block; + Block virtual_block = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneWithColumns(std::move(virtual_columns)); + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + block.insert(column); + + /// FIXME: materialize MATERIALIZED columns here. 
+ + return ConvertingBlockInputStream( + context, std::make_shared(block), getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name) + .read(); } void KafkaBlockInputStream::readSuffixImpl() From 1985caed8d70238948ab15e331fb83b3413c99c4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 17:25:41 +0300 Subject: [PATCH 025/191] Add test on virtual columns --- .../integration/test_storage_kafka/test.py | 33 +++++++++++- .../test_kafka_virtual.reference | 50 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index c67b95c1e83..c8c29dfceae 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -86,8 +86,8 @@ def kafka_produce_protobuf_messages(topic, start_index, num_messages): # Since everything is async and shaky when receiving messages from Kafka, # we may want to try and check results multiple times in a loop. 
-def kafka_check_result(result, check=False): - fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference') +def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) with open(fpath) as reference: if check: assert TSV(result) == TSV(reference) @@ -365,6 +365,35 @@ def test_kafka_flush_on_big_message(kafka_cluster): assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) +def test_kafka_virtual_columns(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; + ''') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('json', [messages]) + + messages = '' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('json', [messages]) + + result = '' + for i in range(50): + result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') + if kafka_check_result(result): + break + kafka_check_result(result, True, 'test_kafka_virtual.reference') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference new file mode 100644 index 00000000000..0660a969f7f --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference @@ -0,0 +1,50 @@ + 0 json 0 0 + 1 json 1 0 + 2 json 2 0 + 3 json 3 0 + 4 json 4 0 + 5 json 5 0 + 6 json 6 0 + 7 json 7 0 + 8 json 8 0 + 9 json 9 0 + 10 json 10 0 + 11 json 11 0 + 12 json 12 0 + 13 json 13 0 + 14 json 14 0 + 15 json 15 0 + 16 json 16 0 + 17 json 17 0 + 18 json 18 0 + 19 json 19 0 + 20 json 
20 0 + 21 json 21 0 + 22 json 22 0 + 23 json 23 0 + 24 json 24 0 + 25 json 25 1 + 26 json 26 1 + 27 json 27 1 + 28 json 28 1 + 29 json 29 1 + 30 json 30 1 + 31 json 31 1 + 32 json 32 1 + 33 json 33 1 + 34 json 34 1 + 35 json 35 1 + 36 json 36 1 + 37 json 37 1 + 38 json 38 1 + 39 json 39 1 + 40 json 40 1 + 41 json 41 1 + 42 json 42 1 + 43 json 43 1 + 44 json 44 1 + 45 json 45 1 + 46 json 46 1 + 47 json 47 1 + 48 json 48 1 + 49 json 49 1 From 946fa5b47e53900b81dc95d7f48cd8aa75ba6176 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sat, 25 May 2019 16:43:52 +0300 Subject: [PATCH 026/191] fix style and add tests --- dbms/CMakeLists.txt | 5 +- dbms/programs/client/Client.cpp | 57 +++++++++---------- dbms/programs/server/HTTPHandler.cpp | 4 +- dbms/src/Interpreters/Context.cpp | 26 ++++----- dbms/src/Interpreters/Context.h | 8 +-- .../ReplaceQueryParameterVisitor.cpp | 15 ++--- .../ReplaceQueryParameterVisitor.h | 8 +-- dbms/src/Interpreters/executeQuery.cpp | 4 +- dbms/src/Parsers/ASTQueryParameter.cpp | 2 +- dbms/src/Parsers/ASTQueryParameter.h | 8 ++- dbms/src/Parsers/ExpressionElementParsers.cpp | 43 ++++++++------ dbms/src/Parsers/ExpressionElementParsers.h | 4 +- dbms/src/Parsers/Lexer.cpp | 4 +- dbms/src/Parsers/Lexer.h | 4 +- dbms/src/Parsers/tests/lexer.cpp | 3 + ...00950_client_prepared_statements.reference | 3 + .../00950_client_prepared_statements.sh | 19 +++++++ .../00951_http_prepared_statements.reference | 3 + .../00951_http_prepared_statements.sh | 19 +++++++ 19 files changed, 145 insertions(+), 94 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 926b09dc3dd..b37adf22be7 
100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,7 +87,7 @@ endif () add_subdirectory (src) set(dbms_headers) -set(dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp src/Interpreters/ReplaceQueryParameterVisitor.h) +set(dbms_sources) include(../cmake/dbms_glob_sources.cmake) @@ -134,6 +134,9 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp) list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h) +list (APPEND dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp) +list (APPEND dbms_headers src/Interpreters/ReplaceQueryParameterVisitor.h) + add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (OS_FREEBSD) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 0fee78ddb21..1ea09f7fccd 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -203,7 +203,7 @@ private: std::list external_tables; /// Dictionary with query parameters for prepared statements. - NameToNameMap params_substitution; + NameToNameMap parameters_substitution; ConnectionParameters connection_parameters; @@ -806,10 +806,10 @@ private: if (!parsed_query) return true; - if (!params_substitution.empty()) + if (!parameters_substitution.empty()) { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(params_substitution); + ReplaceQueryParameterVisitor visitor(parameters_substitution); visitor.visit(parsed_query); /// Get new query after substitutions. @@ -1550,11 +1550,11 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; } - static std::pair parseParam(const String & s) + static std::pair parseParameter(const String & s) { size_t pos = s.find('_') + 1; /// Cut two first dash "--" and divide arg from name and value - return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); + return {s.substr(2, pos - 2), s.substr(pos)}; } public: @@ -1574,7 +1574,7 @@ public: Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; - std::vector param_arguments; + std::vector parameter_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1621,8 +1621,8 @@ public: /// Parameter arg after underline. if (startsWith(arg, "--param_")) { - param_arguments.emplace_back(Arguments{""}); - param_arguments.back().emplace_back(arg); + parameter_arguments.emplace_back(Arguments{""}); + parameter_arguments.back().emplace_back(arg); } else common_arguments.emplace_back(arg); @@ -1702,36 +1702,30 @@ public: ; /// Parse commandline options related to prepared statements. 
- po::options_description param_description("Query parameters options"); - param_description.add_options() - ("param_", po::value(), "name and value of substitution") + po::options_description parameter_description("Query parameters options"); + parameter_description.add_options() + ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") ; - for (size_t i = 0; i < param_arguments.size(); ++i) + for (size_t i = 0; i < parameter_arguments.size(); ++i) { - po::parsed_options parsed_param = po::command_line_parser( - param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( - parseParam).run(); - po::variables_map param_options; - po::store(parsed_param, param_options); + po::parsed_options parsed_parameter = po::command_line_parser( + parameter_arguments[i].size(), parameter_arguments[i].data()).options(parameter_description).extra_parser( + parseParameter).run(); + po::variables_map parameter_options; + po::store(parsed_parameter, parameter_options); /// Save name and values of substitution in dictionary. - try { - String param = param_options["param_"].as(); - size_t pos = param.find('='); - if (pos != String::npos && pos + 1 != param.size()) - { - if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) - throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } else - throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } - catch (const Exception & e) + String parameter = parameter_options["param_"].as(); + size_t pos = parameter.find('='); + if (pos != String::npos && pos + 1 != parameter.size()) { - std::string text = e.displayText(); - std::cerr << "Code: " << e.code() << ". 
" << text << std::endl; - exit(e.code()); + const String name = parameter.substr(0, pos); + if (!parameters_substitution.insert({name, parameter.substr(pos + 1)}).second) + throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } + else + throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); } /// Parse main commandline options. @@ -1758,6 +1752,7 @@ public: || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { std::cout << main_description << "\n"; + std::cout << parameter_description << "\n"; std::cout << external_description << "\n"; exit(0); } diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 04a3e25a1c5..fdc2823b160 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -514,8 +514,8 @@ void HTTPHandler::processQuery( else if (startsWith(it->first, "param_")) { /// Save name and values of substitution in dictionary. 
- String param_name = it->first.substr(strlen("param_")); - context.setParamSubstitution(param_name, it->second); + const String parameter_name = it->first.substr(strlen("param_")); + context.setParameterSubstitution(parameter_name, it->second); } else { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 32bca217ef1..ef51432b211 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1866,25 +1866,25 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const } -bool Context::checkEmptyParamSubstitution() const +bool Context::hasQueryParameters() const { - return params_substitution.empty(); + return !parameters_substitution.empty(); } -void Context::setParamSubstitution(const String & name, const String & value) +NameToNameMap Context::getParameterSubstitution() const +{ + if (hasQueryParameters()) + return parameters_substitution; + throw Exception("Query without parameters", ErrorCodes::LOGICAL_ERROR); +} + + +void Context::setParameterSubstitution(const String & name, const String & value) { auto lock = getLock(); - if (!params_substitution.insert({name, value}).second) - throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); -} - - -NameToNameMap Context::getParamSubstitution() const -{ - if (!params_substitution.empty()) - return params_substitution; - throw Exception("Context haven't query parameters", ErrorCodes::LOGICAL_ERROR); + if (!parameters_substitution.insert({name, value}).second) + throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 13079b37c62..ddad2566e1f 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,7 +145,7 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; - NameToNameMap params_substitution; /// Dictionary 
with query parameters for prepared statements. + NameToNameMap parameters_substitution; /// Dictionary with query parameters for prepared statements. /// (key=name, value) IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, @@ -471,9 +471,9 @@ public: SampleBlockCache & getSampleBlockCache() const; /// Query parameters for prepared statements. - bool checkEmptyParamSubstitution() const; - NameToNameMap getParamSubstitution() const; - void setParamSubstitution(const String & name, const String & value); + bool hasQueryParameters() const; + NameToNameMap getParameterSubstitution() const; + void setParameterSubstitution(const String & name, const String & value); #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index c60706cd1ef..9c77eb9d649 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -12,13 +12,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_IDENTIFIER; - extern const int LOGICAL_ERROR; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) { for (auto & child : ast->children) @@ -32,11 +25,11 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) String ReplaceQueryParameterVisitor::getParamValue(const String & name) { - auto search = params_substitution.find(name); - if (search != params_substitution.end()) + auto search = parameters_substitution.find(name); + if (search != parameters_substitution.end()) return search->second; else - throw Exception("Expected same names in parameter field --param_{name}={value} and in query {name:type}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Expected name " + name + " in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } void 
ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) @@ -52,7 +45,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); Field field = temp_column[0]; - ast = std::make_shared(field); + ast = std::make_shared(std::move(field)); } } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index df97a408d6f..c6af66c0eef 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -12,16 +12,16 @@ class ASTQueryParameter; class ReplaceQueryParameterVisitor { public: - ReplaceQueryParameterVisitor(const NameToNameMap & params) - : params_substitution(params) + ReplaceQueryParameterVisitor(const NameToNameMap & parameters) + : parameters_substitution(parameters) {} void visit(ASTPtr & ast); private: - const NameToNameMap params_substitution; - void visitQP(ASTPtr & ast); + const NameToNameMap parameters_substitution; String getParamValue(const String & name); + void visitQP(ASTPtr & ast); }; } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1ccde2bebb6..fa233f66cbe 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,10 +170,10 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - if (!context.checkEmptyParamSubstitution()) /// Avoid change from TCPHandler. + if (context.hasQueryParameters()) /// Avoid change from TCPHandler. { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- ReplaceQueryParameterVisitor visitor(context.getParamSubstitution()); + ReplaceQueryParameterVisitor visitor(context.getParameterSubstitution()); visitor.visit(ast); } diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 559dbe8802d..1dd14a38d05 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,7 +7,7 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - String name_type = name + type; + String name_type = name + ':' + type; settings.ostr << name_type; } diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index f6645b4876a..19c54aa83b8 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -6,16 +6,18 @@ namespace DB { -/// Query parameter: name and type. +/// Parameter in query with name and type of substitution ({name:type}). +/// Example: SELECT * FROM table WHERE id = {pid:UInt16}. class ASTQueryParameter : public ASTWithAlias { public: - String name, type; + String name; + String type; ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } + String getID(char delim) const override { return "QueryParameter" + (delim + name + ':' + type); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f14f37802c2..63ab0a108ea 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1200,16 +1200,23 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } -bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (pos->type != TokenType::OpeningFiguredBracket) + if (pos->type != TokenType::OpeningCurlyBrace) return false; - auto old_pos = ++pos; - String s_name, s_type; + String name; + String type; + ++pos; - while (pos.isValid() && pos->type != TokenType::Colon) - ++pos; + if (pos->type != TokenType::BareWord) + { + expected.add(pos, "string literal"); + return false; + } + + name = String(pos->begin, pos->end); + ++pos; if (pos->type != TokenType::Colon) { @@ -1217,21 +1224,25 @@ bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected return false; } - s_name = String(old_pos->begin, pos->begin); - old_pos = ++pos; + ++pos; - while (pos.isValid() && pos->type != TokenType::ClosingFiguredBracket) - ++pos; - - if (pos->type != TokenType::ClosingFiguredBracket) + if (pos->type != TokenType::BareWord) { - expected.add(pos, "closing figured bracket"); + expected.add(pos, "string literal"); return false; } - s_type = String(old_pos->begin, pos->begin); + type = String(pos->begin, pos->end); ++pos; - node = std::make_shared(s_name, s_type); + + if (pos->type != TokenType::ClosingCurlyBrace) + { + expected.add(pos, "closing curly brace"); + return false; + } 
+ + ++pos; + node = std::make_shared(name, type); return true; } @@ -1256,7 +1267,7 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) || ParserCompoundIdentifier().parse(pos, node, expected) - || ParserSubstitutionExpression().parse(pos, node, expected); + || ParserSubstitution().parse(pos, node, expected); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index d10670ec888..b4fe77e8bb3 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -242,10 +242,10 @@ private: }; -/** A substitution expression. +/** Prepared statements. * Parse query with parameter expression {name:type}. */ -class ParserSubstitutionExpression : public IParserBase +class ParserSubstitution : public IParserBase { protected: const char * getName() const { return "substitution"; } diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 3e33759440d..fe56dfadd5b 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -174,9 +174,9 @@ Token Lexer::nextTokenImpl() case ']': return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); case '{': - return Token(TokenType::OpeningFiguredBracket, token_begin, ++pos); + return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos); case '}': - return Token(TokenType::ClosingFiguredBracket, token_begin, ++pos); + return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos); case ',': return Token(TokenType::Comma, token_begin, ++pos); case ';': diff --git a/dbms/src/Parsers/Lexer.h b/dbms/src/Parsers/Lexer.h index 021b6ae7ed3..3f2712bae08 100644 --- a/dbms/src/Parsers/Lexer.h +++ b/dbms/src/Parsers/Lexer.h @@ -23,8 +23,8 @@ namespace DB M(OpeningSquareBracket) \ M(ClosingSquareBracket) \ \ - M(OpeningFiguredBracket) \ - M(ClosingFiguredBracket) \ + 
M(OpeningCurlyBrace) \ + M(ClosingCurlyBrace) \ \ M(Comma) \ M(Semicolon) \ diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp index ccc97298ed8..d9135b08c28 100644 --- a/dbms/src/Parsers/tests/lexer.cpp +++ b/dbms/src/Parsers/tests/lexer.cpp @@ -28,6 +28,8 @@ std::map hilite = {TokenType::ClosingRoundBracket, "\033[1;33m"}, {TokenType::OpeningSquareBracket, "\033[1;33m"}, {TokenType::ClosingSquareBracket, "\033[1;33m"}, + {TokenType::OpeningCurlyBrace, "\033[1;33m"}, + {TokenType::ClosingCurlyBrace, "\033[1;33m"}, {TokenType::Comma, "\033[1;33m"}, {TokenType::Semicolon, "\033[1;33m"}, @@ -76,6 +78,7 @@ int main(int, char **) if (token.isEnd()) break; + writeChar(' ', out); auto it = hilite.find(token.type); diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference new file mode 100644 index 00000000000..8b9a188f51e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference @@ -0,0 +1,3 @@ +1 Hello, world +1 Hello, world +2 test diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh new file mode 100755 index 00000000000..d9d057aceec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world')"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ + -q "SELECT * FROM ps WHERE i = {id:UInt8}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ + -q "SELECT * FROM ps WHERE s = {phrase:String}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ + -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference new file mode 100644 index 00000000000..8b9a188f51e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference @@ -0,0 +1,3 @@ +1 Hello, world +1 Hello, world +2 test diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh new file mode 100755 index 00000000000..cc17e5e7b2b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE IF EXISTS ps"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; + +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=1"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8}"; +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ + -d "SELECT * FROM ps WHERE s = {phrase:String}"; +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; From 461fb1eaa8cc4effdcbb6041e2d2058868f34f5a Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 26 May 2019 01:51:21 +0300 Subject: [PATCH 027/191] fix test --- dbms/programs/client/Client.cpp | 1 + .../0_stateless/00951_http_prepared_statements.sh | 12 ++++++------ ...ce => 00952_client_prepared_statements.reference} | 0 ...ements.sh => 00952_client_prepared_statements.sh} | 0 4 files changed, 7 insertions(+), 6 deletions(-) rename dbms/tests/queries/0_stateless/{00950_client_prepared_statements.reference => 00952_client_prepared_statements.reference} (100%) rename dbms/tests/queries/0_stateless/{00950_client_prepared_statements.sh => 00952_client_prepared_statements.sh} (100%) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index a2f4d5abdaa..70609ed7f62 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1553,6 +1553,7 @@ private: static std::pair parseParameter(const String & s) { size_t pos = s.find('_') + 1; + /// String begins with "--param_", so no check is needed /// Cut two first dash "--" and divide arg from name and value return {s.substr(2, pos - 2), s.substr(pos)}; } diff --git
a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh index cc17e5e7b2b..b54fdd939c8 100755 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh @@ -9,11 +9,11 @@ ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) E ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=1"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8}"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ - -d "SELECT * FROM ps WHERE s = {phrase:String}"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ + -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference rename to dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh similarity index 100% rename from
dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh From d2fd7a449f37be4857d53c19c5ab99c76372bd8e Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Sun, 26 May 2019 23:10:43 +0300 Subject: [PATCH 028/191] Fix build --- dbms/src/Formats/tests/block_row_transforms.cpp | 2 +- dbms/src/Formats/tests/tab_separated_streams.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/tests/block_row_transforms.cpp b/dbms/src/Formats/tests/block_row_transforms.cpp index c880ff7fc39..9d38a37f833 100644 --- a/dbms/src/Formats/tests/block_row_transforms.cpp +++ b/dbms/src/Formats/tests/block_row_transforms.cpp @@ -45,7 +45,7 @@ try FormatSettings format_settings; RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample, false, false, format_settings); BlockOutputStreamFromRowOutputStream block_output(row_output, sample); diff --git a/dbms/src/Formats/tests/tab_separated_streams.cpp b/dbms/src/Formats/tests/tab_separated_streams.cpp index 50b9350d4c5..11895699c3b 100644 --- a/dbms/src/Formats/tests/tab_separated_streams.cpp +++ b/dbms/src/Formats/tests/tab_separated_streams.cpp @@ -42,7 +42,7 @@ try RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); 
BlockOutputStreamFromRowOutputStream block_output(row_output, sample); copyData(block_input, block_output); From 13212c9b01b3858e552ef756d31b6b61ad3bc032 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 27 May 2019 20:25:34 +0300 Subject: [PATCH 029/191] Fix the hang on dropping Kafka table when there is no mat. views --- .../Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 12 ++++++++++-- .../src/Storages/Kafka/ReadBufferFromKafkaConsumer.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 699fcded737..b3357b0f1e5 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -3,6 +3,16 @@ namespace DB { +using namespace std::chrono_literals; + +ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() +{ + /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 + consumer->unsubscribe(); + consumer->unassign(); + while(consumer->get_consumer_queue().next_event(1s)); +} + void ReadBufferFromKafkaConsumer::commit() { if (messages.empty() || current == messages.begin()) @@ -20,8 +30,6 @@ void ReadBufferFromKafkaConsumer::subscribe(const Names & topics) // If we're doing a manual select then it's better to get something after a wait, then immediate nothing. 
if (consumer->get_subscription().empty()) { - using namespace std::chrono_literals; - consumer->pause(); // don't accidentally read any messages consumer->subscribe(topics); consumer->poll(5s); diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index acfb88d3160..a637593e10a 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -27,6 +27,7 @@ public: , current(messages.begin()) { } + ~ReadBufferFromKafkaConsumer() override; void commit(); // Commit all processed messages. void subscribe(const Names & topics); // Subscribe internal consumer to topics. From 1eccbc39c5f514188d54af51d638be7c3268f6b6 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 28 May 2019 00:01:24 +0300 Subject: [PATCH 030/191] Don't add virtual column to empty block --- dbms/src/DataStreams/ConvertingBlockInputStream.cpp | 2 +- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp index 4c78aeb7ce5..49283278bf4 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -60,7 +60,7 @@ ConvertingBlockInputStream::ConvertingBlockInputStream( if (input_header.has(res_elem.name)) conversion[result_col_num] = input_header.getPositionByName(res_elem.name); else - throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + throw Exception("Cannot find column " + backQuote(res_elem.name) + " in source stream", ErrorCodes::THERE_IS_NO_COLUMN); break; } diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 23a3f4fe9c3..396b9edb52b 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ 
b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -73,6 +73,9 @@ void KafkaBlockInputStream::readPrefixImpl() Block KafkaBlockInputStream::readImpl() { Block block = children.back()->read(); + if (!block) + return block; + Block virtual_block = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneWithColumns(std::move(virtual_columns)); virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); From 6b1a9e0e52b58a015c918faf193c444ec892a89d Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 28 May 2019 15:22:10 +0300 Subject: [PATCH 031/191] Fix comment --- dbms/src/Storages/IStorage.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 8f1a7b06d9e..9f3a499e1d7 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -116,7 +116,6 @@ protected: /// still thread-unsafe part. /// Returns whether the column is virtual - by default all columns are real. /// Initially reserved virtual column name may be shadowed by real column. - /// Returns false even for non-existent non-virtual columns. 
virtual bool isVirtualColumn(const String & column_name) const; private: From 8326021d7444f9b21772cae908f034d81898b115 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 3 Jun 2019 17:36:59 +0300 Subject: [PATCH 032/191] Fix style --- dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index b3357b0f1e5..9eacdce59e1 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -10,7 +10,7 @@ ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 consumer->unsubscribe(); consumer->unassign(); - while(consumer->get_consumer_queue().next_event(1s)); + while (consumer->get_consumer_queue().next_event(1s)); } void ReadBufferFromKafkaConsumer::commit() From d97c2ccdc8a4445707496ac7b9700d21acf5ddc2 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Tue, 4 Jun 2019 21:15:32 +0300 Subject: [PATCH 033/191] support complex datatype --- .../ReplaceQueryParameterVisitor.cpp | 12 +++++-- dbms/src/Interpreters/executeQuery.cpp | 4 +++ dbms/src/Parsers/ExpressionElementParsers.cpp | 16 ++++++--- .../00951_http_prepared_statements.reference | 3 -- ...00952_client_prepared_statements.reference | 3 -- .../00952_client_prepared_statements.sh | 19 ----------- .../00953_http_prepared_statements.reference | 4 +++ ...s.sh => 00953_http_prepared_statements.sh} | 14 ++++---- ...00954_client_prepared_statements.reference | 4 +++ .../00954_client_prepared_statements.sh | 21 ++++++++++++ ...0955_complex_prepared_statements.reference | 5 +++ .../00955_complex_prepared_statements.sh | 33 +++++++++++++++++++ 12 files changed, 101 insertions(+), 37 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference delete mode 
100644 dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference delete mode 100755 dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference rename dbms/tests/queries/0_stateless/{00951_http_prepared_statements.sh => 00953_http_prepared_statements.sh} (57%) create mode 100644 dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference create mode 100644 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 9c77eb9d649..27b0e32a354 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -35,9 +37,15 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) { auto ast_param = ast->as(); - String value = getParamValue(ast_param->name); - const auto data_type = DataTypeFactory::instance().get(ast_param->type); + const String value = getParamValue(ast_param->name); + String type = ast_param->type; + /// Replacing all occurrences of types Date and DateTime with String. + /// String comparison is used in "WHERE" conditions with this types. 
+ boost::replace_all(type, "DateTime", "String"); + boost::replace_all(type, "Date", "String"); + + const auto data_type = DataTypeFactory::instance().get(type); auto temp_column_ptr = data_type->createColumn(); IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 0e780b5d486..1a508dc637c 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -208,6 +208,10 @@ static std::tuple executeQueryImpl( try { + if (context.hasQueryParameters()) /// Avoid change from TCPHandler. + /// Get new query after substitutions. + query = serializeAST(*ast); + logQuery(query.substr(0, settings.log_queries_cut_to_length), context, internal); /// Check the limits. diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 63ab0a108ea..0bd9ac8c639 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1211,7 +1211,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::BareWord) { - expected.add(pos, "string literal"); + expected.add(pos, "substitution name (identifier)"); return false; } @@ -1228,12 +1228,20 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::BareWord) { - expected.add(pos, "string literal"); + expected.add(pos, "substitution type"); return false; } - type = String(pos->begin, pos->end); - ++pos; + auto old_pos = pos; + + while ((pos->type == TokenType::OpeningRoundBracket || pos->type == TokenType::ClosingRoundBracket + || pos->type == TokenType::Comma || pos->type == TokenType::BareWord) + && pos->type != TokenType::ClosingCurlyBrace) + { + ++pos; + } + + type = String(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) { diff --git 
a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference deleted file mode 100644 index 8b9a188f51e..00000000000 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 Hello, world -1 Hello, world -2 test diff --git a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference deleted file mode 100644 index 8b9a188f51e..00000000000 --- a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 Hello, world -1 Hello, world -2 test diff --git a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh deleted file mode 100755 index d9d057aceec..00000000000 --- a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; -$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; - -$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world')"; -$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test')"; - -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ - -q "SELECT * FROM ps WHERE i = {id:UInt8}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ - -q "SELECT * FROM ps WHERE s = {phrase:String}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ - -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; - -$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference new file mode 100644 index 00000000000..28323dae39b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference @@ -0,0 +1,4 @@ +1 Hello, world 2005-05-05 +1 Hello, world 2005-05-05 +2 test 2019-05-25 +2 test 2019-05-25 diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh similarity index 57% rename from dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh index b54fdd939c8..23f47e75e1c 100755 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh @@ -4,16 +4,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE IF EXISTS ps"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d Date) ENGINE = Memory"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ - -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25"\ + -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference new file mode 100644 index 00000000000..c7cafaefba8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -0,0 +1,4 @@ +1 Hello, world 2005-05-05 05:05:05 +1 Hello, world 2005-05-05 05:05:05 +2 test 2005-05-25 15:00:00 +2 test 2005-05-25 15:00:00 diff --git
a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh new file mode 100755 index 00000000000..451ea9cbd2c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String, d DateTime) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05 05:05:05')"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test', '2005-05-25 15:00:00')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ + -q "SELECT * FROM ps WHERE i = {id:UInt8}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ + -q "SELECT * FROM ps WHERE s = {phrase:String}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00'\ + -q "SELECT * FROM ps WHERE d = {date:DateTime}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ + -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference new file mode 100644 index 00000000000..a37855d2cb8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -0,0 +1,5 @@ +(1,'Hello') +(1,('dt',2)) +[10,10,10] +[[10],[10],[10]] +[10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh new file mode 100644 index 00000000000..4ea005c5d3a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -0,0 +1,33 
@@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( + a Array(UInt32), da Array(Array(UInt8)), + t Tuple(Int16, String), dt Tuple(UInt8, Tuple(String, UInt8)), + n Nullable(Date) + ) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [1, 2], [[1, 1], [2, 2]], + (1, 'Hello'), (1, ('dt', 2)), + NULL)"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [10, 10, 10], [[10], [10], [10]], + (10, 'Test'), (10, ('dt', 10)), + '2015-02-15')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]"\ + -q "SELECT t FROM ps WHERE a = {aui:Array(UInt16)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ + -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ + -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10)))"\ + -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ + -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From f3ef4666e7912dbd75b1e8efcc185b8c9c1d255e Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Tue, 4 Jun 2019 23:15:44 +0300 Subject: [PATCH 034/191] fix --- dbms/programs/server/HTTPHandler.cpp | 6 +++--- .../0_stateless/00955_complex_prepared_statements.sh | 0 2 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 690c426f929..9b1160d9796 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -501,11 +501,11 @@ void HTTPHandler::processQuery( else if 
(param_could_be_skipped(key)) { } - else if (startsWith(it->first, "param_")) + else if (startsWith(key, "param_")) { /// Save name and values of substitution in dictionary. - const String parameter_name = it->first.substr(strlen("param_")); - context.setParameterSubstitution(parameter_name, it->second); + const String parameter_name = key.substr(strlen("param_")); + context.setParameterSubstitution(parameter_name, value); } else { diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh old mode 100644 new mode 100755 From 8d91419a2533bee81ba8e4ba5a2890daf0cd69df Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 17:07:50 +0300 Subject: [PATCH 035/191] fix --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 1 - ...ements.reference => 00956_http_prepared_statements.reference} | 0 ..._prepared_statements.sh => 00956_http_prepared_statements.sh} | 0 3 files changed, 1 deletion(-) rename dbms/tests/queries/0_stateless/{00953_http_prepared_statements.reference => 00956_http_prepared_statements.reference} (100%) rename dbms/tests/queries/0_stateless/{00953_http_prepared_statements.sh => 00956_http_prepared_statements.sh} (100%) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 27b0e32a354..9bbeff5a2aa 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference rename to dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference diff --git 
a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh similarity index 100% rename from dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh From dec0430327e24b81879b77bfb568486a7e39c44d Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 23:04:17 +0200 Subject: [PATCH 036/191] check for trash --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 6 +++++- .../0_stateless/00955_complex_prepared_statements.reference | 2 ++ .../0_stateless/00955_complex_prepared_statements.sh | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 9bbeff5a2aa..1661480e1b7 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -30,7 +30,7 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != parameters_substitution.end()) return search->second; else - throw Exception("Expected name " + name + " in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) @@ -41,6 +41,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) /// Replacing all occurrences of types Date and DateTime with String. /// String comparison is used in "WHERE" conditions with this types. 
+ boost::replace_all(type, "DateTime", "String"); boost::replace_all(type, "Date", "String"); @@ -51,6 +52,9 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) FormatSettings format_settings; data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); + if (!read_buffer.eof()) + throw Exception("Expected correct value in parameter with name '" + ast_param->name + "'", ErrorCodes::BAD_ARGUMENTS); + Field field = temp_column[0]; ast = std::make_shared(std::move(field)); } diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index a37855d2cb8..9042c2ae5ec 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,3 +3,5 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 +Code: 36. DB::Exception: Expected correct value in parameter with name 'injection' + diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index 4ea005c5d3a..a5fe72001db 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -25,9 +25,12 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10)))"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))"\ -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; 
+$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ + -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ + && grep 'Expected correct value in parameter'; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 57314233e2657e850780034ae9811b21071667e1 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 23:09:20 +0200 Subject: [PATCH 037/191] fix test --- .../0_stateless/00955_complex_prepared_statements.reference | 1 - .../queries/0_stateless/00955_complex_prepared_statements.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 9042c2ae5ec..579452008b8 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -4,4 +4,3 @@ [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 Code: 36. 
DB::Exception: Expected correct value in parameter with name 'injection' - diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index a5fe72001db..ce540ca65fe 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -31,6 +31,6 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ - && grep 'Expected correct value in parameter'; + | grep 'Expected correct value in parameter'; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 2cb301323192116bdbc3f31aea72f48d93f080c7 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Thu, 6 Jun 2019 02:31:14 +0300 Subject: [PATCH 038/191] fix tests --- .../00954_client_prepared_statements.sh | 8 +++---- ...0955_complex_prepared_statements.reference | 2 +- .../00955_complex_prepared_statements.sh | 23 ++++++++++++------- .../00956_http_prepared_statements.sh | 8 +++---- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 451ea9cbd2c..9ecd60abab6 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -9,13 +9,13 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String, d DateTime) ENGINE = $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05 05:05:05')"; $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test', '2005-05-25 15:00:00')"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1 \ -q "SELECT * FROM ps WHERE i = 
{id:UInt8}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world' \ -q "SELECT * FROM ps WHERE s = {phrase:String}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ -q "SELECT * FROM ps WHERE d = {date:DateTime}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 579452008b8..818e30f1273 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,4 +3,4 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 -Code: 36. DB::Exception: Expected correct value in parameter with name 'injection' +OK diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index ce540ca65fe..b73d7d39eaf 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,6 +3,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +EXCEPTION_TEXT="Code: 36. 
DB::Exception: Expected correct value in parameter with name 'injection'" +EXCEPTION_SUCCESS_TEXT="OK" +EXCEPTION_FAIL_TEXT="FAIL" + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; $CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( a Array(UInt32), da Array(Array(UInt8)), @@ -19,18 +23,21 @@ $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( (10, 'Test'), (10, ('dt', 10)), '2015-02-15')"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]" \ -q "SELECT t FROM ps WHERE a = {aui:Array(UInt16)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]" \ -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')" \ -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))" \ -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15" \ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ - -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ - | grep 'Expected correct value in parameter'; + +# Must throw an exception to avoid SQL injection +$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1" \ + -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1 \ + | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" \ + || echo "$EXCEPTION_FAIL_TEXT"; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh index 
23f47e75e1c..e022ff65fc2 100755 --- a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh @@ -9,13 +9,13 @@ ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world" \ -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25" \ -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2¶m_phrase=test"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2¶m_phrase=test" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; From 5daaf60041e0a2a8b1f69407c1bdd87917286789 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 13 Jun 2019 14:19:49 +0300 Subject: [PATCH 039/191] Update CMakeLists.txt --- dbms/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index adbec105026..993b62801a9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -135,9 +135,6 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp) list (APPEND dbms_headers 
src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h) -list (APPEND dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp) -list (APPEND dbms_headers src/Interpreters/ReplaceQueryParameterVisitor.h) - add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (OS_FREEBSD) From 03076a0f8dc0b4f19051e0861679cdc68e7666d1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:08:07 +0300 Subject: [PATCH 040/191] Update Context.cpp --- dbms/src/Interpreters/Context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ef51432b211..6d3adb7fab0 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1876,13 +1876,12 @@ NameToNameMap Context::getParameterSubstitution() const { if (hasQueryParameters()) return parameters_substitution; - throw Exception("Query without parameters", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: there are no parameters to substitute", ErrorCodes::LOGICAL_ERROR); } void Context::setParameterSubstitution(const String & name, const String & value) { - auto lock = getLock(); if (!parameters_substitution.insert({name, value}).second) throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } From daca715a0e60e61c5e5562fe86565515e6eb1708 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:15:14 +0300 Subject: [PATCH 041/191] Update ReplaceQueryParameterVisitor.h --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index c6af66c0eef..e1049267beb 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ 
b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -21,7 +21,7 @@ public: private: const NameToNameMap parameters_substitution; String getParamValue(const String & name); - void visitQP(ASTPtr & ast); + void visitQueryParameters(ASTPtr & ast); }; } From 5317c5a08b6a64cb6dc4232829870280a8a6ad4e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:18:24 +0300 Subject: [PATCH 042/191] Update ReplaceQueryParameterVisitor.cpp --- .../Interpreters/ReplaceQueryParameterVisitor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 1661480e1b7..e780421871b 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -18,13 +18,13 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) for (auto & child : ast->children) { if (child->as()) - visitQP(child); + visitvisitQueryParameter(child); else visit(child); } } -String ReplaceQueryParameterVisitor::getParamValue(const String & name) +const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { auto search = parameters_substitution.find(name); if (search != parameters_substitution.end()) @@ -33,14 +33,16 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } -void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) +void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) { - auto ast_param = ast->as(); - const String value = getParamValue(ast_param->name); - String type = ast_param->type; + const auto & ast_param = ast->as(); + const String & value = getParamValue(ast_param.name); + const String & type = ast_param.type; /// Replacing all occurrences of types Date and DateTime with String. 
/// String comparison is used in "WHERE" conditions with this types. + + /// TODO: WTF, totally incorrect boost::replace_all(type, "DateTime", "String"); boost::replace_all(type, "Date", "String"); From 1e385cac7c506badfcb07ace4118ec824a7c9ed8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:18:59 +0300 Subject: [PATCH 043/191] Update ReplaceQueryParameterVisitor.h --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index e1049267beb..b8c7f5fd979 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -20,8 +20,8 @@ public: private: const NameToNameMap parameters_substitution; - String getParamValue(const String & name); - void visitQueryParameters(ASTPtr & ast); + const String & getParamValue(const String & name); + void visitQueryParameter(ASTPtr & ast); }; } From 322c73cc748f05882d3d0c60180c00035d2e4c10 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:23:01 +0300 Subject: [PATCH 044/191] Update ASTQueryParameter.cpp --- dbms/src/Parsers/ASTQueryParameter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 1dd14a38d05..3696f93229e 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,8 +7,7 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - String name_type = name + ':' + type; - settings.ostr << name_type; + settings.ostr << backQuoteIfNeed(name) + ':' + type; } void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const From 38414bc337a8180f51de0c9948bc8a44473ee3ef Mon Sep 17 00:00:00 2001 From: 
alexey-milovidov Date: Fri, 14 Jun 2019 19:23:42 +0300 Subject: [PATCH 045/191] Update ASTQueryParameter.h --- dbms/src/Parsers/ASTQueryParameter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index 19c54aa83b8..b69c71fb10c 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. */ - String getID(char delim) const override { return "QueryParameter" + (delim + name + ':' + type); } + String getID(char delim) const override { return "QueryParameter" + delim + name + ':' + type; } ASTPtr clone() const override { return std::make_shared(*this); } From 71427b08f3e69a9c82b4244689fb92fc6541a43a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:24:56 +0300 Subject: [PATCH 046/191] Update ExpressionElementParsers.cpp --- dbms/src/Parsers/ExpressionElementParsers.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 0bd9ac8c639..f09aadafc47 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1205,8 +1205,6 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::OpeningCurlyBrace) return false; - String name; - String type; ++pos; if (pos->type != TokenType::BareWord) @@ -1215,7 +1213,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } - name = String(pos->begin, pos->end); + String name(pos->begin, pos->end); ++pos; if (pos->type != TokenType::Colon) @@ -1241,7 +1239,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ++pos; } - type = 
String(old_pos->begin, pos->begin); + String type(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) { From c2d4c11cb828fffd630af73b77f8e2bc73727ac3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 20:15:30 +0300 Subject: [PATCH 047/191] Fixes for #5331 --- .../ExecuteScalarSubqueriesVisitor.cpp | 22 ++++--------------- .../ReplaceQueryParameterVisitor.cpp | 21 ++++++------------ dbms/src/Parsers/ASTQueryParameter.h | 2 +- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 88049565aeb..59f7f46be70 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -9,10 +9,12 @@ #include #include #include +#include #include #include + namespace DB { @@ -23,22 +25,6 @@ namespace ErrorCodes } -static ASTPtr addTypeConversion(std::unique_ptr && ast, const String & type_name) -{ - auto func = std::make_shared(); - ASTPtr res = func; - func->alias = ast->alias; - func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; - ast->alias.clear(); - func->name = "CAST"; - auto exp_list = std::make_shared(); - func->arguments = exp_list; - func->children.push_back(func->arguments); - exp_list->children.emplace_back(ast.release()); - exp_list->children.emplace_back(std::make_shared(type_name)); - return res; -} - bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { /// Processed @@ -110,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; - ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); + ast = addTypeConversionToAST(std::move(lit), 
block.safeGetByPosition(0).type->getName()); } else { @@ -125,7 +111,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) { - exp_list->children[i] = addTypeConversion( + exp_list->children[i] = addTypeConversionToAST( std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index e780421871b..c732ee533fe 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -9,6 +9,8 @@ #include #include #include +#include + namespace DB { @@ -18,7 +20,7 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) for (auto & child : ast->children) { if (child->as()) - visitvisitQueryParameter(child); + visitQueryParameter(child); else visit(child); } @@ -37,17 +39,9 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) { const auto & ast_param = ast->as(); const String & value = getParamValue(ast_param.name); - const String & type = ast_param.type; + const String & type_name = ast_param.type; - /// Replacing all occurrences of types Date and DateTime with String. - /// String comparison is used in "WHERE" conditions with this types. 
- - /// TODO: WTF, totally incorrect - - boost::replace_all(type, "DateTime", "String"); - boost::replace_all(type, "Date", "String"); - - const auto data_type = DataTypeFactory::instance().get(type); + const auto data_type = DataTypeFactory::instance().get(type_name); auto temp_column_ptr = data_type->createColumn(); IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; @@ -55,10 +49,9 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); if (!read_buffer.eof()) - throw Exception("Expected correct value in parameter with name '" + ast_param->name + "'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_ARGUMENTS); - Field field = temp_column[0]; - ast = std::make_shared(std::move(field)); + ast = addTypeConversionToAST(std::make_shared(temp_column[0]), type_name); } } diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index b69c71fb10c..858b23a0250 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "QueryParameter" + delim + name + ':' + type; } + String getID(char delim) const override { return String("QueryParameter") + delim + name + ':' + type; } ASTPtr clone() const override { return std::make_shared(*this); } From facdd966cce79706aa071d668d67589bf276ed6a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 14 Jun 2019 20:19:02 +0300 Subject: [PATCH 048/191] Fixes due to review --- dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp | 5 +++-- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 5 ++++- dbms/src/Formats/FormatFactory.cpp | 2 +- dbms/src/Formats/FormatFactory.h | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 2c693d6ae32..2335363db70 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -34,7 +34,7 @@ BlockInputStreamFromRowInputStream::BlockInputStreamFromRowInputStream( , sample(sample_) , max_block_size(max_block_size_) , rows_portion_size(rows_portion_size_) - , read_callback(callback) + , read_virtual_columns_callback(callback) , allow_errors_num(settings.input_allow_errors_num) , allow_errors_ratio(settings.input_allow_errors_ratio) { @@ -79,7 +79,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() RowReadExtension info; if (!row_input->read(columns, info)) break; - read_callback(); + if (read_virtual_columns_callback) + read_virtual_columns_callback(); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 98dd954fef7..2338af3bf38 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -47,7 +47,10 @@ private: Block 
sample; UInt64 max_block_size; UInt64 rows_portion_size; - FormatFactory::ReadCallback read_callback; + + /// Callback used to setup virtual columns after reading each row. + FormatFactory::ReadCallback read_virtual_columns_callback; + BlockMissingValues block_missing_values; UInt64 allow_errors_num; diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 8990126ddcf..0a2b867101b 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -55,7 +55,7 @@ BlockInputStreamPtr FormatFactory::getInput( format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; return input_getter( - buf, sample, context, max_block_size, rows_portion_size, callback ? callback : [] {}, format_settings); + buf, sample, context, max_block_size, rows_portion_size, callback ? callback : ReadCallback(), format_settings); } diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index accc493fe30..9c8b87e7d8b 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -25,6 +25,8 @@ class WriteBuffer; class FormatFactory final : public ext::singleton { public: + /// This callback allows to perform some additional actions after reading a single row. + /// It's initial purpose was to extract payload for virtual columns from Kafka Consumer ReadBuffer. 
using ReadCallback = std::function; private: From f535a2f55fee6347c3c4a378b2b222bfaa5cac42 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:32:22 +0300 Subject: [PATCH 049/191] Fixes for #5331 --- dbms/programs/client/Client.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index e30484df43c..2168dd303ee 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1711,9 +1711,8 @@ public: for (size_t i = 0; i < parameter_arguments.size(); ++i) { - po::parsed_options parsed_parameter = po::command_line_parser( - parameter_arguments[i].size(), parameter_arguments[i].data()).options(parameter_description).extra_parser( - parseParameter).run(); + po::parsed_options parsed_parameter = po::command_line_parser(parameter_arguments[i]) + .options(parameter_description).extra_parser(parseParameter).run(); po::variables_map parameter_options; po::store(parsed_parameter, parameter_options); From 34072c2ddeb3320bbe5f8b81269e5980ce241b96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:38:58 +0300 Subject: [PATCH 050/191] Fixes for #5331 --- dbms/src/Parsers/ExpressionElementParsers.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f09aadafc47..9c0071c64e8 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1224,21 +1224,14 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ++pos; - if (pos->type != TokenType::BareWord) + auto old_pos = pos; + ParserIdentifierWithOptionalParameters type_parser; + if (!type_parser.ignore(pos, expected)) { expected.add(pos, "substitution type"); return false; } - auto old_pos = pos; - - while ((pos->type == TokenType::OpeningRoundBracket || pos->type == 
TokenType::ClosingRoundBracket - || pos->type == TokenType::Comma || pos->type == TokenType::BareWord) - && pos->type != TokenType::ClosingCurlyBrace) - { - ++pos; - } - String type(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) From da04db2a93a66fd399ce19d9f346a72788cb088a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:56:28 +0300 Subject: [PATCH 051/191] Fixes for #5331 --- dbms/programs/client/Client.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2168dd303ee..10bbf3760ea 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1622,10 +1622,7 @@ public: /// Parameter arg after underline. if (startsWith(arg, "--param_")) - { - parameter_arguments.emplace_back(Arguments{""}); - parameter_arguments.back().emplace_back(arg); - } + parameter_arguments.emplace_back(Arguments{arg}); else common_arguments.emplace_back(arg); } @@ -1706,7 +1703,7 @@ public: /// Parse commandline options related to prepared statements. 
po::options_description parameter_description("Query parameters options"); parameter_description.add_options() - ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") + ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") ; for (size_t i = 0; i < parameter_arguments.size(); ++i) From 08636dce92f6a038712e2b349f5dde1fe1c4383d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 22:39:56 +0300 Subject: [PATCH 052/191] Added missing files --- .../Interpreters/addTypeConversionToAST.cpp | 33 +++++++++++++++++++ .../src/Interpreters/addTypeConversionToAST.h | 13 ++++++++ 2 files changed, 46 insertions(+) create mode 100644 dbms/src/Interpreters/addTypeConversionToAST.cpp create mode 100644 dbms/src/Interpreters/addTypeConversionToAST.h diff --git a/dbms/src/Interpreters/addTypeConversionToAST.cpp b/dbms/src/Interpreters/addTypeConversionToAST.cpp new file mode 100644 index 00000000000..6640af0ca0d --- /dev/null +++ b/dbms/src/Interpreters/addTypeConversionToAST.cpp @@ -0,0 +1,33 @@ +#include "addTypeConversionToAST.h" + +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) +{ + auto func = std::make_shared(); + ASTPtr res = func; + + if (ASTWithAlias * ast_with_alias = ast->as()) + { + func->alias = ast_with_alias->alias; + func->prefer_alias_to_column_name = ast_with_alias->prefer_alias_to_column_name; + ast_with_alias->alias.clear(); + } + + func->name = "CAST"; + auto exp_list = std::make_shared(); + func->arguments = exp_list; + func->children.push_back(func->arguments); + exp_list->children.emplace_back(std::move(ast)); + exp_list->children.emplace_back(std::make_shared(type_name)); + return res; +} + +} diff --git a/dbms/src/Interpreters/addTypeConversionToAST.h b/dbms/src/Interpreters/addTypeConversionToAST.h new file mode 100644 index 00000000000..56c3a636f45 --- /dev/null +++ 
b/dbms/src/Interpreters/addTypeConversionToAST.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// It will produce an expression with CAST to get an AST with the required type. +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name); + +} From 748b5a5bce4ab4e591be6f7737cfbb23d0b9d556 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 17:32:01 +0300 Subject: [PATCH 053/191] Clarified code in IDataType --- dbms/src/DataTypes/IDataType.h | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 97579b4ca9c..235040f960d 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -262,40 +262,18 @@ public: protected: virtual String doGetName() const; - /** Text serialization with escaping but without quoting. - */ -public: // used somewhere in arcadia + /// Default implementations of text serialization in case of 'custom_text_serialization' is not set. + virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - -protected: virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization as a literal that may be inserted into a query. - */ virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for the CSV format. 
- */ virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for displaying on a terminal or saving into a text file, and the like. - * Without escaping or quoting. - */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization intended for using in JSON format. - * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. - */ virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for putting into the XML format. - */ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); From 01762d5167927793ae6921edaa0d9526e9387dbf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 17:47:33 +0300 Subject: [PATCH 054/191] Clarified code in IDataType --- dbms/src/DataTypes/IDataType.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 235040f960d..f479bcfa3d2 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -222,42 +222,42 @@ public: /// If method will throw an exception, then column will be in same state as before call to method. virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0; + /** Serialize to a protobuf. 
*/ + virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0; + virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0; + /** Text serialization with escaping but without quoting. */ - virtual void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization as a literal that may be inserted into a query. */ - virtual void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for the CSV format. */ - virtual void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for displaying on a terminal or saving into a text file, and the like. * Without escaping or quoting. 
*/ - virtual void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization intended for using in JSON format. */ - virtual void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for putting into the XML format. */ - virtual void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; - - /** Serialize to a protobuf. 
*/ - virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0; - virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0; + void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; protected: virtual String doGetName() const; From ee102ca9532bda0119a5c78529790c413671d302 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:06:44 +0300 Subject: [PATCH 055/191] Style; added comment --- dbms/src/DataTypes/IDataType.cpp | 44 -------------------------------- dbms/src/DataTypes/IDataType.h | 2 ++ 2 files changed, 2 insertions(+), 44 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 83b62a425ae..39d269d8613 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -142,133 +142,89 @@ void IDataType::insertDefaultInto(IColumn & column) const void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings); - } else - { serializeTextEscaped(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextEscaped(column, istr, settings); - } else - { deserializeTextEscaped(column, istr, settings); - } } void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings); - } else - { serializeTextQuoted(column, row_num, ostr, 
settings); - } } void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextQuoted(column, istr, settings); - } else - { deserializeTextQuoted(column, istr, settings); - } } void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings); - } else - { serializeTextCSV(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextCSV(column, istr, settings); - } else - { deserializeTextCSV(column, istr, settings); - } } void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeText(column, row_num, ostr, settings); - } else - { serializeText(column, row_num, ostr, settings); - } } void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeWholeText(column, istr, settings); - } else - { deserializeWholeText(column, istr, settings); - } } void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings); - } else - { serializeTextJSON(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { 
custom_text_serialization->deserializeTextJSON(column, istr, settings); - } else - { deserializeTextJSON(column, istr, settings); - } } void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextXML(column, row_num, ostr, settings); - } else - { serializeTextXML(column, row_num, ostr, settings); - } } void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index f479bcfa3d2..2c1ec5e9db7 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -248,6 +248,8 @@ public: */ void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + /** Text deserialization in case when buffer contains only one value, without any escaping and delimiters. + */ void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization intended for using in JSON format. 
From b48284d33418bb4c7b16dffd122d6ae10c43997f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:08:48 +0300 Subject: [PATCH 056/191] Removed useless method --- dbms/src/DataTypes/IDataType.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 2c1ec5e9db7..f4c22ff9ac8 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -455,7 +455,6 @@ private: public: const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } - const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); } }; From cb661c0d54276bef398c57d8fb9b7cdb5212ae8b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:56:55 +0300 Subject: [PATCH 057/191] Added missing methods for DataTypeDate, DataTypeDateTime --- dbms/src/DataTypes/DataTypeAggregateFunction.cpp | 5 ++--- dbms/src/DataTypes/DataTypeDate.cpp | 5 +++++ dbms/src/DataTypes/DataTypeDate.h | 1 + dbms/src/DataTypes/DataTypeDateTime.cpp | 5 +++++ dbms/src/DataTypes/DataTypeDateTime.h | 1 + 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 683ff60df56..e63da7f1b1d 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -218,9 +219,7 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - String s; - readString(s, istr); - deserializeFromString(function, column, s); + throw 
Exception("AggregateFunction data type cannot be read from text", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/DataTypes/DataTypeDate.cpp b/dbms/src/DataTypes/DataTypeDate.cpp index 73edfd012fa..0b1f502b694 100644 --- a/dbms/src/DataTypes/DataTypeDate.cpp +++ b/dbms/src/DataTypes/DataTypeDate.cpp @@ -16,6 +16,11 @@ void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBu writeDateText(DayNum(static_cast(column).getData()[row_num]), ostr); } +void DataTypeDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; diff --git a/dbms/src/DataTypes/DataTypeDate.h b/dbms/src/DataTypes/DataTypeDate.h index a441d638cc4..7bd4c0d6b02 100644 --- a/dbms/src/DataTypes/DataTypeDate.h +++ b/dbms/src/DataTypes/DataTypeDate.h @@ -13,6 +13,7 @@ public: const char * getFamilyName() const override { return "Date"; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..a6b8f0da92a 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -62,6 +62,11 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } +void 
DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x; diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..6a951e0e288 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -38,6 +38,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::DateTime; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; From 54ece5f9685a2c2dc1f1c330d02fe9c72043b3fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:58:17 +0300 Subject: [PATCH 058/191] Added missing methods for DataTypeDate, DataTypeDateTime --- dbms/src/DataTypes/DataTypeAggregateFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index e63da7f1b1d..a2c00e18acb 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -217,7 +217,7 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } -void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const 
FormatSettings &) const +void DataTypeAggregateFunction::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const { throw Exception("AggregateFunction data type cannot be read from text", ErrorCodes::NOT_IMPLEMENTED); } From fad6013270fdeafd35e7a086d1ec79d887172a4c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 20:52:53 +0300 Subject: [PATCH 059/191] Fixing query parameters --- dbms/programs/client/Client.cpp | 17 +++++++---------- dbms/programs/server/HTTPHandler.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 14 ++++++-------- dbms/src/Interpreters/Context.h | 6 +++--- .../ReplaceQueryParameterVisitor.cpp | 6 +++--- .../Interpreters/ReplaceQueryParameterVisitor.h | 6 +++--- dbms/src/Interpreters/executeQuery.cpp | 13 +++++-------- dbms/src/Parsers/ASTQueryParameter.cpp | 8 +++++++- dbms/src/Parsers/IAST.cpp | 13 +++++++------ dbms/src/Parsers/IAST.h | 1 + .../00954_client_prepared_statements.sh | 3 +++ 11 files changed, 46 insertions(+), 43 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 10bbf3760ea..cd32691c647 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -204,7 +204,7 @@ private: std::list external_tables; /// Dictionary with query parameters for prepared statements. - NameToNameMap parameters_substitution; + NameToNameMap query_parameters; ConnectionParameters connection_parameters; @@ -807,15 +807,12 @@ private: if (!parsed_query) return true; - if (!parameters_substitution.empty()) - { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(parameters_substitution); - visitor.visit(parsed_query); + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); - /// Get new query after substitutions. 
- query = serializeAST(*parsed_query); - } + /// Get new query after substitutions. + query = serializeAST(*parsed_query); processed_rows = 0; progress.reset(); @@ -1719,7 +1716,7 @@ public: if (pos != String::npos && pos + 1 != parameter.size()) { const String name = parameter.substr(0, pos); - if (!parameters_substitution.insert({name, parameter.substr(pos + 1)}).second) + if (!query_parameters.insert({name, parameter.substr(pos + 1)}).second) throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } else diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 9b1160d9796..8971d29d12b 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -505,7 +505,7 @@ void HTTPHandler::processQuery( { /// Save name and values of substitution in dictionary. const String parameter_name = key.substr(strlen("param_")); - context.setParameterSubstitution(parameter_name, value); + context.setQueryParameter(parameter_name, value); } else { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f7a5ee6d62a..479420420df 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1868,22 +1868,20 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const bool Context::hasQueryParameters() const { - return !parameters_substitution.empty(); + return !query_parameters.empty(); } -NameToNameMap Context::getParameterSubstitution() const +const NameToNameMap & Context::getQueryParameters() const { - if (hasQueryParameters()) - return parameters_substitution; - throw Exception("Logical error: there are no parameters to substitute", ErrorCodes::LOGICAL_ERROR); + return query_parameters; } -void Context::setParameterSubstitution(const String & name, const String & value) +void Context::setQueryParameter(const String & name, const String & value) { - if (!parameters_substitution.insert({name, 
value}).second) - throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); + if (!query_parameters.emplace(name, value).second) + throw Exception("Duplicate name " + backQuote(name) + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 2e3440f4be3..7c2b6c25003 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,7 +145,7 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; - NameToNameMap parameters_substitution; /// Dictionary with query parameters for prepared statements. + NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. /// (key=name, value) IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, @@ -472,8 +472,8 @@ public: /// Query parameters for prepared statements. bool hasQueryParameters() const; - NameToNameMap getParameterSubstitution() const; - void setParameterSubstitution(const String & name, const String & value); + const NameToNameMap & getQueryParameters() const; + void setQueryParameter(const String & name, const String & value); #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index c732ee533fe..b7f625a7a41 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -28,11 +28,11 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { - auto search = parameters_substitution.find(name); - if (search != parameters_substitution.end()) + auto search = query_parameters.find(name); + if (search != query_parameters.end()) return search->second; else - 
throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::BAD_ARGUMENTS); } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index b8c7f5fd979..1931d4c0ba8 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -8,18 +8,18 @@ namespace DB class ASTQueryParameter; -/// Get prepared statements in query, replace ASTQueryParameter with ASTLiteral. +/// Visit substitutions in a query, replace ASTQueryParameter with ASTLiteral. class ReplaceQueryParameterVisitor { public: ReplaceQueryParameterVisitor(const NameToNameMap & parameters) - : parameters_substitution(parameters) + : query_parameters(parameters) {} void visit(ASTPtr & ast); private: - const NameToNameMap parameters_substitution; + const NameToNameMap & query_parameters; const String & getParamValue(const String & name); void visitQueryParameter(ASTPtr & ast); }; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1a508dc637c..32124f155b0 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,12 +170,9 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - if (context.hasQueryParameters()) /// Avoid change from TCPHandler. - { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getParameterSubstitution()); - visitor.visit(ast); - } + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
+ ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); auto * insert_query = ast->as(); @@ -208,8 +205,8 @@ static std::tuple executeQueryImpl( try { - if (context.hasQueryParameters()) /// Avoid change from TCPHandler. - /// Get new query after substitutions. + /// Get new query after substitutions. + if (context.hasQueryParameters()) query = serializeAST(*ast); logQuery(query.substr(0, settings.log_queries_cut_to_length), context, internal); diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 3696f93229e..462a08b0447 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,7 +7,13 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - settings.ostr << backQuoteIfNeed(name) + ':' + type; + settings.ostr + << (settings.hilite ? hilite_substitution : "") << '{' + << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(name) + << (settings.hilite ? hilite_substitution : "") << ':' + << (settings.hilite ? hilite_identifier : "") << type + << (settings.hilite ? hilite_substitution : "") << '}' + << (settings.hilite ? 
hilite_none : ""); } void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index b2014cc0f44..d6b198fc789 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -17,12 +17,13 @@ namespace ErrorCodes } -const char * IAST::hilite_keyword = "\033[1m"; -const char * IAST::hilite_identifier = "\033[0;36m"; -const char * IAST::hilite_function = "\033[0;33m"; -const char * IAST::hilite_operator = "\033[1;33m"; -const char * IAST::hilite_alias = "\033[0;32m"; -const char * IAST::hilite_none = "\033[0m"; +const char * IAST::hilite_keyword = "\033[1m"; +const char * IAST::hilite_identifier = "\033[0;36m"; +const char * IAST::hilite_function = "\033[0;33m"; +const char * IAST::hilite_operator = "\033[1;33m"; +const char * IAST::hilite_alias = "\033[0;32m"; +const char * IAST::hilite_substitution = "\033[1;36m"; +const char * IAST::hilite_none = "\033[0m"; String backQuoteIfNeed(const String & x) diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 89ab8fb05c3..8ebfd735874 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: static const char * hilite_function; static const char * hilite_operator; static const char * hilite_alias; + static const char * hilite_substitution; static const char * hilite_none; private: diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 9ecd60abab6..d904f4870a1 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,4 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | 
grep -P '^Code: 36\.' + + $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From b079631f610159e0c9ca7f289e7a5dc99319453e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:22:48 +0300 Subject: [PATCH 060/191] Fixed tests --- dbms/programs/client/Client.cpp | 14 +++++++------- dbms/src/Interpreters/executeQuery.cpp | 10 ++++++---- .../00954_client_prepared_statements.reference | 1 + .../00955_complex_prepared_statements.sh | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index cd32691c647..278eaac60d8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -807,13 +807,6 @@ private: if (!parsed_query) return true; - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(query_parameters); - visitor.visit(parsed_query); - - /// Get new query after substitutions. - query = serializeAST(*parsed_query); - processed_rows = 0; progress.reset(); show_progress_bar = false; @@ -909,6 +902,13 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); + + /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. 
+ query = serializeAST(*parsed_query); + connection->sendQuery(query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); receiveResult(); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 32124f155b0..1b6a245a99d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,10 +170,6 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); - visitor.visit(ast); - auto * insert_query = ast->as(); if (insert_query && insert_query->settings_ast) @@ -185,7 +181,9 @@ static std::tuple executeQueryImpl( insert_query->has_tail = has_query_tail; } else + { query_end = end; + } } catch (...) { @@ -205,6 +203,10 @@ static std::tuple executeQueryImpl( try { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); + /// Get new query after substitutions. if (context.hasQueryParameters()) query = serializeAST(*ast); diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index c7cafaefba8..0c2b40d0d53 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,3 +2,4 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 +Code: 36. 
DB::Exception: Substitution `s` is not set diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index b73d7d39eaf..b9486bbb1b9 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 36. DB::Exception: Expected correct value in parameter with name 'injection'" +EXCEPTION_TEXT="Code: 36." EXCEPTION_SUCCESS_TEXT="OK" EXCEPTION_FAIL_TEXT="FAIL" From 61bf0e9b1245e5ac456f2e675df9df5aced1b788 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:25:27 +0300 Subject: [PATCH 061/191] Style --- dbms/programs/server/HTTPHandler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 8971d29d12b..2349ab337f0 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -475,9 +475,9 @@ void HTTPHandler::processQuery( settings.readonly = 2; } - bool isExternalData = startsWith(request.getContentType().data(), "multipart/form-data"); + bool has_external_data = startsWith(request.getContentType().data(), "multipart/form-data"); - if (isExternalData) + if (has_external_data) { /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. reserved_param_suffixes.reserve(3); @@ -522,7 +522,7 @@ void HTTPHandler::processQuery( std::string full_query; /// Support for "external data for query processing". 
- if (isExternalData) + if (has_external_data) { ExternalTablesHandler handler(context, params); params.load(request, istr, handler); From 0f9599bf222683eda6227079db15a3ebdd08310d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:56:32 +0300 Subject: [PATCH 062/191] Removed bad code; added support for --param_name value --- dbms/programs/client/Client.cpp | 58 +++++++------------ ...00954_client_prepared_statements.reference | 6 ++ .../00954_client_prepared_statements.sh | 11 +++- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 278eaac60d8..1f347adefd4 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1548,14 +1548,6 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } - static std::pair parseParameter(const String & s) - { - size_t pos = s.find('_') + 1; - /// String begins with "--param_", so check is no needed - /// Cut two first dash "--" and divide arg from name and value - return {s.substr(2, pos - 2), s.substr(pos)}; - } - public: void init(int argc, char ** argv) { @@ -1573,7 +1565,6 @@ public: Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; - std::vector parameter_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1619,7 +1610,26 @@ public: /// Parameter arg after underline. 
if (startsWith(arg, "--param_")) - parameter_arguments.emplace_back(Arguments{arg}); + { + const char * param_continuation = arg + strlen("--param_"); + const char * equal_pos = strchr(param_continuation, '='); + + if (equal_pos == param_continuation) + throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + + if (equal_pos) + { + /// param_name=value + query_parameters.emplace(String(param_continuation, equal_pos), String(equal_pos + 1)); + } + else + { + /// param_name value + ++arg_num; + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + } else common_arguments.emplace_back(arg); } @@ -1697,32 +1707,6 @@ public: ("types", po::value(), "types") ; - /// Parse commandline options related to prepared statements. - po::options_description parameter_description("Query parameters options"); - parameter_description.add_options() - ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") - ; - - for (size_t i = 0; i < parameter_arguments.size(); ++i) - { - po::parsed_options parsed_parameter = po::command_line_parser(parameter_arguments[i]) - .options(parameter_description).extra_parser(parseParameter).run(); - po::variables_map parameter_options; - po::store(parsed_parameter, parameter_options); - - /// Save name and values of substitution in dictionary. - String parameter = parameter_options["param_"].as(); - size_t pos = parameter.find('='); - if (pos != String::npos && pos + 1 != parameter.size()) - { - const String name = parameter.substr(0, pos); - if (!query_parameters.insert({name, parameter.substr(pos + 1)}).second) - throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); - } - else - throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } - /// Parse main commandline options. 
po::parsed_options parsed = po::command_line_parser(common_arguments).options(main_description).run(); po::variables_map options; @@ -1746,8 +1730,8 @@ public: || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { std::cout << main_description << "\n"; - std::cout << parameter_description << "\n"; std::cout << external_description << "\n"; + std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; exit(0); } diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index 0c2b40d0d53..aaf5411a990 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -3,3 +3,9 @@ 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 Code: 36. DB::Exception: Substitution `s` is not set +abc +abc +Hello, world +Hello, world +0 +0 diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index d904f4870a1..30d4690742d 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -20,5 +20,14 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ $CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -P '^Code: 36\.' 
- $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; + + +$CLICKHOUSE_CLIENT --param_test abc --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test=abc --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test 'Hello, world' --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test='Hello, world' --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test '' --query 'SELECT length({test:String})' +$CLICKHOUSE_CLIENT --param_test='' --query 'SELECT length({test:String})' From 0dd88a1b033c7069bbaee06122f23f19038aa6ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 02:44:51 +0300 Subject: [PATCH 063/191] Fixed build --- dbms/src/Databases/DatabaseMySQL.cpp | 5 +++-- dbms/src/Databases/DatabaseMySQL.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Databases/DatabaseMySQL.cpp b/dbms/src/Databases/DatabaseMySQL.cpp index f5b2e2aec19..3b415e66f93 100644 --- a/dbms/src/Databases/DatabaseMySQL.cpp +++ b/dbms/src/Databases/DatabaseMySQL.cpp @@ -65,7 +65,7 @@ bool DatabaseMySQL::empty(const Context &) const return local_tables_cache.empty(); } -DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &) +DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { Tables tables; std::lock_guard lock(mutex); @@ -73,7 +73,8 @@ DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &) fetchTablesIntoLocalCache(); for (const auto & local_table : local_tables_cache) - tables[local_table.first] = local_table.second.storage; + if (!filter_by_table_name || filter_by_table_name(local_table.first)) + tables[local_table.first] = local_table.second.storage; return std::make_unique(tables); } diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 3e89b395208..483429bc03f 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -28,7 +28,7 @@ public: bool empty(const Context & 
context) const override; - DatabaseIteratorPtr getIterator(const Context & context) override; + DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; ASTPtr getCreateDatabaseQuery(const Context & context) const override; From 6bdd020609b3f1276be1be0e434bd7f1f1876632 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:02:56 +0300 Subject: [PATCH 064/191] Fixed test --- .../0_stateless/00954_client_prepared_statements.reference | 2 +- .../queries/0_stateless/00954_client_prepared_statements.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index aaf5411a990..2dbd21b2eab 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,7 +2,7 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 -Code: 36. DB::Exception: Substitution `s` is not set +Code: 36. abc abc Hello, world diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 30d4690742d..e6503a99933 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; -$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -P '^Code: 36\.' +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 36\.' 
$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 39198ef45f30299852d8e4fa61c34abd71af4dba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:10:34 +0300 Subject: [PATCH 065/191] Fixed error with COMMENT COLUMN IF EXISTS --- dbms/src/Parsers/ASTAlterQuery.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index e614f64d208..c7cd100b415 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -82,6 +82,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); @@ -172,13 +179,6 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); predicate->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::COMMENT_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - settings.ostr << " " << (settings.hilite ? 
hilite_none : ""); - comment->formatImpl(settings, state, frame); - } else if (type == ASTAlterCommand::MODIFY_TTL) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); From f98d0a108f05601d5469658a7012dddc1a830526 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:16:16 +0300 Subject: [PATCH 066/191] Removed useless header file --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 973023cd4b2..ac950a6e626 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -1,9 +1,6 @@ #include -#include - #include -#include #include #include From 864dacd112f78d8496e20a3f7e366b37f0a20265 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 19:47:47 +0300 Subject: [PATCH 067/191] Merging --- dbms/src/Common/ErrorCodes.cpp | 1 + .../Interpreters/InterpreterCreateQuery.cpp | 26 ++++++++++++------- dbms/src/Interpreters/QueryNormalizer.cpp | 20 ++++++++------ .../Interpreters/addTypeConversionToAST.cpp | 13 +++------- dbms/src/Parsers/ASTColumnDeclaration.cpp | 12 ++++----- dbms/src/Parsers/ASTColumnDeclaration.h | 2 +- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index f15d066f8cf..feeefd71a11 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -430,6 +430,7 @@ namespace ErrorCodes extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES = 453; extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; + extern const int UNKNOWN_QUERY_PARAMETER = 456; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp 
b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index ac950a6e626..7853e0c0841 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -41,10 +41,10 @@ #include #include -#include - #include + #include +#include namespace DB @@ -278,19 +278,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres /// add column to postprocessing if there is a default_expression specified if (col_decl.default_expression) { - /** for columns with explicitly-specified type create two expressions: - * 1. default_expression aliased as column name with _tmp suffix - * 2. conversion of expression (1) to explicitly-specified type alias as column name */ + /** For columns with explicitly-specified type create two expressions: + * 1. default_expression aliased as column name with _tmp suffix + * 2. conversion of expression (1) to explicitly-specified type alias as column name + */ if (col_decl.type) { const auto & final_column_name = col_decl.name; const auto tmp_column_name = final_column_name + "_tmp"; const auto data_type_ptr = column_names_and_types.back().type.get(); - default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(data_type_ptr->getName())), final_column_name)); - default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); + + default_expr_list->children.emplace_back( + setAlias(addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), + final_column_name)); + + default_expr_list->children.emplace_back( + setAlias( + col_decl.default_expression->clone(), + tmp_column_name)); } else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); @@ -329,7 +335,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres column.type = name_type_it->type; if 
(!column.type->equals(*deduced_type)) - default_expr = makeASTFunction("CAST", default_expr, std::make_shared(column.type->getName())); + default_expr = addTypeConversionToAST(std::move(default_expr), column.type->getName()); } else column.type = defaults_sample_block.getByName(column.name).type; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 1573202a946..c35c47179c6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int TOO_DEEP_AST; extern const int CYCLIC_ALIASES; + extern const int UNKNOWN_QUERY_PARAMETER; } @@ -227,14 +229,16 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) data.current_alias = my_alias; } - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); + if (auto * node_func = ast->as()) + visit(*node_func, ast, data); + else if (auto * node_id = ast->as()) + visit(*node_id, ast, data); + else if (auto * node_tables = ast->as()) + visit(*node_tables, ast, data); + else if (auto * node_select = ast->as()) + visit(*node_select, ast, data); + else if (auto * node_param = ast->as()) + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. 
if (ast.get() != initial_ast.get()) diff --git a/dbms/src/Interpreters/addTypeConversionToAST.cpp b/dbms/src/Interpreters/addTypeConversionToAST.cpp index 6640af0ca0d..699c3bd27c3 100644 --- a/dbms/src/Interpreters/addTypeConversionToAST.cpp +++ b/dbms/src/Interpreters/addTypeConversionToAST.cpp @@ -11,23 +11,16 @@ namespace DB ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) { - auto func = std::make_shared(); - ASTPtr res = func; + auto func = makeASTFunction("CAST", ast, std::make_shared(type_name)); - if (ASTWithAlias * ast_with_alias = ast->as()) + if (ASTWithAlias * ast_with_alias = dynamic_cast(ast.get())) { func->alias = ast_with_alias->alias; func->prefer_alias_to_column_name = ast_with_alias->prefer_alias_to_column_name; ast_with_alias->alias.clear(); } - func->name = "CAST"; - auto exp_list = std::make_shared(); - func->arguments = exp_list; - func->children.push_back(func->arguments); - exp_list->children.emplace_back(std::move(ast)); - exp_list->children.emplace_back(std::make_shared(type_name)); - return res; + return func; } } diff --git a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index 892be19c6b5..e718d5c292d 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -21,18 +21,18 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->default_expression); } - if (codec) - { - res->codec = codec->clone(); - res->children.push_back(res->codec); - } - if (comment) { res->comment = comment->clone(); res->children.push_back(res->comment); } + if (codec) + { + res->codec = codec->clone(); + res->children.push_back(res->codec); + } + if (ttl) { res->ttl = ttl->clone(); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 311ceb4efbc..ad23e0669bc 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,8 +15,8 @@ public: ASTPtr 
type; String default_specifier; ASTPtr default_expression; - ASTPtr codec; ASTPtr comment; + ASTPtr codec; ASTPtr ttl; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } From 02034c5d9116dc4142cf17d2cab87aed22acc918 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 20:32:37 +0300 Subject: [PATCH 068/191] Merging --- dbms/src/Common/ErrorCodes.cpp | 1 + .../src/Interpreters/ReplaceQueryParameterVisitor.cpp | 11 +++++++++-- dbms/src/Interpreters/executeQuery.cpp | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index feeefd71a11..2a7a285ce14 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -431,6 +431,7 @@ namespace ErrorCodes extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; extern const int UNKNOWN_QUERY_PARAMETER = 456; + extern const int BAD_QUERY_PARAMETER = 457; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index b7f625a7a41..325499d59d2 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -15,6 +15,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_QUERY_PARAMETER; + extern const int BAD_QUERY_PARAMETER; +} + + void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) { for (auto & child : ast->children) @@ -32,7 +39,7 @@ const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != query_parameters.end()) return search->second; else - throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } void 
ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) @@ -49,7 +56,7 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); if (!read_buffer.eof()) - throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_QUERY_PARAMETER); ast = addTypeConversionToAST(std::make_shared(temp_column[0]), type_name); } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1b6a245a99d..1dfb7def86b 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -204,8 +204,11 @@ static std::tuple executeQueryImpl( try { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); - visitor.visit(ast); + if (context.hasQueryParameters()) + { + ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); + } /// Get new query after substitutions. 
if (context.hasQueryParameters()) From 54b633bb86ae1e656b38d8ce59f8233f7b03603b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:00:29 +0300 Subject: [PATCH 069/191] Fixed wrong method ASTExplainQuery::formatImpl --- dbms/src/Parsers/ASTExplainQuery.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index 5ebd02b85f8..d921ff427ae 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -26,9 +26,10 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? 
hilite_none : "") << " "; + children.at(0)->formatImpl(settings, state, frame); } private: @@ -38,8 +39,8 @@ private: { switch (kind) { - case ParsedAST: return "ParsedAST"; - case AnalyzedSyntax: return "AnalyzedSyntax"; + case ParsedAST: return "AST"; + case AnalyzedSyntax: return "ANALYZE"; } __builtin_unreachable(); From 39105fc23318f42ddbbb9a2e6162f9c33c42cbd8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:02:56 +0300 Subject: [PATCH 070/191] Updated tests --- .../queries/0_stateless/00954_client_prepared_statements.sh | 2 +- .../queries/0_stateless/00955_complex_prepared_statements.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index e6503a99933..c90dc92a7ef 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; -$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 36\.' +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 456\.' $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index b9486bbb1b9..a0e3d5aee54 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 36." +EXCEPTION_TEXT="Code: 456." 
EXCEPTION_SUCCESS_TEXT="OK" EXCEPTION_FAIL_TEXT="FAIL" From 289b9fda999598699ab6c1e4dfa07ff9057064e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:05:43 +0300 Subject: [PATCH 071/191] Updated tests --- .../0_stateless/00954_client_prepared_statements.reference | 2 +- .../00955_complex_prepared_statements.reference | 2 +- .../0_stateless/00955_complex_prepared_statements.sh | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index 2dbd21b2eab..f25c522a3c5 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,7 +2,7 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 -Code: 36. +Code: 456. abc abc Hello, world diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 818e30f1273..701cc5f8781 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,4 +3,4 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 -OK +Code: 457. diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index a0e3d5aee54..fd30921b1ac 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,9 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 456." 
-EXCEPTION_SUCCESS_TEXT="OK" -EXCEPTION_FAIL_TEXT="FAIL" +EXCEPTION_TEXT="Code: 457." $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; $CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( @@ -37,7 +35,6 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15" \ # Must throw an exception to avoid SQL injection $CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1" \ -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1 \ - | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" \ - || echo "$EXCEPTION_FAIL_TEXT"; + | grep -o "$EXCEPTION_TEXT" $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From da43d1e3e444049ce9c96cfb1f426793fcdc1cc2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:28:24 +0300 Subject: [PATCH 072/191] Fixed formatting of queries with clashed expression and table aliases --- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 98cf6254a4f..59c10d74969 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -81,6 +81,7 @@ ASTPtr ASTTablesInSelectQuery::clone() const void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + frame.current_select = this; std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); if (database_and_table_name) From 0bc2b751eacc4d50218a695ed467e34feae8de97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:30:25 +0300 Subject: [PATCH 073/191] Added test --- .../00957_format_with_clashed_aliases.reference | 7 +++++++ .../0_stateless/00957_format_with_clashed_aliases.sh | 10 ++++++++++ 2 files changed, 17 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference create mode 100755 dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference new file mode 100644 index 00000000000..c97c2d66b51 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -0,0 +1,7 @@ +SELECT + 1 AS x, + x.y +FROM +( + SELECT 'Hello, world' AS y +) AS x diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh new file mode 100755 index 00000000000..7268a1e1a93 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT 1 AS x, x.y FROM (SELECT 'Hello, world' AS y) AS x" | $format From 2c0bdf1d90cf81e067e4f4b2fc3f980cea10b131 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:42:06 +0300 Subject: [PATCH 074/191] Fixed formatting of expressions like (x[1].1)[1] --- dbms/src/Parsers/ASTFunction.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 5c5dbc9ba90..5d1d11dba27 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -223,10 +223,16 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (lit->value.getType() == Field::Types::UInt64) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } From dc0391b4469be8933fecf0198fd063b52b6d531c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:51:09 +0300 Subject: [PATCH 075/191] Fixed formatting of complex expressions --- dbms/src/Parsers/ASTFunction.cpp | 16 ++++++++++++++-- ...00958_format_of_tuple_array_element.reference | 9 +++++++++ .../00958_format_of_tuple_array_element.sh | 10 ++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference create mode 100755 dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 5d1d11dba27..b550c7062d1 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -126,6 +126,9 @@ void 
ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (0 == strcmp(name.c_str(), func[0])) { + if (frame.need_parens) + settings.ostr << '('; + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); /** A particularly stupid case. If we have a unary minus before a literal that is a negative number @@ -138,6 +141,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format arguments->formatImpl(settings, state, nested_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } @@ -209,11 +215,17 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "arrayElement")) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } if (!written && 0 == strcmp(name.c_str(), "tupleElement")) @@ -228,7 +240,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? 
hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); written = true; if (frame.need_parens) diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference new file mode 100644 index 00000000000..7265311960f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference @@ -0,0 +1,9 @@ +SELECT + (x.1)[1], + (((x[1]).1)[1]).1, + (NOT x)[1], + -(x[1]), + (-x)[1], + (NOT x).1, + -(x.1), + (-x).1 diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh new file mode 100755 index 00000000000..47f8e99bbb8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT (x.1)[1], (x[1].1)[1].1, (NOT x)[1], -x[1], (-x)[1], (NOT x).1, -x.1, (-x).1" | $format From d907d8e8ca7742c19c065057210e5f5f095e1f9f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 17 Jun 2019 19:27:18 +0300 Subject: [PATCH 076/191] Commit offsets for SELECTing from Kafka table too --- .../ExpressionBlockInputStream.cpp | 5 ++- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 32 ++++++++++++++----- dbms/src/Storages/Kafka/StorageKafka.cpp | 10 +++--- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 3ce7601e553..51adc462ef6 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -31,9 +31,8 @@ Block ExpressionBlockInputStream::getHeader() const Block ExpressionBlockInputStream::readImpl() { Block res = children.back()->read(); - if (!res) - return res; - expression->execute(res); + if (res) + expression->execute(res); return res; } diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 9eacdce59e1..5511f3c4cec 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -15,13 +15,29 @@ ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() void ReadBufferFromKafkaConsumer::commit() { - if (messages.empty() || current == messages.begin()) - return; + if (current != messages.end()) + { + /// Since we can poll more messages than we already processed, + /// commit only processed messages. + consumer->async_commit(*current); + } + else + { + /// Commit everything we polled so far because either: + /// - read all polled messages (current == messages.end()), + /// - read nothing at all (messages.empty()), + /// - stalled. 
+ consumer->async_commit(); + } - auto & previous = *std::prev(current); - - LOG_TRACE(log, "Committing message with offset " << previous.get_offset()); - consumer->async_commit(previous); + const auto & offsets = consumer->get_offsets_committed(consumer->get_assignment()); + for (const auto & topic_part : offsets) + { + LOG_TRACE( + log, + "Committed offset " << topic_part.get_offset() << " (topic: " << topic_part.get_topic() + << ", partition: " << topic_part.get_partition() << ")"); + } } void ReadBufferFromKafkaConsumer::subscribe(const Names & topics) @@ -45,7 +61,7 @@ void ReadBufferFromKafkaConsumer::unsubscribe() consumer->unsubscribe(); } -/// Do commit messages implicitly after we processed the previous batch. +/// Try to commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. @@ -64,7 +80,7 @@ bool ReadBufferFromKafkaConsumer::nextImpl() LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); } - if (messages.empty() || current == messages.end()) + if (messages.empty()) { stalled = true; return false; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e43508e5951..ac9d918a726 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -113,21 +113,21 @@ BlockInputStreams StorageKafka::read( const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned num_streams) + unsigned /* num_streams */) { if (num_created_consumers == 0) return BlockInputStreams(); - const size_t stream_count = std::min(size_t(num_streams), num_created_consumers); - + /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. 
BlockInputStreams streams; - streams.reserve(stream_count); + streams.reserve(num_created_consumers); // Claim as many consumers as requested, but don't block - for (size_t i = 0; i < stream_count; ++i) + for (size_t i = 0; i < num_created_consumers; ++i) { /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block /// TODO: probably that leads to awful performance. + /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. streams.emplace_back(std::make_shared(*this, context, column_names, 1)); } From 9fd048cdbd2db255bb1d737f4c172d100a9d80af Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 18 Jun 2019 19:32:37 +0300 Subject: [PATCH 077/191] Allow to select virtual columns in materialized view --- .../DataStreams/PushingToViewsBlockOutputStream.cpp | 13 +++++++++++++ .../DataStreams/PushingToViewsBlockOutputStream.h | 2 +- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- dbms/src/Storages/IStorage.cpp | 10 ++++++++++ dbms/src/Storages/IStorage.h | 1 + dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- dbms/tests/integration/test_storage_kafka/test.py | 3 ++- 7 files changed, 32 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 195c5edcb07..304d7aa989c 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -63,6 +63,17 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( } +Block PushingToViewsBlockOutputStream::getHeader() const +{ + /// If we don't write directly to the destination + /// then expect that we're inserting with precalculated virtual columns + if (output) + return storage->getSampleBlock(); + else + return storage->getSampleBlockWithVirtuals(); +} + + void PushingToViewsBlockOutputStream::write(const Block & block) { /** Throw an exception 
if the sizes of arrays - elements of nested data structures doesn't match. @@ -73,6 +84,8 @@ void PushingToViewsBlockOutputStream::write(const Block & block) Nested::validateArraySizes(block); if (output) + /// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended + /// with additional columns directly from storage and pass it to MVs instead of raw block. output->write(block); /// Don't process materialized views if this block is duplicate diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 3381a828ff0..34b8cb43042 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -22,7 +22,7 @@ public: const String & database, const String & table, const StoragePtr & storage_, const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false); - Block getHeader() const override { return storage->getSampleBlock(); } + Block getHeader() const override; void write(const Block & block) override; void flush() override; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e4391f52247..b906d151415 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -57,8 +57,6 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) { - - Block table_sample_non_materialized = table->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) @@ -66,6 +64,8 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const /// Format Native ignores header and write blocks as is. 
if (query.format == "Native") return {}; + else if (query.no_destination) + return table->getSampleBlockWithVirtuals(); else return table_sample_non_materialized; } @@ -108,14 +108,14 @@ BlockIO InterpreterInsertQuery::execute() if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote())) { out = std::make_shared( - out, table->getSampleBlock(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); + out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); } auto query_sample_block = getSampleBlock(query, table); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out = std::make_shared( - out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); + out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index ad8130474a1..114d9d3eea2 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -62,6 +62,16 @@ Block IStorage::getSampleBlock() const return res; } +Block IStorage::getSampleBlockWithVirtuals() const +{ + auto res = getSampleBlock(); + + for (const auto & column : getColumns().getVirtuals()) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} + Block IStorage::getSampleBlockNonMaterialized() const { Block res; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 9f3a499e1d7..5bfd8224372 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -92,6 +92,7 @@ public: /// thread-unsafe part. 
lockStructure must be acquired virtual bool hasColumn(const String & column_name) const; Block getSampleBlock() const; /// ordinary + materialized. + Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index de1a31926ee..ef3aac43b0c 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -345,7 +345,7 @@ bool StorageKafka::streamToViews() auto insert = std::make_shared(); insert->database = database_name; insert->table = table_name; - insert->no_destination = true; // Only insert into dependent views + insert->no_destination = true; // Only insert into dependent views and expect that input blocks contain virtual columns const Settings & settings = global_context.getSettingsRef(); size_t block_size = max_block_size; diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index c8c29dfceae..7769556b400 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -336,7 +336,8 @@ def test_kafka_flush_on_big_message(kafka_cluster): kafka_topic_list = 'flush', kafka_group_name = 'flush', kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; + kafka_max_block_size = 10, + kafka_commit_on_every_batch = 1; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; From bb95d9a86083dcead9962f6dc8508554ef5716f3 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 15:28:34 +0300 Subject: [PATCH 078/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 95 ++++++++++++------- .../configs/config.d/query_log.xml | 9 ++ 
.../integration/test_system_queries/test.py | 17 ++++ 3 files changed, 89 insertions(+), 32 deletions(-) create mode 100644 dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 59dda00e71b..ec4de2f1c83 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -101,22 +102,10 @@ public: /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. */ - void add(const LogElement & element) - { - if (is_shutdown) - return; - - /// Without try we could block here in case of queue overflow. - if (!queue.tryPush({false, element})) - LOG_ERROR(log, "SystemLog queue is full"); - } + void add(const LogElement & element); /// Flush data in the buffer to disk - void flush() - { - if (!is_shutdown) - flushImpl(false); - } + void flush(); /// Stop the background flush thread before destructor. No more data will be written. void shutdown(); @@ -130,7 +119,14 @@ protected: const size_t flush_interval_milliseconds; std::atomic is_shutdown{false}; - using QueueItem = std::pair; /// First element is shutdown flag for thread. + enum class ElementType + { + REGULAR = 0, + SHUTDOWN, + FORCE_FLUSH + }; + + using QueueItem = std::pair; /// Queue is bounded. But its size is quite large to not block in all normal cases. ConcurrentBoundedQueue queue {DBMS_SYSTEM_LOG_QUEUE_SIZE}; @@ -140,7 +136,6 @@ protected: * than accumulation of large amount of log records (for example, for query log - processing of large amount of queries). 
*/ std::vector data; - std::mutex data_mutex; Logger * log; @@ -157,6 +152,12 @@ protected: bool is_prepared = false; void prepareTable(); + std::mutex flush_mutex; + std::mutex condvar_mutex; + std::condition_variable flush_condvar; + bool force_flushing = false; + + /// flushImpl can be executed only in saving_thread. void flushImpl(bool quiet); }; @@ -178,6 +179,36 @@ SystemLog::SystemLog(Context & context_, } +template +void SystemLog::add(const LogElement & element) +{ + if (is_shutdown) + return; + + /// Without try we could block here in case of queue overflow. + if (!queue.tryPush({ElementType::REGULAR, element})) + LOG_ERROR(log, "SystemLog queue is full"); +} + + +template +void SystemLog::flush() +{ + if (is_shutdown) + return; + + std::lock_guard flush_lock(flush_mutex); + /// Tell thread to execute extra flush. + queue.push({ElementType::FORCE_FLUSH, {}}); + + /// Wait for flush being finished. + std::unique_lock lock(condvar_mutex); + force_flushing = true; + while (force_flushing) + flush_condvar.wait(lock); +} + + template void SystemLog::shutdown() { @@ -186,7 +217,7 @@ void SystemLog::shutdown() return; /// Tell thread to shutdown. - queue.push({true, {}}); + queue.push({ElementType::SHUTDOWN, {}}); saving_thread.join(); } @@ -219,16 +250,10 @@ void SystemLog::threadFunction() QueueItem element; bool has_element = false; - bool is_empty; - { - std::unique_lock lock(data_mutex); - is_empty = data.empty(); - } - /// data.size() is increased only in this function /// TODO: get rid of data and queue duality - if (is_empty) + if (data.empty()) { queue.pop(element); has_element = true; @@ -242,18 +267,20 @@ void SystemLog::threadFunction() if (has_element) { - if (element.first) + if (element.first == ElementType::SHUTDOWN) { - /// Shutdown. /// NOTE: MergeTree engine can write data even it is already in shutdown state. 
- flush(); + flushImpl(true); break; } - else + else if (element.first == ElementType::FORCE_FLUSH) { - std::unique_lock lock(data_mutex); - data.push_back(element.second); + flushImpl(false); + time_after_last_write.restart(); + continue; } + else + data.push_back(element.second); } size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; @@ -277,8 +304,6 @@ void SystemLog::threadFunction() template void SystemLog::flushImpl(bool quiet) { - std::unique_lock lock(data_mutex); - try { if (quiet && data.empty()) @@ -320,6 +345,12 @@ void SystemLog::flushImpl(bool quiet) /// In case of exception, also clean accumulated data - to avoid locking. data.clear(); } + if (!quiet) + { + std::lock_guard lock(condvar_mutex); + force_flushing = false; + flush_condvar.notify_one(); + } } diff --git a/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml new file mode 100644 index 00000000000..9f55dcb829e --- /dev/null +++ b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml @@ -0,0 +1,9 @@ + + + + system + query_log
+ toYYYYMM(event_date) + 300 +
+
diff --git a/dbms/tests/integration/test_system_queries/test.py b/dbms/tests/integration/test_system_queries/test.py index a3899bab577..1761017362a 100644 --- a/dbms/tests/integration/test_system_queries/test.py +++ b/dbms/tests/integration/test_system_queries/test.py @@ -92,6 +92,23 @@ def test_RELOAD_CONFIG_AND_MACROS(started_cluster): instance.query("SYSTEM RELOAD CONFIG") assert TSV(instance.query("select * from system.macros")) == TSV("mac\tro\n") + +def test_SYSTEM_FLUSH_LOGS(started_cluster): + instance = cluster.instances['ch1'] + for i in range(4): + # Sleep to execute flushing from background thread at first query + # by expiration of flush_interval_millisecond and test probable race condition. + time.sleep(0.5) + result = instance.query(''' + SET log_queries = 1; + SELECT 1 FORMAT Null; + SET log_queries = 0; + SYSTEM FLUSH LOGS; + SELECT count() FROM system.query_log;''') + instance.query('TRUNCATE TABLE system.query_log') + assert TSV(result) == TSV('4') + + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): From cba07d646ae6a5646ae512330be8e1f707b9a1cd Mon Sep 17 00:00:00 2001 From: spyros87 Date: Wed, 19 Jun 2019 16:08:30 +0200 Subject: [PATCH 079/191] Define kafka_skip_broken_messages correctly as number Signed-off-by: spyros87 --- docs/en/operations/table_engines/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 22d0384fd42..b93f53c581d 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -26,7 +26,7 @@ SETTINGS [kafka_row_delimiter = 'delimiter_symbol',] [kafka_schema = '',] [kafka_num_consumers = N,] - [kafka_skip_broken_messages = <0|1>] + [kafka_skip_broken_messages = N] ``` Required parameters: From d174d9f867fecdbc7064d7526ba7bde321739f3c Mon Sep 17 00:00:00 2001 From: spyros87 Date: Wed, 19 Jun 2019 
16:15:34 +0200 Subject: [PATCH 080/191] Rewriten kafka_skip_broken_messages parameter description. Signed-off-by: spyros87 --- docs/en/operations/table_engines/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index b93f53c581d..69d167403da 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -40,7 +40,7 @@ Optional parameters: - `kafka_row_delimiter` – Delimiter character, which ends the message. - `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. -- `kafka_skip_broken_messages` – Kafka message parser mode. If `kafka_skip_broken_messages = 1` then the engine skips the Kafka messages that can't be parsed (a message equals a row of data). +- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). 
Examples: From f52b16e1e1b35bf5992185ce898314ac3597f904 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 10 May 2019 11:42:28 +0800 Subject: [PATCH 081/191] support bloom filter for any type --- dbms/src/Interpreters/BloomFilter.cpp | 87 ++++- dbms/src/Interpreters/BloomFilter.h | 26 +- dbms/src/Interpreters/BloomFilterHash.h | 141 ++++++++ .../MergeTreeIndexAggregatorBloomFilter.cpp | 62 ++++ .../MergeTreeIndexAggregatorBloomFilter.h | 29 ++ .../MergeTree/MergeTreeIndexBloomFilter.cpp | 87 +++++ .../MergeTree/MergeTreeIndexBloomFilter.h | 31 ++ .../MergeTreeIndexConditionBloomFilter.cpp | 310 ++++++++++++++++++ .../MergeTreeIndexConditionBloomFilter.h | 69 ++++ ...erIndex.cpp => MergeTreeIndexFullText.cpp} | 88 +++-- ...FilterIndex.h => MergeTreeIndexFullText.h} | 50 +-- .../MergeTreeIndexGranuleBloomFilter.cpp | 116 +++++++ .../MergeTreeIndexGranuleBloomFilter.h | 36 ++ .../Storages/MergeTree/MergeTreeIndices.cpp | 8 +- dbms/src/Storages/MergeTree/RPNBuilder.h | 5 +- 15 files changed, 1050 insertions(+), 95 deletions(-) create mode 100644 dbms/src/Interpreters/BloomFilterHash.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h rename dbms/src/Storages/MergeTree/{MergeTreeBloomFilterIndex.cpp => MergeTreeIndexFullText.cpp} (87%) rename dbms/src/Storages/MergeTree/{MergeTreeBloomFilterIndex.h => MergeTreeIndexFullText.h} (79%) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h diff --git 
a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp index 765f1ea9478..3f20799cedf 100644 --- a/dbms/src/Interpreters/BloomFilter.cpp +++ b/dbms/src/Interpreters/BloomFilter.cpp @@ -1,6 +1,6 @@ #include - #include +#include "BloomFilter.h" namespace DB @@ -9,14 +9,13 @@ namespace DB static constexpr UInt64 SEED_GEN_A = 845897321; static constexpr UInt64 SEED_GEN_B = 217728422; - -StringBloomFilter::StringBloomFilter(size_t size_, size_t hashes_, size_t seed_) +BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_) : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0) {} -StringBloomFilter::StringBloomFilter(const StringBloomFilter & bloom_filter) +BloomFilter::BloomFilter(const BloomFilter & bloom_filter) : size(bloom_filter.size), hashes(bloom_filter.hashes), seed(bloom_filter.seed), words(bloom_filter.words), filter(bloom_filter.filter) {} -bool StringBloomFilter::find(const char * data, size_t len) +bool BloomFilter::find(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -30,7 +29,7 @@ bool StringBloomFilter::find(const char * data, size_t len) return true; } -void StringBloomFilter::add(const char * data, size_t len) +void BloomFilter::add(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -42,12 +41,12 @@ void StringBloomFilter::add(const char * data, size_t len) } } -void StringBloomFilter::clear() +void BloomFilter::clear() { filter.assign(words, 0); } -bool StringBloomFilter::contains(const StringBloomFilter & bf) +bool BloomFilter::contains(const BloomFilter & bf) { for (size_t i = 0; i < words; ++i) { @@ -57,7 +56,7 @@ bool 
StringBloomFilter::contains(const StringBloomFilter & bf) return true; } -UInt64 StringBloomFilter::isEmpty() const +UInt64 BloomFilter::isEmpty() const { for (size_t i = 0; i < words; ++i) if (filter[i] != 0) @@ -65,7 +64,7 @@ UInt64 StringBloomFilter::isEmpty() const return true; } -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) +bool operator== (const BloomFilter & a, const BloomFilter & b) { for (size_t i = 0; i < a.words; ++i) if (a.filter[i] != b.filter[i]) @@ -73,4 +72,72 @@ bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) return true; } +void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType)))); +} + +bool BloomFilter::containsWithSeed(const UInt64 & hash, const UInt64 & seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))); +} + +static std::pair calculationBestPracticesImpl(double max_conflict_probability) +{ + static const size_t MAX_BITS_PER_ROW = 20; + static const size_t MAX_HASH_FUNCTION_COUNT = 15; + + /// For the smallest index per level in probability_lookup_table + static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; + + static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = + { + {1.0}, /// dummy, 0 bits per row + {1.0, 1.0}, + {1.0, 0.393, 0.400}, + {1.0, 0.283, 0.237, 0.253}, + {1.0, 0.221, 0.155, 0.147, 0.160}, + {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 + {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638}, + {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, + {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 
0.0229}, + {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, + {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 + {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, + {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, + {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, + {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, + {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 + {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, + {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, + {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, + {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, + {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 + }; + + for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) + { + if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) + { + size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; + for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) + if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) + { + std::cout << "Best bf:" << bits_per_row << ", " << (size_of_hash_functions + 1) << "\n"; + return std::pair(bits_per_row, size_of_hash_functions + 1); + } + + } + } + + return std::pair(MAX_BITS_PER_ROW - 1, 
min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); +} + +std::pair calculationBestPractices(double max_conflict_probability) +{ + return calculationBestPracticesImpl(max_conflict_probability); +} + } diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h index 1825dbec4bd..23bf7baba20 100644 --- a/dbms/src/Interpreters/BloomFilter.h +++ b/dbms/src/Interpreters/BloomFilter.h @@ -1,15 +1,17 @@ #pragma once -#include #include - +#include +#include +#include +#include namespace DB { -/// Bloom filter for strings. -class StringBloomFilter +class BloomFilter { + public: using UnderType = UInt64; using Container = std::vector; @@ -17,16 +19,19 @@ public: /// size -- size of filter in bytes. /// hashes -- number of used hash functions. /// seed -- random seed for hash functions generation. - StringBloomFilter(size_t size_, size_t hashes_, size_t seed_); - StringBloomFilter(const StringBloomFilter & bloom_filter); + BloomFilter(size_t size_, size_t hashes_, size_t seed_); + BloomFilter(const BloomFilter & bloom_filter); bool find(const char * data, size_t len); void add(const char * data, size_t len); void clear(); + void addHashWithSeed(const UInt64 & hash, const UInt64 & seed); + bool containsWithSeed(const UInt64 & hash, const UInt64 & seed); + /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. - bool contains(const StringBloomFilter & bf); + bool contains(const BloomFilter & bf); const Container & getFilter() const { return filter; } Container & getFilter() { return filter; } @@ -34,7 +39,7 @@ public: /// For debug. 
UInt64 isEmpty() const; - friend bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); + friend bool operator== (const BloomFilter & a, const BloomFilter & b); private: size_t size; @@ -44,7 +49,10 @@ private: Container filter; }; +using BloomFilterPtr = std::shared_ptr; -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); +bool operator== (const BloomFilter & a, const BloomFilter & b); + +std::pair calculationBestPractices(double max_conflict_probability); } diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h new file mode 100644 index 00000000000..4c5fc1934fa --- /dev/null +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -0,0 +1,141 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +struct BloomFilterHash +{ + static constexpr UInt64 bf_hash_seed[15] = { + 13635471485423070496ULL, 10336109063487487899ULL, 17779957404565211594ULL, 8988612159822229247ULL, 4954614162757618085ULL, + 12980113590177089081ULL, 9263883436177860930ULL, 3656772712723269762ULL, 10362091744962961274ULL, 7582936617938287249ULL, + 15033938188484401405ULL, 18286745649494826751ULL, 6852245486148412312ULL, 8886056245089344681ULL, 10151472371158292780ULL + }; + + static ColumnPtr hashWithField(const IDataType * data_type, const Field & field) + { + WhichDataType which(data_type); + + if (which.isUInt()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); + else if (which.isInt()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isString() || which.isFixedString()) + { + const auto & value = field.safeGet(); + return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1); + } + else + throw Exception("Unexpected type " 
+ data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + static ColumnPtr hashWithColumn(const IDataType * data_type, const IColumn * column, size_t pos, size_t limit) + { + auto index_column = ColumnUInt64::create(limit); + ColumnUInt64::Container & index_column_vec = index_column->getData(); + getAnyTypeHash(data_type, column, index_column_vec, pos); + return index_column; + } + + template + static void getAnyTypeHash(const IDataType *data_type, const IColumn *column, ColumnUInt64::Container &vec, size_t pos) + { + WhichDataType which(data_type); + + if (which.isUInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum8()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum16()) getNumberTypeHash(column, vec, pos); + else if (which.isDate()) getNumberTypeHash(column, vec, pos); + else if (which.isDateTime()) getNumberTypeHash(column, vec, pos); + else if (which.isFloat32()) getNumberTypeHash(column, vec, pos); + else if (which.isFloat64()) getNumberTypeHash(column, vec, pos); + else if (which.isString()) getStringTypeHash(column, vec, pos); + else if (which.isFixedString()) getStringTypeHash(column, vec, pos); + else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + template + static void getNumberTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + const auto * index_column = typeid_cast *>(column); + + if (unlikely(!index_column)) + throw Exception("Illegal 
column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & vec_from = index_column->getData(); + + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + + template + static void getStringTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + if (const auto * index_column = typeid_cast(column)) + { + const ColumnString::Chars & data = index_column->getChars(); + const ColumnString::Offsets & offsets = index_column->getOffsets(); + + ColumnString::Offset current_offset = pos; + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64( + reinterpret_cast(&data[current_offset]), offsets[index + pos] - current_offset - 1); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + + current_offset = offsets[index + pos]; + } + } + else if (const auto * fixed_string_index_column = typeid_cast(column)) + { + size_t fixed_len = fixed_string_index_column->getN(); + const auto & data = fixed_string_index_column->getChars(); + + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64(reinterpret_cast(&data[(index + pos) * fixed_len]), fixed_len); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + } + } + else + throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + } +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp new file mode 100644 index 00000000000..c3f7150548c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -0,0 +1,62 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; +} + +MergeTreeIndexAggregatorBloomFilter::MergeTreeIndexAggregatorBloomFilter( + size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_) +{ +} + +bool MergeTreeIndexAggregatorBloomFilter::empty() const +{ + return !total_rows; +} + +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset() +{ + const auto granule = std::make_shared(bits_per_row, hash_functions, total_rows, granule_index_blocks); + total_rows = 0; + granule_index_blocks.clear(); + return granule; +} + +void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * pos, size_t limit) +{ + if (*pos >= block.rows()) + throw Exception("The provided position is not less than the number of block rows. 
Position: " + toString(*pos) + ", Block rows: " + + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + + Block granule_index_block; + size_t max_read_rows = std::min(block.rows() - *pos, limit); + + for (size_t index = 0; index < index_columns_name.size(); ++index) + { + const auto & column_and_type = block.getByName(index_columns_name[index]); + const auto & index_column = BloomFilterHash::hashWithColumn(&*column_and_type.type, &*column_and_type.column, *pos, max_read_rows); + + granule_index_block.insert({std::move(index_column), std::make_shared(), column_and_type.name}); + } + + *pos += max_read_rows; + total_rows += max_read_rows; + granule_index_blocks.push_back(granule_index_block); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h new file mode 100644 index 00000000000..ebbe9865313 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexAggregatorBloomFilter : public IMergeTreeIndexAggregator +{ +public: + MergeTreeIndexAggregatorBloomFilter(size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_); + + bool empty() const override; + + MergeTreeIndexGranulePtr getGranuleAndReset() override; + + void update(const Block & block, size_t * pos, size_t limit) override; + +private: + size_t bits_per_row; + size_t hash_functions; + const Names index_columns_name; + + size_t total_rows = 0; + Blocks granule_index_blocks; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp new file mode 100644 index 00000000000..dff73a80576 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter( + const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, const Block & header, + size_t granularity, size_t bits_per_row_, size_t hash_functions_) + : IMergeTreeIndex(name, expr, columns, data_types, header, granularity), bits_per_row(bits_per_row_), hash_functions(hash_functions_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const +{ + return std::make_shared(bits_per_row, hash_functions, columns.size()); +} + +bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const +{ + const String column_name = node->getColumnName(); + + for (const auto & name : columns) + if (column_name == name) + return true; + + if (const auto * func = typeid_cast(node.get())) + if (func->arguments->children.size() == 1) + return mayBenefitFromIndexForIn(func->arguments->children.front()); + + return false; +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const +{ + return std::make_shared(bits_per_row, hash_functions, columns); +} + +IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +{ + return std::make_shared(query_info, context, header, hash_functions); +} + +std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY); + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + + auto syntax = SyntaxAnalyzer(context, {}).analyze(expr_list, columns); + auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + auto index_sample = 
ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock(); + + if (!index_sample || !index_sample.columns()) + throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + + double max_conflict_probability = 0.025; + if (node->type->arguments && !node->type->arguments->children.empty()) + max_conflict_probability = typeid_cast(*node->type->arguments->children[0]).value.get(); + + const auto & bits_per_row_and_size_of_hash_functions = calculationBestPractices(max_conflict_probability); + + return std::make_unique( + node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity, + bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h new file mode 100644 index 00000000000..5b506846754 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexBloomFilter : public IMergeTreeIndex +{ +public: + MergeTreeIndexBloomFilter( + const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, + const Block & header, size_t granularity, size_t bits_per_row_, size_t hash_functions_); + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + MergeTreeIndexAggregatorPtr createIndexAggregator() const override; + + IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + + bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; + +private: + size_t bits_per_row; + size_t hash_functions; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp new file mode 
100644 index 00000000000..5d9a3c2baee --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type) +{ + /// If the data type is tuple, let's try unbox once + if (node->as() || node->as()) + return PreparedSetKey::forSubquery(*node); + + if (const auto * date_type_tuple = typeid_cast(&*data_type)) + return PreparedSetKey::forLiteral(*node, date_type_tuple->getElements()); + + return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); +} + +bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions) +{ + const auto const_column = typeid_cast(hash_column); + const auto non_const_column = typeid_cast(hash_column); + + if (!const_column && !non_const_column) + throw Exception("LOGICAL ERROR: hash column must be Const Column or UInt64 Column.", ErrorCodes::LOGICAL_ERROR); + + if (const_column) + { + for (size_t index = 0; index < hash_functions; ++index) + if (!bloom_filter->containsWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) + return false; + return true; + } + else + { + bool missing_rows = true; + const ColumnUInt64::Container & data = non_const_column->getData(); + + for (size_t index = 0, size = data.size(); missing_rows && index < size; ++index) + { + bool match_row = true; + for (size_t hash_index = 0; match_row && hash_index < hash_functions; ++hash_index) + match_row = bloom_filter->containsWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); + + missing_rows = !match_row; + } + + return !missing_rows; + } +} + +} + +MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( + const SelectQueryInfo & info, const Context & context, const Block & header, size_t 
hash_functions) + : header(header), query_info(info), hash_functions(hash_functions) +{ + auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; + rpn = std::move(RPNBuilder(info, context, atomFromAST).extractRPN()); +} + +bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const +{ + std::vector rpn_stack; + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN + || element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.push_back(true); + } + else if (element.function == RPNElement::FUNCTION_EQUALS + || element.function == RPNElement::FUNCTION_NOT_EQUALS + || element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.push_back(false); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + // do nothing + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 && arg2; + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 || arg2; + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + return rpn_stack[0]; +} + +bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const +{ + std::vector rpn_stack; + const auto & filters = granule->getFilters(); + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN) + { + rpn_stack.emplace_back(true, true); + } + else if (element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::FUNCTION_EQUALS + || 
element.function == RPNElement::FUNCTION_NOT_EQUALS) + { + bool match_rows = true; + const auto & predicate = element.predicate; + for (size_t index = 0; match_rows && index < predicate.size(); ++index) + { + const auto & query_index_hash = predicate[index]; + const auto & filter = filters[query_index_hash.first]; + const ColumnPtr & hash_column = query_index_hash.second; + match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions); + } + + rpn_stack.emplace_back(match_rows, !match_rows); + if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN) + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 | arg2; + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 & arg2; + } + else if (element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.emplace_back(true, false); + } + else if (element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.emplace_back(false, true); + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + if (rpn_stack.size() != 1) + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + + return rpn_stack[0].can_be_true; +} + +bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out) +{ + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + { + if (const_value.getType() == Field::Types::UInt64 || const_value.getType() == 
Field::Types::Int64 || + const_value.getType() == Field::Types::Float64) + { + /// Zero in all types is represented in memory the same way as in UInt64. + out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + return true; + } + } + } + + if (const auto * function = node->as()) + { + const ASTs & arguments = function->arguments->children; + + if (arguments.size() != 2) + return false; + + if (functionIsInOrGlobalInOperator(function->name)) + return processInOrNotInOperator(function->name, arguments[0], arguments[1], out); + + if (function->name == "equals" || function->name == "notEquals") + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) + return processEqualsOrNotEquals(function->name, arguments[0], const_type, const_value, out); + else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) + return processEqualsOrNotEquals(function->name, arguments[1], const_type, const_value, out); + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::processInOrNotInOperator( + const String & function_name, const ASTPtr & key_ast, const ASTPtr & expr_list, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + const auto & column_and_type = header.getByName(key_ast->getColumnName()); + const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(expr_list, column_and_type.type)); + + if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + { + const IDataType * type = &*column_and_type.type; + const auto & prepared_set = prepared_set_it->second; + + if (!typeid_cast(type)) + { + const Columns & columns = prepared_set->getSetElements(); + + if (columns.size() != 1) + throw Exception("LOGICAL ERROR: prepared_set columns size must be 1.", ErrorCodes::LOGICAL_ERROR); + + ColumnPtr column = columns[0]; + size_t position = 
header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, &*column, 0, column->size()))); + } + else + { + size_t position = header.getPositionByName(key_ast->getColumnName()); + const auto & tuple_column = ColumnTuple::create(prepared_set->getSetElements()); + const auto & bf_hash_column = BloomFilterHash::hashWithColumn(type, &*tuple_column, 0, prepared_set->getTotalRowCount()); + out.predicate.emplace_back(std::make_pair(position, bf_hash_column)); + } + + if (function_name == "in" || function_name == "globalIn") + out.function = RPNElement::FUNCTION_IN; + + if (function_name == "notIn" || function_name == "globalNotIn") + out.function = RPNElement::FUNCTION_NOT_IN; + + return true; + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + size_t position = header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*value_type, value_field))); + out.function = function_name == "equals" ? 
RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(value_type); + + /// TODO: support SQL: where array(index_column_x, column_y) = [1, 2] + if (which.isTuple() && function->name == "tuple") + { + const TupleBackend & tuple = get(value_field).toUnderType(); + const auto value_tuple_data_type = typeid_cast(value_type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; + + if (tuple.size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + bool match_with_subtype = false; + const DataTypes & subtypes = value_tuple_data_type->getElements(); + + for (size_t index = 0; index < tuple.size(); ++index) + match_with_subtype |= processEqualsOrNotEquals(function_name, arguments[index], subtypes[index], tuple[index], out); + + return match_with_subtype; + } + } + + return false; +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h new file mode 100644 index 00000000000..d002936101f --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexConditionBloomFilter : public IIndexCondition +{ +public: + struct RPNElement + { + enum Function + { + /// Atoms of a Boolean expression. + FUNCTION_EQUALS, + FUNCTION_NOT_EQUALS, + FUNCTION_IN, + FUNCTION_NOT_IN, + FUNCTION_UNKNOWN, /// Can take any value. + /// Operators of the logical expression. 
+ FUNCTION_NOT, + FUNCTION_AND, + FUNCTION_OR, + /// Constants + ALWAYS_FALSE, + ALWAYS_TRUE, + }; + + RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} + + Function function = FUNCTION_UNKNOWN; + std::vector> predicate; + }; + + MergeTreeIndexConditionBloomFilter(const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override + { + if (const auto & bf_granule = typeid_cast(granule.get())) + { + return mayBeTrueOnGranule(bf_granule); + } + + throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); + } + +private: + const Block & header; + const SelectQueryInfo & query_info; + const size_t hash_functions; + std::vector rpn; + + bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; + + bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); + + bool processInOrNotInOperator(const String &function_name, const ASTPtr &key_ast, const ASTPtr &expr_list, RPNElement &out); + + bool processEqualsOrNotEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 966775e4017..e597cc99a36 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -31,7 +31,7 @@ namespace ErrorCodes /// Adds all tokens from string to bloom filter. 
static void stringToBloomFilter( - const char * data, size_t size, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const char * data, size_t size, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; size_t token_start = 0; @@ -42,7 +42,7 @@ static void stringToBloomFilter( /// Adds all tokens from like pattern string to bloom filter. (Because like pattern can contain `\%` and `\_`.) static void likeStringToBloomFilter( - const String & data, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const String & data, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; String token; @@ -51,24 +51,23 @@ static void likeStringToBloomFilter( } -MergeTreeBloomFilterIndexGranule::MergeTreeBloomFilterIndexGranule(const MergeTreeBloomFilterIndex & index) +MergeTreeIndexGranuleFullText::MergeTreeIndexGranuleFullText(const MergeTreeIndexFullText & index) : IMergeTreeIndexGranule() , index(index) , bloom_filters( - index.columns.size(), StringBloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) + index.columns.size(), BloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) , has_elems(false) {} -void MergeTreeBloomFilterIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleFullText::serializeBinary(WriteBuffer & ostr) const { if (empty()) - throw Exception( - "Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception("Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); for (const auto & bloom_filter : bloom_filters) ostr.write(reinterpret_cast(bloom_filter.getFilter().data()), index.bloom_filter_size); } -void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr) { for (auto & bloom_filter : bloom_filters) { 
@@ -78,17 +77,17 @@ void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeBloomFilterIndexAggregator::MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index) - : index(index), granule(std::make_shared(index)) {} +MergeTreeIndexAggregatorFullText::MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index) + : index(index), granule(std::make_shared(index)) {} -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() { - auto new_granule = std::make_shared(index); + auto new_granule = std::make_shared(index); new_granule.swap(granule); return new_granule; } -void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -111,14 +110,14 @@ void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * p } -const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map +const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map { { "notEquals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_NOT_EQUALS; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -128,10 +127,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "equals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_EQUALS; - out.bloom_filter = std::make_unique( + 
out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -141,10 +140,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "like", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_LIKE; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -154,7 +153,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "notIn", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_NOT_IN; return true; @@ -162,7 +161,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "in", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_IN; return true; @@ -170,24 +169,21 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, }; -BloomFilterCondition::BloomFilterCondition( +MergeTreeConditionFullText::MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_) : index(index_), prepared_sets(query_info.sets) + const MergeTreeIndexFullText & index_) : index(index_), prepared_sets(query_info.sets) { rpn = std::move( RPNBuilder( query_info, context, - [this] (const ASTPtr & node, - const Context & /* context */, - Block & block_with_constants, - RPNElement & out) -> bool + [this] (const ASTPtr & node, const Context & /* context */, Block & block_with_constants, RPNElement & out) -> bool { return this->atomFromAST(node, block_with_constants, 
out); }).extractRPN()); } -bool BloomFilterCondition::alwaysUnknownOrTrue() const +bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. std::vector rpn_stack; @@ -234,10 +230,10 @@ bool BloomFilterCondition::alwaysUnknownOrTrue() const return rpn_stack[0]; } -bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "BloomFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -323,7 +319,7 @@ bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granu return rpn_stack[0].can_be_true; } -bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) +bool MergeTreeConditionFullText::getKey(const ASTPtr & node, size_t & key_column_num) { auto it = std::find(index.columns.begin(), index.columns.end(), node->getColumnName()); if (it == index.columns.end()) @@ -333,7 +329,7 @@ bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) return true; } -bool BloomFilterCondition::atomFromAST( +bool MergeTreeConditionFullText::atomFromAST( const ASTPtr & node, Block & block_with_constants, RPNElement & out) { Field const_value; @@ -399,7 +395,7 @@ bool BloomFilterCondition::atomFromAST( return false; } -bool BloomFilterCondition::tryPrepareSetBloomFilter( +bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( const ASTs & args, RPNElement & out) { @@ -454,7 +450,7 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; - std::vector> bloom_filters; + std::vector> bloom_filters; std::vector key_position; Columns columns = 
prepared_set->getSetElements(); @@ -480,23 +476,23 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( } -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeBloomFilterIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeBloomFilterIndex::createIndexCondition( +IndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeBloomFilterIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const { return std::find(std::cbegin(columns), std::cend(columns), node->getColumnName()) != std::cend(columns); } @@ -679,7 +675,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(n); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, bloom_filter_hashes, seed, std::move(tokenizer)); } @@ -697,7 +693,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, bloom_filter_hashes, seed, std::move(tokenizer)); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h similarity index 79% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h rename to 
dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index 888ffe7f9cc..9b9eefd1d43 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -10,54 +10,54 @@ namespace DB { -class MergeTreeBloomFilterIndex; +class MergeTreeIndexFullText; -struct MergeTreeBloomFilterIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleFullText : public IMergeTreeIndexGranule { - explicit MergeTreeBloomFilterIndexGranule( - const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexGranuleFullText( + const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexGranule() override = default; + ~MergeTreeIndexGranuleFullText() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return !has_elems; } - const MergeTreeBloomFilterIndex & index; - std::vector bloom_filters; + const MergeTreeIndexFullText & index; + std::vector bloom_filters; bool has_elems; }; -using MergeTreeBloomFilterIndexGranulePtr = std::shared_ptr; +using MergeTreeIndexGranuleFullTextPtr = std::shared_ptr; -struct MergeTreeBloomFilterIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorFullText : IMergeTreeIndexAggregator { - explicit MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexAggregator() override = default; + ~MergeTreeIndexAggregatorFullText() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeBloomFilterIndex & index; - MergeTreeBloomFilterIndexGranulePtr granule; + const MergeTreeIndexFullText & index; + MergeTreeIndexGranuleFullTextPtr granule; }; -class 
BloomFilterCondition : public IIndexCondition +class MergeTreeConditionFullText : public IIndexCondition { public: - BloomFilterCondition( + MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_); + const MergeTreeIndexFullText & index_); - ~BloomFilterCondition() override = default; + ~MergeTreeConditionFullText() override = default; bool alwaysUnknownOrTrue() const override; @@ -93,19 +93,19 @@ private: }; RPNElement( - Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) + Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} Function function = FUNCTION_UNKNOWN; /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE. size_t key_column; - std::unique_ptr bloom_filter; + std::unique_ptr bloom_filter; /// For FUNCTION_IN and FUNCTION_NOT_IN - std::vector> set_bloom_filters; + std::vector> set_bloom_filters; std::vector set_key_position; }; - using AtomMap = std::unordered_map; + using AtomMap = std::unordered_map; using RPN = std::vector; bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); @@ -115,7 +115,7 @@ private: static const AtomMap atom_map; - const MergeTreeBloomFilterIndex & index; + const MergeTreeIndexFullText & index; RPN rpn; /// Sets from syntax analyzer. 
PreparedSets prepared_sets; @@ -164,10 +164,10 @@ struct SplitTokenExtractor : public ITokenExtractor }; -class MergeTreeBloomFilterIndex : public IMergeTreeIndex +class MergeTreeIndexFullText : public IMergeTreeIndex { public: - MergeTreeBloomFilterIndex( + MergeTreeIndexFullText( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -184,7 +184,7 @@ public: , seed(seed_) , token_extractor_func(std::move(token_extractor_func_)) {} - ~MergeTreeBloomFilterIndex() override = default; + ~MergeTreeIndexFullText() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp new file mode 100644 index 00000000000..365c94dcbaa --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns) + : bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + total_rows = 0; + bloom_filters.resize(index_columns); +} + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( + size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks) + : total_rows(total_rows), bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + if (granule_index_blocks.empty() || !total_rows) + throw Exception("LOGICAL ERROR: granule_index_blocks empty or total_rows is zero.", ErrorCodes::LOGICAL_ERROR); + + assertGranuleBlocksStructure(granule_index_blocks); + + for (size_t index = 0; index < granule_index_blocks.size(); ++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if 
(unlikely(!granule_index_block || !granule_index_block.rows())) + throw Exception("LOGICAL ERROR: granule_index_block is empty.", ErrorCodes::LOGICAL_ERROR); + + if (index == 0) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + bloom_filters.emplace_back(std::make_shared(bytes_size, hash_functions, 0)); + } + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + fillingBloomFilter(bloom_filters[column], granule_index_block, column, hash_functions); + } +} + +bool MergeTreeIndexGranuleBloomFilter::empty() const +{ + return !total_rows; +} + +void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr) +{ + if (!empty()) + throw Exception("Cannot read data to a non-empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + readVarUInt(total_rows, istr); + for (size_t index = 0; index < bloom_filters.size(); ++index) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + bloom_filters[index] = std::make_shared(bytes_size, hash_functions, 0); + istr.read(reinterpret_cast(bloom_filters[index]->getFilter().data()), bytes_size); + } +} + +void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception("Attempt to write empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + static size_t atom_size = 8; + writeVarUInt(total_rows, ostr); + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + for (const auto & bloom_filter : bloom_filters) + ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), bytes_size); +} + +void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const +{ + Block prev_block; + for (size_t index = 0; index < granule_index_blocks.size(); 
++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if (index != 0) + assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); + + prev_block = granule_index_block; + } +} + +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter( + std::shared_ptr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions) +{ + const auto & column = granule_index_block.getByPosition(index_hash_column); + + if (const auto hash_column = typeid_cast(column.column.get())) + { + const auto & hash_column_vec = hash_column->getData(); + + for (size_t index = 0, size = hash_column_vec.size(); index < size; ++index) + { + const UInt64 & bf_base_hash = hash_column_vec[index]; + + for (size_t i = 0; i < hash_functions; ++i) + bf->addHashWithSeed(bf_base_hash, BloomFilterHash::bf_hash_seed[i]); + } + } +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h new file mode 100644 index 00000000000..6aea7601a73 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexGranuleBloomFilter : public IMergeTreeIndexGranule +{ +public: + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns); + + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks); + + bool empty() const override; + + void serializeBinary(WriteBuffer & ostr) const override; + + void deserializeBinary(ReadBuffer & istr) override; + + const std::vector getFilters() const { return bloom_filters; } + +private: + size_t total_rows; + size_t bits_per_row; + size_t hash_functions; + std::vector bloom_filters; + + void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) 
const; + + void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions); +}; + + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp index 74eb31ecd46..e19aafbd25d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } -void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) +void MergeTreeIndexFactory::registerIndex(const std::string & name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", @@ -70,6 +70,11 @@ std::unique_ptr bloomFilterIndexCreator( std::shared_ptr node, const Context & context); +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context); + MergeTreeIndexFactory::MergeTreeIndexFactory() { @@ -77,6 +82,7 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerIndex("set", setIndexCreator); registerIndex("ngrambf_v1", bloomFilterIndexCreator); registerIndex("tokenbf_v1", bloomFilterIndexCreator); + registerIndex("bloom_filter", bloomFilterIndexCreatorNew); } } diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h index 6a557cb5f6a..d5244c3285d 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.h +++ b/dbms/src/Storages/MergeTree/RPNBuilder.h @@ -24,10 +24,7 @@ public: using AtomFromASTFunc = std::function< bool(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)>; - RPNBuilder( - const SelectQueryInfo & query_info, - const Context & context_, - const AtomFromASTFunc & atomFromAST_) + RPNBuilder(const SelectQueryInfo & query_info, const Context & context_, const AtomFromASTFunc & 
atomFromAST_) : context(context_), atomFromAST(atomFromAST_) { /** Evaluation of expressions that depend only on constants. From 6c8ff6dc315ca6c086c921a4310e08ae9287c979 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 16:51:35 +0800 Subject: [PATCH 082/191] add some test --- dbms/src/Interpreters/BloomFilterHash.h | 6 +- .../MergeTreeIndexAggregatorBloomFilter.cpp | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 113 ++++++++++++------ .../MergeTreeIndexConditionBloomFilter.h | 10 +- ...oom_filter_index_with_merge_tree.reference | 0 ...eate_bloom_filter_index_with_merge_tree.sh | 12 ++ .../00945_bloom_filter_index.reference | 0 .../0_stateless/00945_bloom_filter_index.sql | 31 +++++ 8 files changed, 130 insertions(+), 44 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference create mode 100644 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh create mode 100755 dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference create mode 100755 dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 4c5fc1934fa..414d69cf35b 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -43,16 +43,16 @@ struct BloomFilterHash throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); } - static ColumnPtr hashWithColumn(const IDataType * data_type, const IColumn * column, size_t pos, size_t limit) + static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit) { auto index_column = ColumnUInt64::create(limit); ColumnUInt64::Container & index_column_vec = index_column->getData(); - getAnyTypeHash(data_type, column, index_column_vec, pos); + getAnyTypeHash(&*data_type, &*column, index_column_vec, pos); 
return index_column; } template - static void getAnyTypeHash(const IDataType *data_type, const IColumn *column, ColumnUInt64::Container &vec, size_t pos) + static void getAnyTypeHash(const IDataType * data_type, const IColumn * column, ColumnUInt64::Container & vec, size_t pos) { WhichDataType which(data_type); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index c3f7150548c..760721b5f3c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -49,7 +49,7 @@ void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * p for (size_t index = 0; index < index_columns_name.size(); ++index) { const auto & column_and_type = block.getByName(index_columns_name[index]); - const auto & index_column = BloomFilterHash::hashWithColumn(&*column_and_type.type, &*column_and_type.column, *pos, max_read_rows); + const auto & index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); granule_index_block.insert({std::move(index_column), std::make_shared(), column_and_type.name}); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 5d9a3c2baee..d90bc90a2da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "MergeTreeIndexConditionBloomFilter.h" namespace DB @@ -30,6 +31,14 @@ PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_t return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); } +ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set) +{ + if (prepared_set->getDataTypes().size() == 1) + return 
{prepared_set->getSetElements()[0], prepared_set->getDataTypes()[0], "dummy"}; + + return {ColumnTuple::create(prepared_set->getSetElements()), std::make_shared(prepared_set->getDataTypes()), "dummy"}; +} + bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions) { const auto const_column = typeid_cast(hash_column); @@ -208,68 +217,77 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl return false; if (functionIsInOrGlobalInOperator(function->name)) - return processInOrNotInOperator(function->name, arguments[0], arguments[1], out); - - if (function->name == "equals" || function->name == "notEquals") + { + if (const auto & prepared_set = getPreparedSet(arguments[1])) + return traverseASTIn(function->name, arguments[0], prepared_set, out); + } + else if (function->name == "equals" || function->name == "notEquals") { Field const_value; DataTypePtr const_type; if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) - return processEqualsOrNotEquals(function->name, arguments[0], const_type, const_value, out); + return traverseASTEquals(function->name, arguments[0], const_type, const_value, out); else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) - return processEqualsOrNotEquals(function->name, arguments[1], const_type, const_value, out); + return traverseASTEquals(function->name, arguments[1], const_type, const_value, out); } } return false; } -bool MergeTreeIndexConditionBloomFilter::processInOrNotInOperator( - const String & function_name, const ASTPtr & key_ast, const ASTPtr & expr_list, RPNElement & out) +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out) +{ + const auto & prepared_info = getPreparedSetInfo(prepared_set); + return traverseASTIn(function_name, key_ast, prepared_info.type, 
prepared_info.column, out); +} + +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out) { if (header.has(key_ast->getColumnName())) { - const auto & column_and_type = header.getByName(key_ast->getColumnName()); - const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(expr_list, column_and_type.type)); + size_t row_size = column->size(); + size_t position = header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, column, 0, row_size))); - if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + if (function_name == "in" || function_name == "globalIn") + out.function = RPNElement::FUNCTION_IN; + + if (function_name == "notIn" || function_name == "globalNotIn") + out.function = RPNElement::FUNCTION_NOT_IN; + + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(type); + + if (which.isTuple() && function->name == "tuple") { - const IDataType * type = &*column_and_type.type; - const auto & prepared_set = prepared_set_it->second; + const auto & tuple_column = typeid_cast(column.get()); + const auto & tuple_data_type = typeid_cast(type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; - if (!typeid_cast(type)) - { - const Columns & columns = prepared_set->getSetElements(); + if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (columns.size() != 1) - throw Exception("LOGICAL ERROR: prepared_set columns size must be 1.", ErrorCodes::LOGICAL_ERROR); + bool match_with_subtype = false; + const auto & sub_columns = tuple_column->getColumns(); + const auto & 
sub_data_types = tuple_data_type->getElements(); - ColumnPtr column = columns[0]; - size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, &*column, 0, column->size()))); - } - else - { - size_t position = header.getPositionByName(key_ast->getColumnName()); - const auto & tuple_column = ColumnTuple::create(prepared_set->getSetElements()); - const auto & bf_hash_column = BloomFilterHash::hashWithColumn(type, &*tuple_column, 0, prepared_set->getTotalRowCount()); - out.predicate.emplace_back(std::make_pair(position, bf_hash_column)); - } + for (size_t index = 0; index < arguments.size(); ++index) + match_with_subtype |= traverseASTIn(function_name, arguments[index], sub_data_types[index], sub_columns[index], out); - if (function_name == "in" || function_name == "globalIn") - out.function = RPNElement::FUNCTION_IN; - - if (function_name == "notIn" || function_name == "globalNotIn") - out.function = RPNElement::FUNCTION_NOT_IN; - - return true; + return match_with_subtype; } } return false; } -bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( +bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) { if (header.has(key_ast->getColumnName())) @@ -284,7 +302,6 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( { WhichDataType which(value_type); - /// TODO: support SQL: where array(index_column_x, column_y) = [1, 2] if (which.isTuple() && function->name == "tuple") { const TupleBackend & tuple = get(value_field).toUnderType(); @@ -298,7 +315,7 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( const DataTypes & subtypes = value_tuple_data_type->getElements(); for (size_t index = 0; index < tuple.size(); ++index) - match_with_subtype |= processEqualsOrNotEquals(function_name, 
arguments[index], subtypes[index], tuple[index], out); + match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out); return match_with_subtype; } @@ -307,4 +324,24 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( return false; } +SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node) +{ + if (header.has(node->getColumnName())) + { + const auto & column_and_type = header.getByName(node->getColumnName()); + const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(node, column_and_type.type)); + + if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + return prepared_set_it->second; + } + else + { + for (const auto & prepared_set_it : query_info.sets) + if (prepared_set_it.first.ast_hash == node->getTreeHash() && prepared_set_it.second->hasExplicitSetElements()) + return prepared_set_it.second; + } + + return DB::SetPtr(); +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index d002936101f..7d23b06ccce 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -57,13 +57,19 @@ private: const size_t hash_functions; std::vector rpn; + SetPtr getPreparedSet(const ASTPtr & node); + bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); - bool processInOrNotInOperator(const String &function_name, const ASTPtr &key_ast, const ASTPtr &expr_list, RPNElement &out); + bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const SetPtr &prepared_set, RPNElement &out); - bool processEqualsOrNotEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & 
value_field, RPNElement & out); + bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &type, const ColumnPtr &column, + RPNElement &out); + + bool traverseASTEquals(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &value_type, const Field &value_field, + RPNElement &out); }; } diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh new file mode 100644 index 00000000000..a637468f203 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \ +rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'` +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.bloom_filter_idx"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" +done diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference new file mode 100755 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql new file mode 100755 index 00000000000..0cc77825f4e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS test.single_column_bloom_filter; + +SET allow_experimental_data_skipping_indices = 1; + +CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6; + +INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 
SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(1, 1), (2, 2)])) SETTINGS max_rows_to_read = 6; +WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } + +DROP TABLE IF EXISTS test.single_column_bloom_filter; From d1452951639f7055779b6b0a374a36dd7cb1328f Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 18:50:37 +0800 Subject: [PATCH 
083/191] convert type with condition --- dbms/src/Interpreters/BloomFilterHash.h | 6 ++- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 30 ++++++++++-- .../MergeTreeIndexConditionBloomFilter.cpp | 13 +++-- .../MergeTreeIndexConditionBloomFilter.h | 9 ++-- .../0_stateless/00945_bloom_filter_index.sql | 48 ++++++++++++++++--- 5 files changed, 84 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 414d69cf35b..544bf1be1ac 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -30,10 +30,12 @@ struct BloomFilterHash { WhichDataType which(data_type); - if (which.isUInt()) + if (which.isUInt() || which.isDateOrDateTime()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); - else if (which.isInt()) + else if (which.isInt() || which.isEnum()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isFloat()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); else if (which.isString() || which.isFixedString()) { const auto & value = field.safeGet(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index dff73a80576..539422968ed 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -36,15 +36,18 @@ MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const { - const String column_name = node->getColumnName(); + const String & column_name = node->getColumnName(); for (const auto & name : columns) if (column_name == name) return true; if (const auto * func = typeid_cast(node.get())) - if (func->arguments->children.size() == 1) - return 
mayBenefitFromIndexForIn(func->arguments->children.front()); + { + for (const auto & children : func->arguments->children) + if (mayBenefitFromIndexForIn(children)) + return true; + } return false; } @@ -59,6 +62,24 @@ IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQu return std::make_shared(query_info, context, header, hash_functions); } +static void assertIndexColumnsType(const Block &header) +{ + if (!header || !header.columns()) + throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + + const DataTypes & columns_data_types = header.getDataTypes(); + + for (size_t index = 0; index < columns_data_types.size(); ++index) + { + WhichDataType which(columns_data_types[index]); + + if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && + !which.isDateOrDateTime() && !which.isEnum()) + throw Exception("Unexpected type " + columns_data_types[index]->getName() + " of bloom filter index.", + ErrorCodes::ILLEGAL_COLUMN); + } +} + std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) { if (node->name.empty()) @@ -70,8 +91,7 @@ std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesL auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock(); - if (!index_sample || !index_sample.columns()) - throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + assertIndexColumnsType(index_sample); double max_conflict_probability = 0.025; if (node->type->arguments && !node->type->arguments->children.empty()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index d90bc90a2da..5da0c2265c1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -10,7 +10,8 @@ #include #include #include -#include "MergeTreeIndexConditionBloomFilter.h" +#include +#include namespace DB @@ -76,7 +77,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions) - : header(header), query_info(info), hash_functions(hash_functions) + : header(header), context(context), query_info(info), hash_functions(hash_functions) { auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; rpn = std::move(RPNBuilder(info, context, atomFromAST).extractRPN()); @@ -249,7 +250,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( { size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, column, 0, row_size))); + const DataTypePtr & index_type = header.getByPosition(position).type; + const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if (function_name == "in" || function_name == "globalIn") out.function = RPNElement::FUNCTION_IN; @@ -293,7 +296,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (header.has(key_ast->getColumnName())) { size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*value_type, value_field))); + const DataTypePtr & index_type = header.getByPosition(position).type; + Field converted_field = convertFieldToType(value_field, *index_type, &*value_type); + 
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field))); out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; return true; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 7d23b06ccce..0c55b5b3035 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -53,6 +53,7 @@ public: private: const Block & header; + const Context & context; const SelectQueryInfo & query_info; const size_t hash_functions; std::vector rpn; @@ -63,13 +64,11 @@ private: bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); - bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const SetPtr &prepared_set, RPNElement &out); + bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); - bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &type, const ColumnPtr &column, - RPNElement &out); + bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); - bool traverseASTEquals(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &value_type, const Field &value_field, - RPNElement &out); + bool traverseASTEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); }; } diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index 0cc77825f4e..a8f795150bb 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ 
b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -1,7 +1,7 @@ -DROP TABLE IF EXISTS test.single_column_bloom_filter; - SET allow_experimental_data_skipping_indices = 1; +DROP TABLE IF EXISTS test.single_column_bloom_filter; + CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6; INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100; @@ -19,13 +19,49 @@ SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1 SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(1, 1), (2, 2)])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) 
SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 6; WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 
} +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } DROP TABLE IF EXISTS test.single_column_bloom_filter; + + +DROP TABLE IF EXISTS test.bloom_filter_types_test; + +CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime, str String, fixed_string FixedString(3), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, 
toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number) AS date, toDateTime(number) AS date_time, toString(number) AS str, toFixedString(toString(number), 3) AS fixed_string FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1.0 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1.0 SETTINGS max_rows_to_read = 6; + +SELECT * FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; + + +DROP TABLE IF EXISTS test.bloom_filter_types_test; From a50aea09f1810f4b3e38133a1526cb4c23a1d634 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 23:09:07 +0800 Subject: [PATCH 084/191] fix float in bloom filter --- dbms/src/Interpreters/BloomFilter.cpp | 65 +--------------- dbms/src/Interpreters/BloomFilter.h | 6 +- dbms/src/Interpreters/BloomFilterHash.h | 78 +++++++++++++++++-- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 3 +- .../MergeTreeIndexConditionBloomFilter.cpp | 4 +- .../00945_bloom_filter_index.reference | 30 +++++++ .../0_stateless/00945_bloom_filter_index.sql | 33 ++------ 7 files changed, 119 insertions(+), 100 deletions(-) diff --git 
a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp index 3f20799cedf..d648fd114f4 100644 --- a/dbms/src/Interpreters/BloomFilter.cpp +++ b/dbms/src/Interpreters/BloomFilter.cpp @@ -1,6 +1,5 @@ #include #include -#include "BloomFilter.h" namespace DB @@ -72,72 +71,16 @@ bool operator== (const BloomFilter & a, const BloomFilter & b) return true; } -void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & seed) +void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) { - size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType)))); } -bool BloomFilter::containsWithSeed(const UInt64 & hash, const UInt64 & seed) +bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) { - size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))); } -static std::pair calculationBestPracticesImpl(double max_conflict_probability) -{ - static const size_t MAX_BITS_PER_ROW = 20; - static const size_t MAX_HASH_FUNCTION_COUNT = 15; - - /// For the smallest index per level in probability_lookup_table - static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; - - static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = - { - {1.0}, /// dummy, 0 bits per row - {1.0, 1.0}, - {1.0, 0.393, 0.400}, - {1.0, 0.283, 0.237, 0.253}, - {1.0, 0.221, 0.155, 0.147, 0.160}, - {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 - {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 
0.0638}, - {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, - {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, - {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, - {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 - {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, - {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, - {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, - {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, - {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 - {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, - {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, - {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, - {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, - {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 - }; - - for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) - { - if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) - { - size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; - for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) - if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) - { - std::cout << "Best bf:" << bits_per_row << ", " << (size_of_hash_functions + 1) << "\n"; - return std::pair(bits_per_row, 
size_of_hash_functions + 1); - } - - } - } - - return std::pair(MAX_BITS_PER_ROW - 1, min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); -} - -std::pair calculationBestPractices(double max_conflict_probability) -{ - return calculationBestPracticesImpl(max_conflict_probability); -} - } diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h index 23bf7baba20..19469834c94 100644 --- a/dbms/src/Interpreters/BloomFilter.h +++ b/dbms/src/Interpreters/BloomFilter.h @@ -26,8 +26,8 @@ public: void add(const char * data, size_t len); void clear(); - void addHashWithSeed(const UInt64 & hash, const UInt64 & seed); - bool containsWithSeed(const UInt64 & hash, const UInt64 & seed); + void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); + bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. @@ -53,6 +53,4 @@ using BloomFilterPtr = std::shared_ptr; bool operator== (const BloomFilter & a, const BloomFilter & b); -std::pair calculationBestPractices(double max_conflict_probability); - } diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 544bf1be1ac..a94bc8687eb 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -34,7 +34,7 @@ struct BloomFilterHash return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); else if (which.isInt() || which.isEnum()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); - else if (which.isFloat()) + else if (which.isFloat32() || which.isFloat64()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); else if (which.isString() || which.isFixedString()) { @@ -87,14 +87,31 @@ struct BloomFilterHash const typename ColumnVector::Container & vec_from = 
index_column->getData(); - for (size_t index = 0, size = vec.size(); index < size; ++index) + /// Because we're missing the precision of float in the Field.h + /// to be consistent, we need to convert Float32 to Float64 processing, also see: BloomFilterHash::hashWithField + if constexpr (std::is_same_v, ColumnFloat32>) { - UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(Float64(vec_from[index + pos]))); - if constexpr (is_first) - vec[index] = hash; - else - vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + else + { + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } } } @@ -138,6 +155,53 @@ struct BloomFilterHash else throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); } + + static std::pair calculationBestPractices(double max_conflict_probability) + { + static const size_t MAX_BITS_PER_ROW = 20; + static const size_t MAX_HASH_FUNCTION_COUNT = 15; + + /// For the smallest index per level in probability_lookup_table + static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; + + static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = + { + {1.0}, /// dummy, 0 bits per row + {1.0, 1.0}, + {1.0, 0.393, 0.400}, + {1.0, 0.283, 0.237, 0.253}, + {1.0, 0.221, 0.155, 0.147, 0.160}, + {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 + {1.0, 0.154, 0.0804, 0.0609, 0.0561, 
0.0578, 0.0638}, + {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, + {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, + {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, + {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 + {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, + {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, + {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, + {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, + {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 + {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, + {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, + {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, + {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, + {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 + }; + + for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) + { + if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) + { + size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; + for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) + if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) + return std::pair(bits_per_row, size_of_hash_functions + 1); + } + } + + return std::pair(MAX_BITS_PER_ROW - 1, 
min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); + } }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 539422968ed..3e4a35d0c94 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -97,7 +98,7 @@ std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesL if (node->type->arguments && !node->type->arguments->children.empty()) max_conflict_probability = typeid_cast(*node->type->arguments->children[0]).value.get(); - const auto & bits_per_row_and_size_of_hash_functions = calculationBestPractices(max_conflict_probability); + const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability); return std::make_unique( node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity, diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 5da0c2265c1..9c8a9d4b41c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -51,7 +51,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & if (const_column) { for (size_t index = 0; index < hash_functions; ++index) - if (!bloom_filter->containsWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) + if (!bloom_filter->findHashWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) return false; return true; } @@ -64,7 +64,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & { bool match_row = true; for (size_t hash_index = 0; match_row && hash_index < hash_functions; 
++hash_index) - match_row = bloom_filter->containsWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); + match_row = bloom_filter->findHashWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); missing_rows = !match_row; } diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference index e69de29bb2d..7b6d919d404 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -0,0 +1,30 @@ +1 +0 +1 +1 +2 +0 +2 +2 +2 +0 +2 +2 +2 +0 +2 +2 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index a8f795150bb..bb258b886a4 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -11,11 +11,6 @@ SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SE SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } - SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM 
test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; @@ -29,26 +24,13 @@ WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_colu WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6; WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) 
SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } - DROP TABLE IF EXISTS test.single_column_bloom_filter; DROP TABLE IF EXISTS test.bloom_filter_types_test; -CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime, str String, fixed_string FixedString(3), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; -INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number) AS date, toDateTime(number) AS date_time, toString(number) AS str, toFixedString(toString(number), 3) AS fixed_string FROM system.numbers LIMIT 100; +CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, 
f32 Float32, f64 Float64, date Date, date_time DateTime('Europe/Moscow'), str String, fixed_string FixedString(5), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; @@ -58,10 +40,11 @@ SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_ SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1.0 SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1.0 SETTINGS max_rows_to_read = 6; - -SELECT * FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; - +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; +SELECT COUNT() 
FROM test.bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; DROP TABLE IF EXISTS test.bloom_filter_types_test; From 166018e41e93ec4867f8617e8601eec70984937b Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 23:30:48 +0800 Subject: [PATCH 085/191] fix code style & rename minmax, set --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 7 ++- .../MergeTree/MergeTreeIndexBloomFilter.h | 2 +- .../MergeTreeIndexConditionBloomFilter.h | 10 ++-- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.h | 4 +- ...nMaxIndex.cpp => MergeTreeIndexMinMax.cpp} | 50 ++++++++-------- ...eeMinMaxIndex.h => MergeTreeIndexMinMax.h} | 38 ++++++------ ...kippingIndex.cpp => MergeTreeIndexSet.cpp} | 58 +++++++++---------- ...SetSkippingIndex.h => MergeTreeIndexSet.h} | 38 ++++++------ .../src/Storages/MergeTree/MergeTreeIndices.h | 8 +-- .../MergeTree/registerStorageMergeTree.cpp | 4 +- 13 files changed, 114 insertions(+), 113 deletions(-) rename dbms/src/Storages/MergeTree/{MergeTreeMinMaxIndex.cpp => MergeTreeIndexMinMax.cpp} (74%) rename dbms/src/Storages/MergeTree/{MergeTreeMinMaxIndex.h => MergeTreeIndexMinMax.h} (59%) rename dbms/src/Storages/MergeTree/{MergeTreeSetSkippingIndex.cpp => MergeTreeIndexSet.cpp} (87%) rename dbms/src/Storages/MergeTree/{MergeTreeSetSkippingIndex.h => MergeTreeIndexSet.h} (69%) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d8002f91a07..6a32cb3c17c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -518,7 +518,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( RangesInDataParts parts_with_ranges; - std::vector> useful_indices; + std::vector> useful_indices; for (const auto & index : data.skip_indices) { auto condition = index->createIndexCondition(query_info, context); @@ -998,7 +998,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index a949d593904..d38d00d055b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -84,7 +84,7 @@ private: MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 3e4a35d0c94..4dcdb90a4f6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -58,12 +58,12 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() c return std::make_shared(bits_per_row, hash_functions, columns); } -IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const { return 
std::make_shared(query_info, context, header, hash_functions); } -static void assertIndexColumnsType(const Block &header) +static void assertIndexColumnsType(const Block & header) { if (!header || !header.columns()) throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); @@ -81,7 +81,8 @@ static void assertIndexColumnsType(const Block &header) } } -std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) { if (node->name.empty()) throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index 5b506846754..8930018f22e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -19,7 +19,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 0c55b5b3035..6c268cadbb6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -9,7 +9,7 @@ namespace DB { -class MergeTreeIndexConditionBloomFilter : public IIndexCondition +class MergeTreeIndexConditionBloomFilter : public IMergeTreeIndexCondition { public: struct RPNElement @@ -44,9 +44,7 @@ public: bool 
mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override { if (const auto & bf_granule = typeid_cast(granule.get())) - { return mayBeTrueOnGranule(bf_granule); - } throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); } @@ -66,9 +64,11 @@ private: bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); - bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); + bool traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); - bool traverseASTEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); + bool traverseASTEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index e597cc99a36..42dd5415b0b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -486,7 +486,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() cons return std::make_shared(*this); } -IndexConditionPtr MergeTreeIndexFullText::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { return std::make_shared(query, context, *this); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index 9b9eefd1d43..cd8ac534e64 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -49,7 
+49,7 @@ struct MergeTreeIndexAggregatorFullText : IMergeTreeIndexAggregator }; -class MergeTreeConditionFullText : public IIndexCondition +class MergeTreeConditionFullText : public IMergeTreeIndexCondition { public: MergeTreeConditionFullText( @@ -189,7 +189,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp similarity index 74% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 23deb29758d..2dcd3da510b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -16,14 +16,14 @@ namespace ErrorCodes } -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index) : IMergeTreeIndexGranule(), index(index), parallelogram() {} -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule( - const MergeTreeMinMaxIndex & index, std::vector && parallelogram) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax( + const MergeTreeIndexMinMax & index, std::vector && parallelogram) : IMergeTreeIndexGranule(), index(index), parallelogram(std::move(parallelogram)) {} -void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -50,7 +50,7 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const } } 
-void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr) { parallelogram.clear(); Field min_val; @@ -83,15 +83,15 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeMinMaxAggregator::MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index) +MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index) : index(index) {} -MergeTreeIndexGranulePtr MergeTreeMinMaxAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset() { - return std::make_shared(index, std::move(parallelogram)); + return std::make_shared(index, std::move(parallelogram)); } -void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -122,21 +122,21 @@ void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t } -MinMaxCondition::MinMaxCondition( +MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( const SelectQueryInfo &query, const Context &context, - const MergeTreeMinMaxIndex &index) - : IIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} + const MergeTreeIndexMinMax &index) + : IMergeTreeIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} -bool MinMaxCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const { return condition.alwaysUnknownOrTrue(); } -bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if 
(!granule) throw Exception( "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -147,25 +147,25 @@ bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c } -MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexMinMax::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeMinMaxIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeMinMaxIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const { const String column_name = node->getColumnName(); @@ -210,7 +210,7 @@ std::unique_ptr minmaxIndexCreator( data_types.emplace_back(column.type); } - return std::make_unique( + return std::make_unique( node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h similarity index 59% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 06be8fe0cdd..5b514cdc738 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -10,62 +10,62 @@ namespace DB { -class MergeTreeMinMaxIndex; +class MergeTreeIndexMinMax; -struct MergeTreeMinMaxGranule : public IMergeTreeIndexGranule 
+struct MergeTreeIndexGranuleMinMax : public IMergeTreeIndexGranule { - explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); - MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index, std::vector && parallelogram); - ~MergeTreeMinMaxGranule() override = default; + explicit MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index); + MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index, std::vector && parallelogram); + ~MergeTreeIndexGranuleMinMax() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return parallelogram.empty(); } - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -struct MergeTreeMinMaxAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorMinMax : IMergeTreeIndexAggregator { - explicit MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index); - ~MergeTreeMinMaxAggregator() override = default; + explicit MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index); + ~MergeTreeIndexAggregatorMinMax() override = default; bool empty() const override { return parallelogram.empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -class MinMaxCondition : public IIndexCondition +class MergeTreeIndexConditionMinMax : public IMergeTreeIndexCondition { public: - MinMaxCondition( + MergeTreeIndexConditionMinMax( const SelectQueryInfo & query, const Context & context, - const MergeTreeMinMaxIndex & index); + const MergeTreeIndexMinMax & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~MinMaxCondition() override = default; + ~MergeTreeIndexConditionMinMax() override = 
default; private: - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; KeyCondition condition; }; -class MergeTreeMinMaxIndex : public IMergeTreeIndex +class MergeTreeIndexMinMax : public IMergeTreeIndex { public: - MergeTreeMinMaxIndex( + MergeTreeIndexMinMax( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -74,12 +74,12 @@ public: size_t granularity_) : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {} - ~MergeTreeMinMaxIndex() override = default; + ~MergeTreeIndexMinMax() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 5bf06a1ca6d..8efaae8e579 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -21,18 +21,18 @@ namespace ErrorCodes const Field UNKNOWN_FIELD(3u); -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneEmpty()) {} -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule( - const MergeTreeSetSkippingIndex & index, MutableColumns && mutable_columns) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( + const MergeTreeIndexSet & index, MutableColumns && 
mutable_columns) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneWithColumns(std::move(mutable_columns))) {} -void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -64,7 +64,7 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr) { block.clear(); @@ -94,7 +94,7 @@ void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexAggregatorSet::MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index) : index(index), columns(index.header.cloneEmptyColumns()) { ColumnRawPtrs column_ptrs; @@ -111,7 +111,7 @@ MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkipp columns = index.header.cloneEmptyColumns(); } -void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -164,7 +164,7 @@ void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size } template -bool MergeTreeSetIndexAggregator::buildFilter( +bool MergeTreeIndexAggregatorSet::buildFilter( Method & method, const ColumnRawPtrs & column_ptrs, IColumn::Filter & filter, @@ -190,9 +190,9 @@ bool MergeTreeSetIndexAggregator::buildFilter( return has_new_data; } -MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index, std::move(columns)); + auto granule = std::make_shared(index, std::move(columns)); switch (data.type) { @@ -212,11 +212,11 @@ 
MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() } -SetIndexCondition::SetIndexCondition( +MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex &index) - : IIndexCondition(), index(index) + const MergeTreeIndexSet &index) + : IMergeTreeIndexCondition(), index(index) { for (size_t i = 0, size = index.columns.size(); i < size; ++i) { @@ -253,14 +253,14 @@ SetIndexCondition::SetIndexCondition( actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); } -bool SetIndexCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const { return useless; } -bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - auto granule = std::dynamic_pointer_cast(idx_granule); + auto granule = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -294,7 +294,7 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) return false; } -void SetIndexCondition::traverseAST(ASTPtr & node) const +void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) { @@ -309,7 +309,7 @@ void SetIndexCondition::traverseAST(ASTPtr & node) const node = std::make_shared(UNKNOWN_FIELD); } -bool SetIndexCondition::atomFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::atomFromAST(ASTPtr & node) const { /// Function, literal or column @@ -340,7 +340,7 @@ bool SetIndexCondition::atomFromAST(ASTPtr & node) const return false; } -bool SetIndexCondition::operatorFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const { /// Functions AND, OR, NOT. Replace with bit*. 
auto * func = node->as(); @@ -416,7 +416,7 @@ static bool checkAtomName(const String & name) return atoms.find(name) != atoms.end(); } -bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const +bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic) const { if (const auto * func = node->as()) { @@ -446,23 +446,23 @@ bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const } -MergeTreeIndexGranulePtr MergeTreeSetSkippingIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexSet::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeSetSkippingIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeSetSkippingIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeSetSkippingIndex::mayBenefitFromIndexForIn(const ASTPtr &) const +bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const { return false; } @@ -506,7 +506,7 @@ std::unique_ptr setIndexCreator( header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); } - return std::make_unique( + return std::make_unique( node->name, std::move(unique_expr), columns, data_types, header, node->granularity, max_rows); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h similarity index 69% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.h index 61d409af589..04f4d2bec1e 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -12,12 +12,12 @@ namespace DB { -class MergeTreeSetSkippingIndex; +class MergeTreeIndexSet; -struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleSet : public IMergeTreeIndexGranule { - explicit MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index); - MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index, MutableColumns && columns); + explicit MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index); + MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index, MutableColumns && columns); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; @@ -25,17 +25,17 @@ struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule size_t size() const { return block.rows(); } bool empty() const override { return !size(); } - ~MergeTreeSetIndexGranule() override = default; + ~MergeTreeIndexGranuleSet() override = default; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; Block block; }; -struct MergeTreeSetIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorSet : IMergeTreeIndexAggregator { - explicit MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index); - ~MergeTreeSetIndexAggregator() override = default; + explicit MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index); + ~MergeTreeIndexAggregatorSet() override = default; size_t size() const { return data.getTotalRowCount(); } bool empty() const override { return !size(); } @@ -55,26 +55,26 @@ private: size_t limit, ClearableSetVariants & variants) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; }; -class SetIndexCondition : public IIndexCondition +class MergeTreeIndexConditionSet : public IMergeTreeIndexCondition { 
public: - SetIndexCondition( + MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex & index); + const MergeTreeIndexSet & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~SetIndexCondition() override = default; + ~MergeTreeIndexConditionSet() override = default; private: void traverseAST(ASTPtr & node) const; bool atomFromAST(ASTPtr & node) const; @@ -82,7 +82,7 @@ private: bool checkASTUseless(const ASTPtr &node, bool atomic = false) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; bool useless; std::set key_columns; @@ -91,10 +91,10 @@ private: }; -class MergeTreeSetSkippingIndex : public IMergeTreeIndex +class MergeTreeIndexSet : public IMergeTreeIndex { public: - MergeTreeSetSkippingIndex( + MergeTreeIndexSet( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -104,12 +104,12 @@ public: size_t max_rows_) : IMergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {} - ~MergeTreeSetSkippingIndex() override = default; + ~MergeTreeIndexSet() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h index b6ee89d87ef..2a00c902810 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -59,17 +59,17 @@ using MergeTreeIndexAggregators = std::vector; /// Condition on the index. 
-class IIndexCondition +class IMergeTreeIndexCondition { public: - virtual ~IIndexCondition() = default; + virtual ~IMergeTreeIndexCondition() = default; /// Checks if this index is useful for query. virtual bool alwaysUnknownOrTrue() const = 0; virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; }; -using IndexConditionPtr = std::shared_ptr; +using MergeTreeIndexConditionPtr = std::shared_ptr; /// Structure for storing basic index info like columns, expression, arguments, ... @@ -101,7 +101,7 @@ public: virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0; - virtual IndexConditionPtr createIndexCondition( + virtual MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, const Context & context) const = 0; String name; diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index b23a2eedc0e..138e7c14f9d 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -2,8 +2,8 @@ #include #include #include -#include -#include +#include +#include #include #include From 047ee3883808b55e83134896a459d743c1c449bd Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 18:38:06 +0300 Subject: [PATCH 086/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index ec4de2f1c83..168ce893f14 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -198,12 +198,13 @@ void SystemLog::flush() return; std::lock_guard flush_lock(flush_mutex); + force_flushing = true; + /// Tell thread to execute extra flush. queue.push({ElementType::FORCE_FLUSH, {}}); /// Wait for flush being finished. 
std::unique_lock lock(condvar_mutex); - force_flushing = true; while (force_flushing) flush_condvar.wait(lock); } From 374aac3501e8cf6fe65b1fb35278c7c7b2c4f270 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 20 Jun 2019 00:32:20 +0800 Subject: [PATCH 087/191] fix build & chmod shell test --- .../Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp | 5 ++--- .../Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h | 2 +- .../00944_create_bloom_filter_index_with_merge_tree.sh | 0 3 files changed, 3 insertions(+), 4 deletions(-) mode change 100644 => 100755 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 365c94dcbaa..4eee7309811 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -44,7 +44,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( } for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - fillingBloomFilter(bloom_filters[column], granule_index_block, column, hash_functions); + fillingBloomFilter(bloom_filters[column], granule_index_block, column); } } @@ -94,8 +94,7 @@ void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks } } -void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter( - std::shared_ptr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions) +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) { const auto & column = granule_index_block.getByPosition(index_hash_column); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 6aea7601a73..79670678e79 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -29,7 +29,7 @@ private: void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const; - void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions); + void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column); }; diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh old mode 100644 new mode 100755 From 5e6ceef224d4c479e24cf81a3bde10de011ad6c5 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 19:50:17 +0300 Subject: [PATCH 088/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 33 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 168ce893f14..36c864ede03 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -119,14 +119,15 @@ protected: const size_t flush_interval_milliseconds; std::atomic is_shutdown{false}; - enum class ElementType + enum class EntryType { - REGULAR = 0, + LOG_ELEMENT = 0, + AUTO_FLUSH, + FORCE_FLUSH, SHUTDOWN, - FORCE_FLUSH }; - using QueueItem = std::pair; + using QueueItem = std::pair; /// Queue is bounded. But its size is quite large to not block in all normal cases. ConcurrentBoundedQueue queue {DBMS_SYSTEM_LOG_QUEUE_SIZE}; @@ -158,7 +159,7 @@ protected: bool force_flushing = false; /// flushImpl can be executed only in saving_thread. - void flushImpl(bool quiet); + void flushImpl(EntryType reason); }; @@ -186,7 +187,7 @@ void SystemLog::add(const LogElement & element) return; /// Without try we could block here in case of queue overflow. 
- if (!queue.tryPush({ElementType::REGULAR, element})) + if (!queue.tryPush({EntryType::LOG_ELEMENT, element})) LOG_ERROR(log, "SystemLog queue is full"); } @@ -201,7 +202,7 @@ void SystemLog::flush() force_flushing = true; /// Tell thread to execute extra flush. - queue.push({ElementType::FORCE_FLUSH, {}}); + queue.push({EntryType::FORCE_FLUSH, {}}); /// Wait for flush being finished. std::unique_lock lock(condvar_mutex); @@ -218,7 +219,7 @@ void SystemLog::shutdown() return; /// Tell thread to shutdown. - queue.push({ElementType::SHUTDOWN, {}}); + queue.push({EntryType::SHUTDOWN, {}}); saving_thread.join(); } @@ -268,15 +269,15 @@ void SystemLog::threadFunction() if (has_element) { - if (element.first == ElementType::SHUTDOWN) + if (element.first == EntryType::SHUTDOWN) { /// NOTE: MergeTree engine can write data even it is already in shutdown state. - flushImpl(true); + flushImpl(element.first); break; } - else if (element.first == ElementType::FORCE_FLUSH) + else if (element.first == EntryType::FORCE_FLUSH) { - flushImpl(false); + flushImpl(element.first); time_after_last_write.restart(); continue; } @@ -288,7 +289,7 @@ void SystemLog::threadFunction() if (milliseconds_elapsed >= flush_interval_milliseconds) { /// Write data to a table. - flushImpl(true); + flushImpl(EntryType::AUTO_FLUSH); time_after_last_write.restart(); } } @@ -303,11 +304,11 @@ void SystemLog::threadFunction() template -void SystemLog::flushImpl(bool quiet) +void SystemLog::flushImpl(EntryType reason) { try { - if (quiet && data.empty()) + if ((reason == EntryType::AUTO_FLUSH || reason == EntryType::SHUTDOWN) && data.empty()) return; LOG_TRACE(log, "Flushing system log"); @@ -346,7 +347,7 @@ void SystemLog::flushImpl(bool quiet) /// In case of exception, also clean accumulated data - to avoid locking. 
data.clear(); } - if (!quiet) + if (reason == EntryType::FORCE_FLUSH) { std::lock_guard lock(condvar_mutex); force_flushing = false; From 8221dd2a1a913f5e65825a06780fb6a36f4856ae Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 20 Jun 2019 08:33:37 +0800 Subject: [PATCH 089/191] fix build and test failure --- dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 7 ++++--- dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h | 4 ++-- .../00944_create_bloom_filter_index_with_merge_tree.sh | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 4dcdb90a4f6..b86da56649d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -24,9 +24,10 @@ namespace ErrorCodes } MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter( - const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, const Block & header, - size_t granularity, size_t bits_per_row_, size_t hash_functions_) - : IMergeTreeIndex(name, expr, columns, data_types, header, granularity), bits_per_row(bits_per_row_), hash_functions(hash_functions_) + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, const Block & header_, + size_t granularity_, size_t bits_per_row_, size_t hash_functions_) + : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_), bits_per_row(bits_per_row_), + hash_functions(hash_functions_) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index 8930018f22e..2b89b9bddfa 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -12,8 +12,8 @@ class MergeTreeIndexBloomFilter : public 
IMergeTreeIndex { public: MergeTreeIndexBloomFilter( - const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, - const Block & header, size_t granularity, size_t bits_per_row_, size_t hash_functions_); + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, + const Block & header_, size_t granularity_, size_t bits_per_row_, size_t hash_functions_); MergeTreeIndexGranulePtr createIndexGranule() const override; diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh index a637468f203..52246b50b7a 100755 --- a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh +++ b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh @@ -7,6 +7,6 @@ set -e for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \ rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'` -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.bloom_filter_idx"; -$CLICKHOUSE_CLIENT -q "CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.bloom_filter_idx"; +$CLICKHOUSE_CLIENT --allow_experimental_data_skipping_indices=1 --query="CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" done From 1e2d3a101fad29aa2b09e722665424ed85fabe9a Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 
Jun 2019 13:39:54 +0300 Subject: [PATCH 090/191] Fix doc --- docs/ru/query_language/functions/geo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index ec1033eb49b..9682d75d836 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -104,7 +104,7 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res Получает H3 индекс точки (lat, lon) с заданным разрешением ``` -pointInPolygon(lat, lon, resolution) +geoToH3(lat, lon, resolution) ``` **Входные значения** From 6064a1ed7c616be223cf9bec1b339e0845b75918 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:44:34 +0300 Subject: [PATCH 091/191] Fix geoToH3 compile --- dbms/src/Functions/geoToH3.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index a4394e8940c..2adb6ead584 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,9 +1,9 @@ #include #include -#include #include #include #include +#include #include #include #include @@ -27,9 +27,7 @@ class FunctionGeoToH3 : public IFunction public: static constexpr auto name = "geoToH3"; - FunctionGeoToH3(const Context & context) : context(context) {} - - static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context &) { return std::make_shared(); } std::string getName() const override { return name; } @@ -157,9 +155,6 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } - -private: - const Context & context; }; From bd14069cd1b8eb1f907fd93c3e40f3eed0ad2175 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:58:21 +0300 Subject: [PATCH 092/191] Fix cmake --- contrib/CMakeLists.txt | 6 +++++- dbms/CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9911f1b563d..737b6d72bee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,11 +110,15 @@ if (USE_INTERNAL_H3_LIBRARY) add_subdirectory(h3) endif () - if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () + + # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. + # We need to use system wide ssl directory. + set (OPENSSLDIR "/etc/ssl") + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index b7bb4a81473..4089adc9cf5 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -359,7 +359,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS) +if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") From b0d0e82b29bc4d1a3f07ca25b8e0016f263c119e Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 14:39:53 +0300 Subject: [PATCH 093/191] h3 submodule --- contrib/h3 | 1 + 1 file changed, 1 insertion(+) create mode 160000 contrib/h3 diff --git a/contrib/h3 b/contrib/h3 new file mode 160000 index 00000000000..6cfd649e8c0 --- /dev/null +++ b/contrib/h3 @@ -0,0 +1 @@ +Subproject commit 6cfd649e8c0d3ed913e8aae928a669fc3b8a2365 From 6ba6ee9bcd1e2ffee690412f77fc2089877ab2ba Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 19:27:08 +0300 Subject: [PATCH 094/191] glibc compat for h3 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9f862230e5..08c7cd4d60f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -401,6 +401,7 @@ if 
(GLIBC_COMPATIBILITY) add_glibc_compat(kj) add_glibc_compat(simdjson) add_glibc_compat(apple_rt) + add_glibc_compat(h3) add_glibc_compat(re2) add_glibc_compat(re2_st) add_glibc_compat(hs_compile_shared) From 25cbc901ede879c769d432ce0bf2152a2be3d612 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 20 Jun 2019 20:38:16 +0300 Subject: [PATCH 095/191] fix deadlock at flushing on shutdown --- dbms/src/Interpreters/Context.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 1eab4a081f3..5f18b7b3caa 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -245,15 +245,12 @@ struct ContextShared return; shutdown_called = true; - { - std::lock_guard lock(mutex); - /** After this point, system logs will shutdown their threads and no longer write any data. - * It will prevent recreation of system tables at shutdown. - * Note that part changes at shutdown won't be logged to part log. - */ - system_logs.reset(); - } + /** At this point, system logs will flush accumulated data, then shutdown their threads and no longer write any data. + * It will prevent recreation of system tables at shutdown. + * Note that part changes at shutdown won't be logged to part log. + */ + system_logs.reset(); /** At this point, some tables may have threads that block our mutex. 
* To shutdown them correctly, we will copy the current list of tables, From 92509b71a41bfd88e967ab3152dc90362f69a347 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jun 2019 16:01:16 +0300 Subject: [PATCH 096/191] Slightly speedup --- dbms/src/Functions/URL/domain.h | 127 ++++++++++++++------------------ 1 file changed, 57 insertions(+), 70 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 3c9fef742c1..ba50acce2a7 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,25 +8,10 @@ namespace DB { -static inline bool isUnsafeCharUrl(char c) +static inline bool isUnsafeOrReversedCharUrl(char c) { switch (c) { - case ' ': - case '\t': - case '<': - case '>': - case '#': - case '%': - case '{': - case '}': - case '|': - case '\\': - case '^': - case '~': - case '[': - case ']': - return true; } return false; } @@ -44,74 +29,76 @@ static inline bool isCharEndOfUrl(char c) return false; } -static inline bool isReservedCharUrl(char c) -{ - switch (c) - { - case ';': - case '/': - case '?': - case ':': - case '@': - case '=': - case '&': - return true; - } - return false; -} - /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - Pos slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos != end) - { - pos = slash_pos; - } - else - { - pos = data; - } + if (*(end - 1) == '.') + return StringRef{}; - if (pos != data) + StringRef scheme = getURLScheme(data, size); + if (scheme.size != 0) { - StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. 
- if (pos - scheme_end != 1 || *scheme_end != ':') - return {}; - } - - // Check with we still have // character from the scheme - if (!(end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')) - pos += 2; - - const char * start_of_host = pos; - bool has_dot_delimiter = false; - for (; pos < end; ++pos) - { - if (*pos == '@') - start_of_host = pos + 1; - else if (*pos == '.') - { - if (pos + 1 == end || isCharEndOfUrl(*(pos + 1))) - return StringRef{}; - has_dot_delimiter = true; - } - else if (isCharEndOfUrl(*pos)) - break; - else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) + pos = scheme_end + 1; + if (*scheme_end != ':' || *pos != '/') return StringRef{}; } - if (!has_dot_delimiter) + if (end - pos > 2 && *pos == '/' && *(pos + 1) == '/') + pos += 2; + + auto start_of_host = pos; + Pos dot_pos = nullptr; + bool exit_loop = false; + for (; pos < end && !exit_loop; ++pos) + { + switch(*pos) + { + case '.': + dot_pos = pos; + break; + case ':': /// end symbols + case '/': + case '?': + case '#': + exit_loop = true; + break; + case '@': + start_of_host = pos; + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + } + } + + if (!dot_pos || start_of_host >= pos) return StringRef{}; - return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); + /// if end found immediately after dot + char after_dot = *(dot_pos + 1); + if (after_dot == ':' || after_dot == '/' || after_dot == '?' 
|| after_dot == '#') + return StringRef{}; + + + return StringRef(start_of_host, pos - start_of_host); } template From 8b263739ce9a7548bdc5ba9dcd330576656516b5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jun 2019 16:01:27 +0300 Subject: [PATCH 097/191] Comments --- dbms/src/Functions/URL/domain.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index ba50acce2a7..af71b9fdc29 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,27 +8,6 @@ namespace DB { -static inline bool isUnsafeOrReversedCharUrl(char c) -{ - switch (c) - { - } - return false; -} - -static inline bool isCharEndOfUrl(char c) -{ - switch (c) - { - case ':': - case '/': - case '?': - case '#': - return true; - } - return false; -} - /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -66,7 +45,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '#': exit_loop = true; break; - case '@': + case '@': /// myemail@gmail.com start_of_host = pos; break; case ' ': /// restricted symbols From b75db2ef0442261f9787b9fc09d43f14ceeee708 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 17:29:10 +0300 Subject: [PATCH 098/191] Fix dropping message to early when stalling --- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 5511f3c4cec..a67a0aeb519 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -74,18 +74,21 @@ bool ReadBufferFromKafkaConsumer::nextImpl() { if (intermediate_commit) commit(); - messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + + /// Don't drop old 
messages immediately, since we may need them for virtual columns. + auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + if (new_messages.empty()) + { + LOG_TRACE(log, "Stalled"); + stalled = true; + return false; + } + messages = std::move(new_messages); current = messages.begin(); LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); } - if (messages.empty()) - { - stalled = true; - return false; - } - if (auto err = current->get_error()) { ++current; From b989d45818f9dfe299d2a7ab9aec77dfc113cd14 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 19:58:13 +0300 Subject: [PATCH 099/191] Fix tests --- .../integration/test_storage_kafka/test.py | 27 +++-- .../test_kafka_virtual.reference | 100 +++++++++--------- 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 7769556b400..f8514b908ba 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -235,20 +235,20 @@ def test_kafka_json_without_delimiter(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json1', + kafka_group_name = 'json1', kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json1', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json1', [messages]) result = '' for i in range(50): @@ -290,8 +290,8 @@ def test_kafka_materialized_view(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json2', + kafka_group_name = 
'json2', kafka_format = 'JSONEachRow', kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -304,7 +304,7 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json', messages) + kafka_produce('json2', messages) for i in range(20): time.sleep(1) @@ -321,7 +321,7 @@ def test_kafka_materialized_view(kafka_cluster): def test_kafka_flush_on_big_message(kafka_cluster): # Create batchs of messages of size ~100Kb - kafka_messages = 10000 + kafka_messages = 1000 batch_messages = 1000 messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)] kafka_produce('flush', messages) @@ -336,8 +336,7 @@ def test_kafka_flush_on_big_message(kafka_cluster): kafka_topic_list = 'flush', kafka_group_name = 'flush', kafka_format = 'JSONEachRow', - kafka_max_block_size = 10, - kafka_commit_on_every_batch = 1; + kafka_max_block_size = 10; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; @@ -372,20 +371,20 @@ def test_kafka_virtual_columns(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json3', + kafka_group_name = 'json3', kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json3', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json3', [messages]) result = '' for i in range(50): diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference index 0660a969f7f..6ee6017efd6 100644 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference +++ 
b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference @@ -1,50 +1,50 @@ - 0 json 0 0 - 1 json 1 0 - 2 json 2 0 - 3 json 3 0 - 4 json 4 0 - 5 json 5 0 - 6 json 6 0 - 7 json 7 0 - 8 json 8 0 - 9 json 9 0 - 10 json 10 0 - 11 json 11 0 - 12 json 12 0 - 13 json 13 0 - 14 json 14 0 - 15 json 15 0 - 16 json 16 0 - 17 json 17 0 - 18 json 18 0 - 19 json 19 0 - 20 json 20 0 - 21 json 21 0 - 22 json 22 0 - 23 json 23 0 - 24 json 24 0 - 25 json 25 1 - 26 json 26 1 - 27 json 27 1 - 28 json 28 1 - 29 json 29 1 - 30 json 30 1 - 31 json 31 1 - 32 json 32 1 - 33 json 33 1 - 34 json 34 1 - 35 json 35 1 - 36 json 36 1 - 37 json 37 1 - 38 json 38 1 - 39 json 39 1 - 40 json 40 1 - 41 json 41 1 - 42 json 42 1 - 43 json 43 1 - 44 json 44 1 - 45 json 45 1 - 46 json 46 1 - 47 json 47 1 - 48 json 48 1 - 49 json 49 1 + 0 json3 0 0 + 1 json3 1 0 + 2 json3 2 0 + 3 json3 3 0 + 4 json3 4 0 + 5 json3 5 0 + 6 json3 6 0 + 7 json3 7 0 + 8 json3 8 0 + 9 json3 9 0 + 10 json3 10 0 + 11 json3 11 0 + 12 json3 12 0 + 13 json3 13 0 + 14 json3 14 0 + 15 json3 15 0 + 16 json3 16 0 + 17 json3 17 0 + 18 json3 18 0 + 19 json3 19 0 + 20 json3 20 0 + 21 json3 21 0 + 22 json3 22 0 + 23 json3 23 0 + 24 json3 24 0 + 25 json3 25 1 + 26 json3 26 1 + 27 json3 27 1 + 28 json3 28 1 + 29 json3 29 1 + 30 json3 30 1 + 31 json3 31 1 + 32 json3 32 1 + 33 json3 33 1 + 34 json3 34 1 + 35 json3 35 1 + 36 json3 36 1 + 37 json3 37 1 + 38 json3 38 1 + 39 json3 39 1 + 40 json3 40 1 + 41 json3 41 1 + 42 json3 42 1 + 43 json3 43 1 + 44 json3 44 1 + 45 json3 45 1 + 46 json3 46 1 + 47 json3 47 1 + 48 json3 48 1 + 49 json3 49 1 From ac3072cd9bcb7afeda1d0a86179970a8ec157ccf Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 21 Jun 2019 20:25:47 +0300 Subject: [PATCH 100/191] better shutdown of system_logs --- dbms/src/Interpreters/Context.cpp | 9 ++++----- dbms/src/Interpreters/SystemLog.cpp | 6 ++++++ dbms/src/Interpreters/SystemLog.h | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git 
a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5f18b7b3caa..a3eabdd165a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -245,12 +245,11 @@ struct ContextShared return; shutdown_called = true; + /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. + * Note that part changes at shutdown won't be logged to part log. + */ - /** At this point, system logs will flush accumulated data, then shutdown their threads and no longer write any data. - * It will prevent recreation of system tables at shutdown. - * Note that part changes at shutdown won't be logged to part log. - */ - system_logs.reset(); + system_logs->shutdown(); /** At this point, some tables may have threads that block our mutex. * To shutdown them correctly, we will copy the current list of tables, diff --git a/dbms/src/Interpreters/SystemLog.cpp b/dbms/src/Interpreters/SystemLog.cpp index 94214b26f6e..f46b348db7a 100644 --- a/dbms/src/Interpreters/SystemLog.cpp +++ b/dbms/src/Interpreters/SystemLog.cpp @@ -50,6 +50,12 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi SystemLogs::~SystemLogs() +{ + shutdown(); +} + + +void SystemLogs::shutdown() { if (query_log) query_log->shutdown(); diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 36c864ede03..48dbde5a38b 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -68,6 +68,8 @@ struct SystemLogs SystemLogs(Context & global_context, const Poco::Util::AbstractConfiguration & config); ~SystemLogs(); + void shutdown(); + std::shared_ptr query_log; /// Used to log queries. std::shared_ptr query_thread_log; /// Used to log query threads. 
std::shared_ptr part_log; /// Used to log operations with parts From 892a82e5ffb377c47476c0b9cfde81e3a2e39de5 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 20:43:39 +0300 Subject: [PATCH 101/191] Add test on virtual columns and materialized view --- .../integration/test_storage_kafka/test.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index f8514b908ba..66230455999 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -389,11 +389,48 @@ def test_kafka_virtual_columns(kafka_cluster): result = '' for i in range(50): result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') - if kafka_check_result(result): + if kafka_check_result(result, False, 'test_kafka_virtual.reference'): break kafka_check_result(result, True, 'test_kafka_virtual.reference') +def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json3', + kafka_group_name = 'json3', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _topic, _offset FROM test.kafka; + ''') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('json3', messages) + + for i in range(20): + time.sleep(1) + result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view') + if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + 
break + kafka_check_result(result, True, 'test_kafka_virtual.reference') + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From f34e4b53ce5be4f3a87005bec6c2791a1042ac4d Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 21:34:24 +0300 Subject: [PATCH 102/191] Fix tests again --- dbms/tests/integration/test_storage_kafka/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 66230455999..082d9704020 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -388,6 +388,7 @@ def test_kafka_virtual_columns(kafka_cluster): result = '' for i in range(50): + time.sleep(1) result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') if kafka_check_result(result, False, 'test_kafka_virtual.reference'): break @@ -410,7 +411,7 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): ENGINE = MergeTree() ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _topic, _offset FROM test.kafka; + SELECT *, _key, _topic, _offset FROM test.kafka; ''') messages = [] From 53634a324e112075e82af253615faf4e74cad5a1 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 21 Jun 2019 22:24:30 +0300 Subject: [PATCH 103/191] fix error with uninitialized system_logs --- dbms/src/Interpreters/Context.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a3eabdd165a..0abf34c5170 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -249,7 +249,8 @@ struct ContextShared * Note that part changes at shutdown won't be logged to part log. 
*/ - system_logs->shutdown(); + if (system_logs) + system_logs->shutdown(); /** At this point, some tables may have threads that block our mutex. * To shutdown them correctly, we will copy the current list of tables, From 4f110bad2aacc87f277cded69a78a891e42237dd Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 22 Jun 2019 12:55:56 +0300 Subject: [PATCH 104/191] Fixed GCC minor version in libhdfs3-cmake. --- contrib/libhdfs3-cmake/CMake/Platform.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index 55fbf646589..ea00fa3f401 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR) + LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") From d6d10120c8f53812d7a9128f51bd839684d7bc94 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Sun, 23 Jun 2019 17:48:58 +0300 Subject: [PATCH 105/191] Refactor tests --- .../integration/test_storage_kafka/test.py | 136 ++++++++---------- .../test_kafka_virtual.reference | 50 ------- .../test_kafka_virtual1.reference | 50 +++++++ .../test_kafka_virtual2.reference | 50 +++++++ 4 files changed, 163 insertions(+), 123 deletions(-) delete mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference diff --git 
a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 082d9704020..8e42a83459f 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -22,7 +22,6 @@ import kafka_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. -# TODO: add test for mat. view is working. # TODO: add test for SELECT LIMIT is working. # TODO: modify tests to respect `skip_broken_messages` setting. @@ -148,13 +147,12 @@ def test_kafka_settings_new_syntax(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'new', - kafka_group_name = 'new', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n', - kafka_skip_broken_messages = 1; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'new', + kafka_group_name = 'new', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n', + kafka_skip_broken_messages = 1; ''') messages = [] @@ -172,7 +170,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): kafka_produce('new', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -183,12 +181,11 @@ def test_kafka_csv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'csv', - kafka_group_name = 'csv', - kafka_format = 'CSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'csv', + kafka_group_name = 'csv', + kafka_format = 'CSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -197,7 +194,7 @@ def test_kafka_csv_with_delimiter(kafka_cluster): kafka_produce('csv', messages) result = '' - for i in range(50): + while True: 
result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -208,12 +205,11 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'tsv', - kafka_group_name = 'tsv', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tsv', + kafka_group_name = 'tsv', + kafka_format = 'TSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -222,7 +218,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_produce('tsv', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -233,25 +229,24 @@ def test_kafka_json_without_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json1', - kafka_group_name = 'json1', - kafka_format = 'JSONEachRow'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json1', [messages]) + kafka_produce('json', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json1', [messages]) + kafka_produce('json', [messages]) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -262,12 +257,11 @@ def test_kafka_protobuf(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb', - kafka_group_name = 'pb', - kafka_format = 
'Protobuf', - kafka_schema = 'kafka.proto:KeyValuePair'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb', + kafka_group_name = 'pb', + kafka_format = 'Protobuf', + kafka_schema = 'kafka.proto:KeyValuePair'; ''') kafka_produce_protobuf_messages('pb', 0, 20) @@ -275,7 +269,7 @@ def test_kafka_protobuf(kafka_cluster): kafka_produce_protobuf_messages('pb', 21, 29) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -288,12 +282,11 @@ def test_kafka_materialized_view(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json2', - kafka_group_name = 'json2', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'mv', + kafka_group_name = 'mv', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -304,9 +297,9 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json2', messages) + kafka_produce('mv', messages) - for i in range(20): + while True: time.sleep(1) result = instance.query('SELECT * FROM test.view') if kafka_check_result(result): @@ -331,12 +324,11 @@ def test_kafka_flush_on_big_message(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush', - kafka_group_name = 'flush', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'flush', + kafka_group_name = 'flush', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 10; CREATE 
TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; @@ -356,7 +348,7 @@ def test_kafka_flush_on_big_message(kafka_cluster): except kafka.errors.GroupCoordinatorNotAvailableError: continue - for _ in range(20): + while True: time.sleep(1) result = instance.query('SELECT count() FROM test.view') if int(result) == kafka_messages*batch_messages: @@ -369,30 +361,29 @@ def test_kafka_virtual_columns(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json3', - kafka_group_name = 'json3', - kafka_format = 'JSONEachRow'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt1', + kafka_group_name = 'virt1', + kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json3', [messages]) + kafka_produce('virt1', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json3', [messages]) + kafka_produce('virt1', [messages]) result = '' - for i in range(50): + while True: time.sleep(1) result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') - if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + if kafka_check_result(result, False, 'test_kafka_virtual1.reference'): break - kafka_check_result(result, True, 'test_kafka_virtual.reference') + kafka_check_result(result, True, 'test_kafka_virtual1.reference') def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): @@ -401,12 +392,11 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json3', - kafka_group_name = 'json3', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = 
'\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt2', + kafka_group_name = 'virt2', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64) ENGINE = MergeTree() ORDER BY key; @@ -417,14 +407,14 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json3', messages) + kafka_produce('virt2', messages) - for i in range(20): + while True: time.sleep(1) result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view') - if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + if kafka_check_result(result, False, 'test_kafka_virtual2.reference'): break - kafka_check_result(result, True, 'test_kafka_virtual.reference') + kafka_check_result(result, True, 'test_kafka_virtual2.reference') instance.query(''' DROP TABLE test.consumer; diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference deleted file mode 100644 index 6ee6017efd6..00000000000 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference +++ /dev/null @@ -1,50 +0,0 @@ - 0 json3 0 0 - 1 json3 1 0 - 2 json3 2 0 - 3 json3 3 0 - 4 json3 4 0 - 5 json3 5 0 - 6 json3 6 0 - 7 json3 7 0 - 8 json3 8 0 - 9 json3 9 0 - 10 json3 10 0 - 11 json3 11 0 - 12 json3 12 0 - 13 json3 13 0 - 14 json3 14 0 - 15 json3 15 0 - 16 json3 16 0 - 17 json3 17 0 - 18 json3 18 0 - 19 json3 19 0 - 20 json3 20 0 - 21 json3 21 0 - 22 json3 22 0 - 23 json3 23 0 - 24 json3 24 0 - 25 json3 25 1 - 26 json3 26 1 - 27 json3 27 1 - 28 json3 28 1 - 29 json3 29 1 - 30 json3 30 1 - 31 json3 31 1 - 32 json3 32 1 - 33 json3 33 1 - 34 json3 34 1 - 35 json3 35 1 - 36 json3 36 1 - 37 json3 37 1 - 38 json3 38 1 - 39 json3 39 1 - 40 json3 40 1 - 41 json3 41 1 - 
42 json3 42 1 - 43 json3 43 1 - 44 json3 44 1 - 45 json3 45 1 - 46 json3 46 1 - 47 json3 47 1 - 48 json3 48 1 - 49 json3 49 1 diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference new file mode 100644 index 00000000000..5956210d25e --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference @@ -0,0 +1,50 @@ + 0 virt1 0 0 + 1 virt1 1 0 + 2 virt1 2 0 + 3 virt1 3 0 + 4 virt1 4 0 + 5 virt1 5 0 + 6 virt1 6 0 + 7 virt1 7 0 + 8 virt1 8 0 + 9 virt1 9 0 + 10 virt1 10 0 + 11 virt1 11 0 + 12 virt1 12 0 + 13 virt1 13 0 + 14 virt1 14 0 + 15 virt1 15 0 + 16 virt1 16 0 + 17 virt1 17 0 + 18 virt1 18 0 + 19 virt1 19 0 + 20 virt1 20 0 + 21 virt1 21 0 + 22 virt1 22 0 + 23 virt1 23 0 + 24 virt1 24 0 + 25 virt1 25 1 + 26 virt1 26 1 + 27 virt1 27 1 + 28 virt1 28 1 + 29 virt1 29 1 + 30 virt1 30 1 + 31 virt1 31 1 + 32 virt1 32 1 + 33 virt1 33 1 + 34 virt1 34 1 + 35 virt1 35 1 + 36 virt1 36 1 + 37 virt1 37 1 + 38 virt1 38 1 + 39 virt1 39 1 + 40 virt1 40 1 + 41 virt1 41 1 + 42 virt1 42 1 + 43 virt1 43 1 + 44 virt1 44 1 + 45 virt1 45 1 + 46 virt1 46 1 + 47 virt1 47 1 + 48 virt1 48 1 + 49 virt1 49 1 diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference new file mode 100644 index 00000000000..c20dc3513a0 --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference @@ -0,0 +1,50 @@ + 0 virt2 0 0 + 1 virt2 1 0 + 2 virt2 2 0 + 3 virt2 3 0 + 4 virt2 4 0 + 5 virt2 5 0 + 6 virt2 6 0 + 7 virt2 7 0 + 8 virt2 8 0 + 9 virt2 9 0 + 10 virt2 10 0 + 11 virt2 11 0 + 12 virt2 12 0 + 13 virt2 13 0 + 14 virt2 14 0 + 15 virt2 15 0 + 16 virt2 16 0 + 17 virt2 17 0 + 18 virt2 18 0 + 19 virt2 19 0 + 20 virt2 20 0 + 21 virt2 21 0 + 22 virt2 22 0 + 23 virt2 23 0 + 24 virt2 24 0 + 25 virt2 25 1 + 26 virt2 26 1 + 27 virt2 27 1 + 28 virt2 28 1 + 29 
virt2 29 1 + 30 virt2 30 1 + 31 virt2 31 1 + 32 virt2 32 1 + 33 virt2 33 1 + 34 virt2 34 1 + 35 virt2 35 1 + 36 virt2 36 1 + 37 virt2 37 1 + 38 virt2 38 1 + 39 virt2 39 1 + 40 virt2 40 1 + 41 virt2 41 1 + 42 virt2 42 1 + 43 virt2 43 1 + 44 virt2 44 1 + 45 virt2 45 1 + 46 virt2 46 1 + 47 virt2 47 1 + 48 virt2 48 1 + 49 virt2 49 1 From 56a759525407c05ebe4b8f468f15675fcb8dd127 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 13:47:17 +0300 Subject: [PATCH 106/191] Slightly speed up --- dbms/src/Functions/URL/domain.h | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index af71b9fdc29..65c5a5fa9e7 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,6 +8,23 @@ namespace DB { +namespace { + +inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) +{ + if (!dot_pos || start_of_host >= pos) + return StringRef{}; + + auto after_dot = *(dot_pos + 1); + if (after_dot == ':' || after_dot == '/' || after_dot == '?' || after_dot == '#') + return StringRef{}; + + + return StringRef(start_of_host, pos - start_of_host); +} + +} + /// Extracts host from given url. 
inline StringRef getURLHost(const char * data, size_t size) { @@ -31,8 +48,7 @@ inline StringRef getURLHost(const char * data, size_t size) auto start_of_host = pos; Pos dot_pos = nullptr; - bool exit_loop = false; - for (; pos < end && !exit_loop; ++pos) + for (; pos < end; ++pos) { switch(*pos) { @@ -43,8 +59,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '/': case '?': case '#': - exit_loop = true; - break; + return buildFound(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com start_of_host = pos; break; @@ -68,16 +83,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } - if (!dot_pos || start_of_host >= pos) - return StringRef{}; - - /// if end found immediately after dot - char after_dot = *(dot_pos + 1); - if (after_dot == ':' || after_dot == '/' || after_dot == '?' || after_dot == '#') - return StringRef{}; - - - return StringRef(start_of_host, pos - start_of_host); + return buildFound(pos, dot_pos, start_of_host); } template From 088401b35f55ab99a73a6a95e074e2bc3a5c10b6 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 24 Jun 2019 13:53:06 +0300 Subject: [PATCH 107/191] Add helpful option to docker-compose invocation --- dbms/tests/integration/helpers/cluster.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 157ba616246..5743625a8cd 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -338,30 +338,32 @@ class ClickHouseCluster: self.docker_client = docker.from_env(version=self.docker_api_version) + common_opts = ['up', '-d', '--force-recreate', '--renew-anon-volumes'] + if self.with_zookeeper and self.base_zookeeper_cmd: - subprocess_check_call(self.base_zookeeper_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_zookeeper_cmd + common_opts) for command in self.pre_zookeeper_commands: 
self.run_kazoo_commands_with_retries(command, repeats=5) self.wait_zookeeper_to_start(120) if self.with_mysql and self.base_mysql_cmd: - subprocess_check_call(self.base_mysql_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mysql_cmd+ common_opts) self.wait_mysql_to_start(120) if self.with_postgres and self.base_postgres_cmd: - subprocess_check_call(self.base_postgres_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_postgres_cmd+ common_opts) self.wait_postgres_to_start(120) if self.with_kafka and self.base_kafka_cmd: - subprocess_check_call(self.base_kafka_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_kafka_cmd+ common_opts) self.kafka_docker_id = self.get_instance_docker_id('kafka1') if self.with_hdfs and self.base_hdfs_cmd: - subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_hdfs_cmd+ common_opts) self.wait_hdfs_to_start(120) if self.with_mongo and self.base_mongo_cmd: - subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mongo_cmd+ common_opts) self.wait_mongo_to_start(30) subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) From 331e17d56a3d9890b086fe88081e12f0ffd23916 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 14:16:08 +0300 Subject: [PATCH 108/191] Return scheme logic --- dbms/src/Functions/URL/domain.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 65c5a5fa9e7..d6d0409e0df 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -19,7 +19,6 @@ inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & st if (after_dot == ':' || after_dot == '/' || after_dot == '?' 
|| after_dot == '#') return StringRef{}; - return StringRef(start_of_host, pos - start_of_host); } @@ -34,16 +33,26 @@ inline StringRef getURLHost(const char * data, size_t size) if (*(end - 1) == '.') return StringRef{}; - StringRef scheme = getURLScheme(data, size); - if (scheme.size != 0) + + Pos slash_pos = find_first_symbols<'/'>(pos, end); + if (slash_pos != end) + pos = slash_pos; + else + pos = data; + + if (pos != data) { + StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; - pos = scheme_end + 1; - if (*scheme_end != ':' || *pos != '/') + if (pos - scheme_end != 1 || *scheme_end != ':') + { + std::cerr << "RETURNING HERE\n"; return StringRef{}; + } } - if (end - pos > 2 && *pos == '/' && *(pos + 1) == '/') + // Check with we still have // character from the scheme + if (end - pos > 2 && *(pos) == '/' && *(pos + 1) == '/') pos += 2; auto start_of_host = pos; @@ -61,7 +70,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '#': return buildFound(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com - start_of_host = pos; + start_of_host = pos + 1; break; case ' ': /// restricted symbols case '\t': From 5b378a3f01e06990135cb3d1800ce542062d1e90 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 14:18:53 +0300 Subject: [PATCH 109/191] Remove degug info --- dbms/src/Functions/URL/domain.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index d6d0409e0df..8fdd24159ec 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -45,10 +45,7 @@ inline StringRef getURLHost(const char * data, size_t size) StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; if (pos - scheme_end != 1 || *scheme_end != ':') - { - std::cerr << "RETURNING HERE\n"; return StringRef{}; - } } // Check with we still have // character from the scheme From c6ece40f3c9fbdc2d232f882edaeaa0fd00a4396 Mon 
Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 16:04:20 +0300 Subject: [PATCH 110/191] Fix minor bug and style --- dbms/src/Functions/URL/domain.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 8fdd24159ec..9b21b11253e 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,11 +8,12 @@ namespace DB { -namespace { +namespace +{ inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { - if (!dot_pos || start_of_host >= pos) + if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) return StringRef{}; auto after_dot = *(dot_pos + 1); @@ -30,10 +31,6 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - if (*(end - 1) == '.') - return StringRef{}; - - Pos slash_pos = find_first_symbols<'/'>(pos, end); if (slash_pos != end) pos = slash_pos; @@ -56,7 +53,7 @@ inline StringRef getURLHost(const char * data, size_t size) Pos dot_pos = nullptr; for (; pos < end; ++pos) { - switch(*pos) + switch (*pos) { case '.': dot_pos = pos; From 48451b2b0fcfb6eb49e5f2ee389c6aff37ff8e38 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 22:00:40 +0300 Subject: [PATCH 111/191] Better name --- dbms/src/Functions/URL/domain.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 9b21b11253e..edbb629ae95 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -11,7 +11,7 @@ namespace DB namespace { -inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) +inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) return StringRef{}; @@ -62,7 +62,7 @@ inline StringRef getURLHost(const char 
* data, size_t size) case '/': case '?': case '#': - return buildFound(pos, dot_pos, start_of_host); + return checkAndReturnHost(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com start_of_host = pos + 1; break; @@ -86,7 +86,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } - return buildFound(pos, dot_pos, start_of_host); + return checkAndReturnHost(pos, dot_pos, start_of_host); } template From 0b28e73f500e3d3e0f85c92f14b0215b1a6a3cb0 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 11:23:36 +0300 Subject: [PATCH 112/191] Fix unblundled build --- dbms/src/Functions/geoToH3.cpp | 3 +++ dbms/src/Functions/registerFunctions.cpp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 2adb6ead584..bc2b44514ee 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,3 +1,5 @@ +#if USE_H3 + #include #include #include @@ -164,3 +166,4 @@ void registerFunctionGeoToH3(FunctionFactory & factory) } } +#endif diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 5859506627e..c48fa1004e0 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,7 +42,10 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); + +#if USE_H3 void registerFunctionGeoToH3(FunctionFactory &); +#endif #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -86,7 +89,10 @@ void registerFunctions() registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); registerFunctionTransform(factory); + +#if USE_H3 registerFunctionGeoToH3(factory); +#endif #if USE_ICU registerFunctionConvertCharset(factory); From fa88954e5618336fe2f58fafb93d40786013db01 Mon Sep 17 00:00:00 2001 From: alesapin Date: 
Tue, 25 Jun 2019 12:04:35 +0300 Subject: [PATCH 113/191] Better scheme cut --- dbms/src/Functions/URL/domain.h | 12 ++++-------- .../1_stateful/00038_uniq_state_merge2.reference | 10 +++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index edbb629ae95..16c154cde1f 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -32,23 +32,19 @@ inline StringRef getURLHost(const char * data, size_t size) Pos end = data + size; Pos slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos != end) - pos = slash_pos; + if (slash_pos < end - 1 && *(slash_pos + 1) == '/') + pos = slash_pos + 2; else pos = data; if (pos != data) { - StringRef scheme = getURLScheme(data, size); + StringRef scheme = getURLScheme(data, end - pos); Pos scheme_end = data + scheme.size; - if (pos - scheme_end != 1 || *scheme_end != ':') + if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) return StringRef{}; } - // Check with we still have // character from the scheme - if (end - pos > 2 && *(pos) == '/' && *(pos + 1) == '/') - pos += 2; - auto start_of_host = pos; Pos dot_pos = nullptr; for (; pos < end; ++pos) diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 575d19b2ebf..9144afd90b2 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,16 +1,16 @@ -ru 262911 69218 +ru 262914 69218 92101 89421 -com 63297 30285 +com 63298 30285 ua 29037 17475 -html 25077 15037 +html 25079 15039 tr 16770 11857 net 16387 11686 -php 14373 10307 +php 14374 10307 yandsearch 12024 9484 by 8192 6915 yandex 7211 6124 org 4890 4514 -kz 4677 4209 +kz 4679 4211 tv 4400 3928 su 2602 2396 phtml 2409 2226 From fff18f78db5ae6212b79dfe9fc6332ac70ec6f42 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 
25 Jun 2019 12:12:28 +0300 Subject: [PATCH 114/191] Fix tail detection --- dbms/src/Functions/URL/domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 16c154cde1f..540072dd045 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -39,7 +39,7 @@ inline StringRef getURLHost(const char * data, size_t size) if (pos != data) { - StringRef scheme = getURLScheme(data, end - pos); + StringRef scheme = getURLScheme(data, pos - data - 2); Pos scheme_end = data + scheme.size; if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) return StringRef{}; From 6f6c1167bcde6b075b32bcac44bf17b169f76ae8 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 12:44:55 +0300 Subject: [PATCH 115/191] Fix --- dbms/src/Functions/config_functions.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/config_functions.h.in b/dbms/src/Functions/config_functions.h.in index a6b5e9790c0..7d395741b78 100644 --- a/dbms/src/Functions/config_functions.h.in +++ b/dbms/src/Functions/config_functions.h.in @@ -8,3 +8,4 @@ #cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON +#cmakedefine01 USE_H3 From 8ad592dd07cdbc5cdaed3390c0d885e46e681d41 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 14:27:39 +0300 Subject: [PATCH 116/191] Fix?? 
--- .../Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 758408114a8..1ee9803dda3 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -41,6 +41,7 @@ const char * auto_config_build[] "USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", + "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", "USE_RE2_ST", "@USE_RE2_ST@", "USE_VECTORCLASS", "@USE_VECTORCLASS@", From 737abcdbfc5e34bcb928c0d63c04af555c68cf1d Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 15:43:07 +0300 Subject: [PATCH 117/191] Finally fix?? --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index bc2b44514ee..6621bc40b42 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,5 +1,5 @@ +#include "config_functions.h" #if USE_H3 - #include #include #include From ff72cf48933efaec5ef83b776c7a9df585f0fd0e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Jun 2019 18:54:47 +0300 Subject: [PATCH 118/191] Trying to do everything in one pass --- dbms/src/Functions/URL/domain.h | 54 ++++++++++++++----- .../00044_any_left_join_string.reference | 2 +- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 540072dd045..74a41811ebd 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -3,6 +3,7 @@ #include "protocol.h" #include #include +#include namespace DB @@ -31,22 +32,51 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - Pos 
slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos < end - 1 && *(slash_pos + 1) == '/') - pos = slash_pos + 2; - else - pos = data; - - if (pos != data) + if (*pos == '/' && *(pos + 1) == '/') + pos += 2; + else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme { - StringRef scheme = getURLScheme(data, pos - data - 2); - Pos scheme_end = data + scheme.size; - if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) - return StringRef{}; + for (++pos; pos < end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch(*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exit_loop; + } + } + } + exit_loop:; + if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } - auto start_of_host = pos; Pos dot_pos = nullptr; + auto start_of_host = pos; for (; pos < end; ++pos) { switch (*pos) diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index 05e97417263..364115011f9 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,4 +1,4 @@ - 4508175 712434 + 4508153 712428 auto.ru 576845 8935 yandex.ru 410776 111278 korer.ru 277987 0 From bd56f219aba3974ebad19f8762a31d6b413de270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Jun 2019 19:15:00 +0300 Subject: [PATCH 119/191] Fix style --- dbms/src/Functions/URL/domain.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 74a41811ebd..43f99092b13 100644 --- a/dbms/src/Functions/URL/domain.h +++ 
b/dbms/src/Functions/URL/domain.h @@ -40,7 +40,7 @@ inline StringRef getURLHost(const char * data, size_t size) { if (!isAlphaNumericASCII(*pos)) { - switch(*pos) + switch (*pos) { case '.': case '-': @@ -68,8 +68,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } } - exit_loop:; - if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') +exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') pos += 3; else pos = data; From d933b024bd6b59450ae12f2fe5d0ad2ce5f2c8a4 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 00:49:42 +0300 Subject: [PATCH 120/191] Fix now??? --- dbms/src/Functions/registerFunctions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 02619fc3e3a..88f549ea01b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,6 +1,7 @@ #include #include #include "config_core.h" +#include "config_functions.h" namespace DB { From 6bc851b74b3a4c2aa9384312e7299a90d7245651 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:06:00 +0300 Subject: [PATCH 121/191] Compile h3 in docker --- docker/packager/deb/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4e989494165..6f6bbf1c0b5 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ + libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list @@ -69,5 +70,7 @@ RUN apt-get --allow-unauthenticated update -y \ tzdata \ gperf +RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. 
&& rm -rf h3 + COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 7a5979cc0a041ed47e9894a5e2fc0a8bb99df3da Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:11:28 +0300 Subject: [PATCH 122/191] Fix bug --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 6621bc40b42..7cc89357fc0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -110,7 +110,6 @@ public: { const auto col_vec_lat = static_cast *>(col_lat); const auto col_vec_lon = static_cast *>(col_lon); - const auto col_vec_res = static_cast *>(col_res); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -122,6 +121,7 @@ public: const double lon = col_vec_lon->getData()[row]; if (!is_const_resulution) { + const auto col_vec_res = static_cast *>(col_res); resolution = col_vec_res->getData()[row]; } From f740334ee5a0bf2693fc3d736e9fe263c3306bc3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 13:18:12 +0300 Subject: [PATCH 123/191] Faster scheme search Add docs --- dbms/src/Functions/URL/domain.h | 52 +++++-------------- .../query_language/functions/url_functions.md | 2 +- .../query_language/functions/url_functions.md | 2 +- 3 files changed, 14 insertions(+), 42 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 43f99092b13..fe9e8f34266 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -4,7 +4,7 @@ #include #include #include - +#include namespace DB { @@ -12,6 +12,8 @@ namespace DB namespace { +const ASCIICaseSensitiveStringSearcher SCHEME_SEARCHER{"://", 3}; + inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) @@ -33,45 +35,15 @@ inline StringRef getURLHost(const char * data, size_t size) Pos end = data + size; if (*pos == 
'/' && *(pos + 1) == '/') - pos += 2; - else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme { - for (++pos; pos < end; ++pos) - { - if (!isAlphaNumericASCII(*pos)) - { - switch (*pos) - { - case '.': - case '-': - case '+': - break; - case ' ': /// restricted symbols - case '\t': - case '<': - case '>': - case '%': - case '{': - case '}': - case '|': - case '\\': - case '^': - case '~': - case '[': - case ']': - case ';': - case '=': - case '&': - return StringRef{}; - default: - goto exit_loop; - } - } - } -exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') - pos += 3; - else - pos = data; + pos += 2; + } + else + { + size_t max_scheme_size = std::min(size, 16UL); + Pos scheme_end = reinterpret_cast(SCHEME_SEARCHER.search(reinterpret_cast(data), max_scheme_size)); + if (scheme_end != data + max_scheme_size) + pos = scheme_end + 3; } Pos dot_pos = nullptr; @@ -91,7 +63,7 @@ exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) = case '@': /// myemail@gmail.com start_of_host = pos + 1; break; - case ' ': /// restricted symbols + case ' ': /// restricted symbols in whole URL case '\t': case '<': case '>': diff --git a/docs/en/query_language/functions/url_functions.md b/docs/en/query_language/functions/url_functions.md index 19b12bd5b21..1f9ee0f928d 100644 --- a/docs/en/query_language/functions/url_functions.md +++ b/docs/en/query_language/functions/url_functions.md @@ -12,7 +12,7 @@ Returns the protocol. Examples: http, ftp, mailto, magnet... ### domain -Gets the domain. +Gets the domain. Cut scheme by substring '://'. Size of cutted scheme is less than 16 bytes. Scheme correctness is not checked. 
### domainWithoutWWW diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/query_language/functions/url_functions.md index 4b4fdc9adda..1c209c95e80 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/query_language/functions/url_functions.md @@ -10,7 +10,7 @@ Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain -Возвращает домен. +Возвращает домен. Отсекает схему по подстроке '://'. Размер схемы не более 16 байт. Корректность схемы не проверяется. ### domainWithoutWWW Возвращает домен, удалив не более одного 'www.' с начала, если есть. From b324a9333dabc2052384d42b24c2f902ab253e21 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 14:52:36 +0300 Subject: [PATCH 124/191] Set include path --- cmake/find_h3.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 7f19157f978..9417dcb1df5 100644 --- a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -1,5 +1,7 @@ option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) +set (H3_INCLUDE_PATHS /usr/local/include/h3) + if (USE_INTERNAL_H3_LIBRARY) set (H3_LIBRARY h3) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) From 30c7055d3b4dbda81021dfb6d77af686ff610917 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 17:15:29 +0300 Subject: [PATCH 125/191] Fix --- cmake/find_h3.cmake | 2 +- dbms/src/Functions/geoToH3.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 9417dcb1df5..802f5aff05e 100644 --- a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -7,7 +7,7 @@ if (USE_INTERNAL_H3_LIBRARY) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) else () find_library (H3_LIBRARY h3) - find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) + find_path (H3_INCLUDE_DIR NAMES h3api.h PATHS ${H3_INCLUDE_PATHS}) 
endif () if (H3_LIBRARY AND H3_INCLUDE_DIR) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 7cc89357fc0..74f30f3df93 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,7 @@ extern "C" { -#include +#include } namespace DB @@ -126,7 +126,8 @@ public: } GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); @@ -144,7 +145,8 @@ public: const double lon = col_const_lon->getValue(); GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); From 306e27c152805825e681d590d8e043caa909ead6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 18:13:29 +0300 Subject: [PATCH 126/191] Return old behaviour --- dbms/src/Functions/URL/domain.h | 41 +++++++++++++++++-- .../query_language/functions/url_functions.md | 2 +- .../query_language/functions/url_functions.md | 2 +- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index fe9e8f34266..88ca94cfd33 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -40,10 +40,43 @@ inline StringRef getURLHost(const char * data, size_t size) } else { - size_t max_scheme_size = std::min(size, 16UL); - Pos scheme_end = reinterpret_cast(SCHEME_SEARCHER.search(reinterpret_cast(data), max_scheme_size)); - if (scheme_end != data + max_scheme_size) - pos = scheme_end + 3; + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + 
case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } Pos dot_pos = nullptr; diff --git a/docs/en/query_language/functions/url_functions.md b/docs/en/query_language/functions/url_functions.md index 1f9ee0f928d..93edf705e7e 100644 --- a/docs/en/query_language/functions/url_functions.md +++ b/docs/en/query_language/functions/url_functions.md @@ -12,7 +12,7 @@ Returns the protocol. Examples: http, ftp, mailto, magnet... ### domain -Gets the domain. Cut scheme by substring '://'. Size of cutted scheme is less than 16 bytes. Scheme correctness is not checked. +Gets the domain. Cut scheme with size less than 16 bytes. ### domainWithoutWWW diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/query_language/functions/url_functions.md index 1c209c95e80..1897d1b28a3 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/query_language/functions/url_functions.md @@ -10,7 +10,7 @@ Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain -Возвращает домен. Отсекает схему по подстроке '://'. Размер схемы не более 16 байт. Корректность схемы не проверяется. +Возвращает домен. Отсекает схему размером не более 16 байт. ### domainWithoutWWW Возвращает домен, удалив не более одного 'www.' с начала, если есть. 
From 197f1eedd27f2c831227ffaa0c8c2b8f9e0dfd5a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 18:22:15 +0300 Subject: [PATCH 127/191] Remove searcher --- dbms/src/Functions/URL/domain.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 88ca94cfd33..141887d8e96 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -12,8 +11,6 @@ namespace DB namespace { -const ASCIICaseSensitiveStringSearcher SCHEME_SEARCHER{"://", 3}; - inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) From c22322a4464fdbff8c87c84ee06d5435167bff7c Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 19:01:18 +0300 Subject: [PATCH 128/191] 3rd party header compile fix --- dbms/src/Functions/geoToH3.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 74f30f3df93..fccced742c2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,10 @@ extern "C" { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdocumentation" #include +#pragma clang diagnostic pop } namespace DB From 718da84f41051ff16cc7c2060a684bdde3a87c7f Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 20:02:31 +0300 Subject: [PATCH 129/191] Fix --- dbms/src/Functions/geoToH3.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index fccced742c2..41ca3cd31e2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,10 +12,16 @@ extern "C" { +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" +#endif + #include + +#ifdef __clang__ #pragma clang 
diagnostic pop +#endif } namespace DB From a719933c586c25b4d34907b980821f8e40607f98 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Jun 2019 15:51:01 +0300 Subject: [PATCH 130/191] Fix initial size of some inline PODArray's. A template parameter of PODArray named INITIAL_SIZE didn't make its units clear, which made some callers to erroneously assume that it specifies the number of elements and not the number of bytes. Rename it, fix the wrong usages and, where possible, use the PODArrayWithStackMemory typedef for arrays with inline memory. --- .../AggregateFunctionSequenceMatch.h | 6 ++-- .../AggregateFunctionTimeSeriesGroupSum.h | 3 +- .../AggregateFunctionWindowFunnel.h | 5 +-- dbms/src/AggregateFunctions/QuantileExact.h | 3 +- dbms/src/AggregateFunctions/QuantileTDigest.h | 3 +- .../src/AggregateFunctions/ReservoirSampler.h | 3 +- .../ReservoirSamplerDeterministic.h | 3 +- dbms/src/Common/PODArray.h | 31 ++++++++++++------- dbms/src/Functions/FunctionsVisitParam.h | 3 +- 9 files changed, 28 insertions(+), 32 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 017b6d113dc..80860fdb62a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final using Comparator = ComparePairFirst; bool sorted = true; - static constexpr size_t bytes_in_arena = 64; - PODArray, bytes_in_arena>> events_list; + PODArrayWithStackMemory events_list; void add(const Timestamp timestamp, const Events & events) { @@ -203,8 +202,7 @@ private: PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {} }; - static constexpr size_t bytes_on_stack = 64; - using PatternActions = PODArray, bytes_on_stack>>; + using PatternActions = PODArrayWithStackMemory; Derived & derived() { return 
static_cast(*this); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index c74ad8c0bdb..5e2a9b15f4e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData } }; - static constexpr size_t bytes_on_stack = 128; typedef std::map Series; - typedef PODArray, bytes_on_stack>> AggSeries; + typedef PODArrayWithStackMemory AggSeries; Series ss; AggSeries result; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 9a738d3fefb..1e3c005f73f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -35,10 +35,7 @@ template struct AggregateFunctionWindowFunnelData { using TimestampEvent = std::pair; - - static constexpr size_t bytes_on_stack = 64; - using TimestampEvents = PODArray, bytes_on_stack>>; - + using TimestampEvents = PODArray; using Comparator = ComparePairFirst; bool sorted = true; diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index b4398e8bb7f..a5b616669b9 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -27,8 +27,7 @@ struct QuantileExact { /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. 
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray); - - using Array = PODArray, bytes_in_arena>>; + using Array = PODArrayWithStackMemory; Array array; void add(const Value & x) diff --git a/dbms/src/AggregateFunctions/QuantileTDigest.h b/dbms/src/AggregateFunctions/QuantileTDigest.h index e9f261d4c21..f7201ef3b0d 100644 --- a/dbms/src/AggregateFunctions/QuantileTDigest.h +++ b/dbms/src/AggregateFunctions/QuantileTDigest.h @@ -86,8 +86,7 @@ class QuantileTDigest /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); - - using Summary = PODArray, bytes_in_arena>>; + using Summary = PODArrayWithStackMemory; Summary summary; Count count = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index ad5bf10f48f..30d72709ac2 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -194,8 +194,7 @@ private: friend void rs_perf_test(); /// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements. - static constexpr size_t bytes_on_stack = 64; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArrayWithStackMemory; size_t sample_count; size_t total_values = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h index c543e662b2a..4beeecd93bc 100644 --- a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -164,9 +164,8 @@ public: private: /// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements. 
- static constexpr size_t bytes_on_stack = 64; using Element = std::pair; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArray; size_t sample_count; size_t total_values{}; diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 0e7d547a7d0..01085a2c5a7 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend) * Only part of the std::vector interface is supported. * * The default constructor creates an empty object that does not allocate memory. - * Then the memory is allocated at least INITIAL_SIZE bytes. + * Then the memory is allocated at least initial_bytes bytes. * * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector. * @@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize]; /** Base class that depend only on size of element, not on element itself. * You can static_cast to this class if you want to insert some data regardless to the actual type T. */ -template +template class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization { protected: @@ -161,7 +161,8 @@ protected: { // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise, // memory issue such as corruption could appear in edge case. 
- realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)), + realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), + minimum_memory_for_elements(1)), std::forward(allocator_params)...); } else @@ -257,11 +258,11 @@ public: } }; -template , size_t pad_right_ = 0, size_t pad_left_ = 0> -class PODArray : public PODArrayBase +template , size_t pad_right_ = 0, size_t pad_left_ = 0> +class PODArray : public PODArrayBase { protected: - using Base = PODArrayBase; + using Base = PODArrayBase; T * t_start() { return reinterpret_cast(this->c_start); } T * t_end() { return reinterpret_cast(this->c_end); } @@ -618,17 +619,23 @@ public: } }; -template -void swap(PODArray & lhs, PODArray & rhs) +template +void swap(PODArray & lhs, PODArray & rhs) { lhs.swap(rhs); } /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ -template > -using PaddedPODArray = PODArray; +template > +using PaddedPODArray = PODArray; -template -using PODArrayWithStackMemory = PODArray, integerRoundUp(stack_size_in_bytes, sizeof(T))>>; +/** A helper for declaring PODArray that uses inline memory. + * The initial size is set to use all the inline bytes, since using less would + * only add some extra allocation calls. 
+ */ +template +using PODArrayWithStackMemory = PODArray, rounded_bytes>>; } diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 09cc3106719..41a49dfd908 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -91,8 +91,7 @@ struct ExtractBool struct ExtractRaw { - static constexpr size_t bytes_on_stack = 64; - using ExpectChars = PODArray, bytes_on_stack>>; + using ExpectChars = PODArrayWithStackMemory; static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) { From 0116c10e41919073d2050b2e418fc08c070ec8af Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Jun 2019 19:21:05 +0300 Subject: [PATCH 131/191] Require explicit type in unalignedStore This is a follow-up to PR #5786, which fixed a segfault caused by an unexpected deduced type for unalignedStore. To prevent future errors of this kind, require a caller to specify the stored type explicitly. 
--- dbms/src/Columns/ColumnVector.cpp | 2 +- dbms/src/Compression/CompressionCodecDelta.cpp | 2 +- dbms/src/Compression/CompressionCodecDoubleDelta.cpp | 12 ++++++------ dbms/src/Compression/CompressionCodecGorilla.cpp | 8 ++++---- dbms/src/Compression/CompressionCodecT64.cpp | 2 +- dbms/src/Compression/LZ4_decompress_faster.cpp | 6 +++--- dbms/src/Functions/FunctionsRandom.cpp | 8 ++++---- libs/libcommon/include/common/unaligned.h | 9 ++++++++- 8 files changed, 28 insertions(+), 21 deletions(-) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 6db110ef02e..a2d6de9df80 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -33,7 +33,7 @@ template StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { auto pos = arena.allocContinue(sizeof(T), begin); - unalignedStore(pos, data[n]); + unalignedStore(pos, data[n]); return StringRef(pos, sizeof(T)); } diff --git a/dbms/src/Compression/CompressionCodecDelta.cpp b/dbms/src/Compression/CompressionCodecDelta.cpp index f5a5db04927..9f2397f8e59 100644 --- a/dbms/src/Compression/CompressionCodecDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDelta.cpp @@ -67,7 +67,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) while (source < source_end) { accumulator += unalignedLoad(source); - unalignedStore(dest, accumulator); + unalignedStore(dest, accumulator); source += sizeof(T); dest += sizeof(T); diff --git a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp index b40b2abccfa..8f306f3f06a 100644 --- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp @@ -90,7 +90,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const char * source_end = source + source_size; const UInt32 items_count = source_size / sizeof(T); - 
unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -99,7 +99,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -109,7 +109,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { const T curr_value = unalignedLoad(source); prev_delta = static_cast(curr_value - prev_value); - unalignedStore(dest, prev_delta); + unalignedStore(dest, prev_delta); source += sizeof(curr_value); dest += sizeof(prev_delta); @@ -164,7 +164,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -174,7 +174,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) { prev_delta = unalignedLoad(source); prev_value = static_cast(prev_value + prev_delta); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_delta); dest += sizeof(prev_value); @@ -209,7 +209,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) // else if first bit is zero, no need to read more data. 
const T curr_value = static_cast(prev_value + prev_delta + double_delta); - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_delta = curr_value - prev_value; diff --git a/dbms/src/Compression/CompressionCodecGorilla.cpp b/dbms/src/Compression/CompressionCodecGorilla.cpp index f9c6b52756c..79cc6d27e81 100644 --- a/dbms/src/Compression/CompressionCodecGorilla.cpp +++ b/dbms/src/Compression/CompressionCodecGorilla.cpp @@ -94,7 +94,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const UInt32 items_count = source_size / sizeof(T); - unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -104,7 +104,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -166,7 +166,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -210,7 +210,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } // else: 0b0 prefix - use prev_value - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_xored_info = curr_xored_info; diff --git a/dbms/src/Compression/CompressionCodecT64.cpp b/dbms/src/Compression/CompressionCodecT64.cpp index cd369fc9c4e..9919f5322c5 100644 --- a/dbms/src/Compression/CompressionCodecT64.cpp +++ b/dbms/src/Compression/CompressionCodecT64.cpp @@ -390,7 +390,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco { _T min_value = min; for (UInt32 i = 0; i < 
num_elements; ++i, dst += sizeof(_T)) - unalignedStore(dst, min_value); + unalignedStore<_T>(dst, min_value); return; } diff --git a/dbms/src/Compression/LZ4_decompress_faster.cpp b/dbms/src/Compression/LZ4_decompress_faster.cpp index 387650d3dcc..0d65a06b098 100644 --- a/dbms/src/Compression/LZ4_decompress_faster.cpp +++ b/dbms/src/Compression/LZ4_decompress_faster.cpp @@ -200,7 +200,7 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o 0, 1, 2, 3, 4, 5, 6, 0, }; - unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); + unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); match += masks[offset]; } @@ -328,10 +328,10 @@ inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, }; - unalignedStore(op, + unalignedStore(op, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset))); - unalignedStore(op + 8, + unalignedStore(op + 8, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset + 8))); match += masks[offset]; diff --git a/dbms/src/Functions/FunctionsRandom.cpp b/dbms/src/Functions/FunctionsRandom.cpp index ede8c332d18..19b2f08cdba 100644 --- a/dbms/src/Functions/FunctionsRandom.cpp +++ b/dbms/src/Functions/FunctionsRandom.cpp @@ -57,10 +57,10 @@ void RandImpl::execute(char * output, size_t size) for (const char * end = output + size; output < end; output += 16) { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); } /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. 
diff --git a/libs/libcommon/include/common/unaligned.h b/libs/libcommon/include/common/unaligned.h index 2b1505ba2d3..ca73298adfb 100644 --- a/libs/libcommon/include/common/unaligned.h +++ b/libs/libcommon/include/common/unaligned.h @@ -1,6 +1,7 @@ #pragma once #include +#include template @@ -11,8 +12,14 @@ inline T unalignedLoad(const void * address) return res; } +/// We've had troubles before with wrong store size due to integral promotions +/// (e.g., unalignedStore(dest, uint16_t + uint16_t) stores an uint32_t). +/// To prevent this, make the caller specify the stored type explicitly. +/// To disable deduction of T, wrap the argument type with std::enable_if. template -inline void unalignedStore(void * address, const T & src) +inline void unalignedStore(void * address, + const typename std::enable_if::type & src) { + static_assert(std::is_trivially_copyable_v); memcpy(address, &src, sizeof(src)); } From 614ec98a42a00c84eae546f90c59c88152ac1f00 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Fri, 28 Jun 2019 21:26:24 +0300 Subject: [PATCH 132/191] Fix runtime of SPLIT_SHARED_LIBRARIES build --- dbms/src/Common/MiAllocator.cpp | 43 +++++++++++++++++++++++++++++++++ dbms/src/Common/MiAllocator.h | 33 +++---------------------- 2 files changed, 46 insertions(+), 30 deletions(-) create mode 100644 dbms/src/Common/MiAllocator.cpp diff --git a/dbms/src/Common/MiAllocator.cpp b/dbms/src/Common/MiAllocator.cpp new file mode 100644 index 00000000000..456609374ee --- /dev/null +++ b/dbms/src/Common/MiAllocator.cpp @@ -0,0 +1,43 @@ +#include + +#if USE_MIMALLOC + +#include "MiAllocator.h" +#include + +namespace DB +{ + +void * MiAllocator::alloc(size_t size, size_t alignment) +{ + if (alignment == 0) + return mi_malloc(size); + else + return mi_malloc_aligned(size, alignment); +} + +void MiAllocator::free(void * buf, size_t) +{ + mi_free(buf); +} + +void * MiAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment) +{ + if (old_ptr == nullptr) + 
return alloc(new_size, alignment); + + if (new_size == 0) + { + mi_free(old_ptr); + return nullptr; + } + + if (alignment == 0) + return mi_realloc(old_ptr, alignment); + + return mi_realloc_aligned(old_ptr, new_size, alignment); +} + +} + +#endif diff --git a/dbms/src/Common/MiAllocator.h b/dbms/src/Common/MiAllocator.h index 075328e5d94..48cfc6f9ab4 100644 --- a/dbms/src/Common/MiAllocator.h +++ b/dbms/src/Common/MiAllocator.h @@ -6,7 +6,6 @@ #error "do not include this file until USE_MIMALLOC is set to 1" #endif -#include #include namespace DB @@ -19,37 +18,11 @@ namespace DB */ struct MiAllocator { + static void * alloc(size_t size, size_t alignment = 0); - static void * alloc(size_t size, size_t alignment = 0) - { - if (alignment == 0) - return mi_malloc(size); - else - return mi_malloc_aligned(size, alignment); - } - - static void free(void * buf, size_t) - { - mi_free(buf); - } - - static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0) - { - if (old_ptr == nullptr) - return alloc(new_size, alignment); - - if (new_size == 0) - { - mi_free(old_ptr); - return nullptr; - } - - if (alignment == 0) - return mi_realloc(old_ptr, alignment); - - return mi_realloc_aligned(old_ptr, new_size, alignment); - } + static void free(void * buf, size_t); + static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0); }; } From 1a7cb519fe1a53206c056045065081a0a4080198 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 28 Jun 2019 23:09:15 +0300 Subject: [PATCH 133/191] Final test fix --- .../integration/test_storage_kafka/test.py | 2 +- .../test_kafka_virtual2.reference | 98 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 8e42a83459f..ac55718cbb2 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -401,7 
+401,7 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): ENGINE = MergeTree() ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _key, _topic, _offset FROM test.kafka; + SELECT *, _key as kafka_key, _topic as topic, _offset as offset FROM test.kafka; ''') messages = [] diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference index c20dc3513a0..50c2edbf802 100644 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference @@ -1,50 +1,50 @@ 0 virt2 0 0 - 1 virt2 1 0 - 2 virt2 2 0 - 3 virt2 3 0 - 4 virt2 4 0 - 5 virt2 5 0 - 6 virt2 6 0 - 7 virt2 7 0 - 8 virt2 8 0 - 9 virt2 9 0 - 10 virt2 10 0 - 11 virt2 11 0 - 12 virt2 12 0 - 13 virt2 13 0 - 14 virt2 14 0 - 15 virt2 15 0 - 16 virt2 16 0 - 17 virt2 17 0 - 18 virt2 18 0 - 19 virt2 19 0 - 20 virt2 20 0 - 21 virt2 21 0 - 22 virt2 22 0 - 23 virt2 23 0 - 24 virt2 24 0 - 25 virt2 25 1 - 26 virt2 26 1 - 27 virt2 27 1 - 28 virt2 28 1 - 29 virt2 29 1 - 30 virt2 30 1 - 31 virt2 31 1 - 32 virt2 32 1 - 33 virt2 33 1 - 34 virt2 34 1 - 35 virt2 35 1 - 36 virt2 36 1 - 37 virt2 37 1 - 38 virt2 38 1 - 39 virt2 39 1 - 40 virt2 40 1 - 41 virt2 41 1 - 42 virt2 42 1 - 43 virt2 43 1 - 44 virt2 44 1 - 45 virt2 45 1 - 46 virt2 46 1 - 47 virt2 47 1 - 48 virt2 48 1 - 49 virt2 49 1 + 1 virt2 1 1 + 2 virt2 2 2 + 3 virt2 3 3 + 4 virt2 4 4 + 5 virt2 5 5 + 6 virt2 6 6 + 7 virt2 7 7 + 8 virt2 8 8 + 9 virt2 9 9 + 10 virt2 10 10 + 11 virt2 11 11 + 12 virt2 12 12 + 13 virt2 13 13 + 14 virt2 14 14 + 15 virt2 15 15 + 16 virt2 16 16 + 17 virt2 17 17 + 18 virt2 18 18 + 19 virt2 19 19 + 20 virt2 20 20 + 21 virt2 21 21 + 22 virt2 22 22 + 23 virt2 23 23 + 24 virt2 24 24 + 25 virt2 25 25 + 26 virt2 26 26 + 27 virt2 27 27 + 28 virt2 28 28 + 29 virt2 29 29 + 30 virt2 30 30 + 31 virt2 31 31 + 32 virt2 32 32 + 33 virt2 
33 33 + 34 virt2 34 34 + 35 virt2 35 35 + 36 virt2 36 36 + 37 virt2 37 37 + 38 virt2 38 38 + 39 virt2 39 39 + 40 virt2 40 40 + 41 virt2 41 41 + 42 virt2 42 42 + 43 virt2 43 43 + 44 virt2 44 44 + 45 virt2 45 45 + 46 virt2 46 46 + 47 virt2 47 47 + 48 virt2 48 48 + 49 virt2 49 49 From 1ed6a6a1ce85550008eead12c0209abc3d1f9f15 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 29 Jun 2019 14:34:26 +0300 Subject: [PATCH 134/191] Improved integration tests guide. --- dbms/tests/integration/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 1b2d190b383..06819af7668 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -12,7 +12,7 @@ You must install latest Docker from https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#set-up-the-repository Don't use Docker from your system repository. -* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip` +* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python-pip libpq-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. 
To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf pytest-timeout` From 8b1651ae1f06466c8e64346a98da59b40e4eea92 Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 29 Jun 2019 15:04:47 +0300 Subject: [PATCH 135/191] arcadia fixes (#5795) --- dbms/programs/client/readpassphrase/readpassphrase.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/programs/client/readpassphrase/readpassphrase.h b/dbms/programs/client/readpassphrase/readpassphrase.h index d504cff5f00..272c822423a 100644 --- a/dbms/programs/client/readpassphrase/readpassphrase.h +++ b/dbms/programs/client/readpassphrase/readpassphrase.h @@ -29,6 +29,11 @@ //#include "includes.h" #include "config_client.h" +// Should not be included on BSD systems, but if it happen... +#ifdef HAVE_READPASSPHRASE +# include_next +#endif + #ifndef HAVE_READPASSPHRASE # ifdef __cplusplus From 3197b0748d2e6c333290a9186a43a77b892cbc04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 18:13:52 +0300 Subject: [PATCH 136/191] Updated test --- .../0_stateless/00910_client_window_size_detection.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference index 85322d0b541..f96ac067218 100644 --- a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference +++ b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference @@ -1 +1 @@ -79 +105 From 6ff0a88eb36af3b034275360b09483c7a2c57686 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 18:26:19 +0300 Subject: [PATCH 137/191] Fixed minor issue in query formatting --- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 5 ++--- .../queries/0_stateless/00909_kill_not_initialized_query.sh | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 59c10d74969..47be2008284 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -184,14 +184,14 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS if (using_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USING " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : ""); settings.ostr << "("; using_expression_list->formatImpl(settings, state, frame); settings.ostr << ")"; } else if (on_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "ON " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); on_expression->formatImpl(settings, state, frame); } } @@ -227,7 +227,6 @@ void ASTTablesInSelectQueryElement::formatImpl(const FormatSettings & settings, } table_expression->formatImpl(settings, state, frame); - settings.ostr << " "; if (table_join) table_join->as().formatImplAfterTable(settings, state, frame); diff --git a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh index d8a4f29b30f..67454f676b3 100755 --- a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh +++ b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE cannot_kill_query (x UInt64) ENGINE = MergeT $CLICKHOUSE_CLIENT -q "INSERT INTO cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null # This SELECT query will run for a long time. It's used as bloker for ALTER query. It will be killed with SYNC kill. 
-query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1, max_block_size=1" +query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1, max_block_size = 1" $CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null & sleep 1 # queries should be in strict order @@ -23,7 +23,7 @@ sleep 1 # This SELECT query will also run for a long time. Also it's blocked by ALTER query. It will be killed with ASYNC kill. # This is main idea which we check -- blocked queries can be killed with ASYNC kill. -query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1" +query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1" $CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null & sleep 1 # just to be sure that kill of $query_to_kill will be executed after $query_to_kill. From a8e1c8a7d21301c43c3a10397176a200e96e9fac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:13:28 +0300 Subject: [PATCH 138/191] Fixed formatting of invalid queries with ambiguous aliases --- dbms/src/Parsers/ASTWithAlias.cpp | 38 +++++++++++-------- dbms/src/Parsers/IAST.h | 12 +++++- ...59_format_with_different_aliases.reference | 3 ++ .../00959_format_with_different_aliases.sh | 12 ++++++ 4 files changed, 48 insertions(+), 17 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference create mode 100755 dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 67a4401f9a5..916d8c7346c 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -16,27 +16,33 @@ void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settin void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, 
FormatStateStacked frame) const { - if (!alias.empty()) - { - /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. - if (!state.printed_asts_with_alias.emplace(frame.current_select, alias).second) - { - settings.writeIdentifier(alias); - return; - } - } + /// We will compare formatting result with previously formatted nodes. + std::stringstream temporary_buffer; + FormatSettings temporary_settings(temporary_buffer, settings); - /// If there is an alias, then parentheses are required around the entire expression, including the alias. Because a record of the form `0 AS x + 0` is syntactically invalid. + /// If there is an alias, then parentheses are required around the entire expression, including the alias. + /// Because a record of the form `0 AS x + 0` is syntactically invalid. if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; + temporary_buffer << '('; - formatImplWithoutAlias(settings, state, frame); + formatImplWithoutAlias(temporary_settings, state, frame); - if (!alias.empty()) + /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. + /// This is needed because the query can become extraordinary large after substitution of aliases. + if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, temporary_buffer.str()).second) { - writeAlias(alias, settings); - if (frame.need_parens) - settings.ostr <<')'; + settings.writeIdentifier(alias); + } + else + { + settings.ostr << temporary_buffer.rdbuf(); + + if (!alias.empty()) + { + writeAlias(alias, settings); + if (frame.need_parens) + settings.ostr << ')'; + } } } diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 8ebfd735874..04656816133 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -161,6 +161,13 @@ public: nl_or_ws = one_line ? 
' ' : '\n'; } + FormatSettings(std::ostream & ostr_, const FormatSettings & other) + : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), + always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style) + { + nl_or_ws = one_line ? ' ' : '\n'; + } + void writeIdentifier(const String & name) const; }; @@ -170,7 +177,10 @@ public: /** The SELECT query in which the alias was found; identifier of a node with such an alias. * It is necessary that when the node has met again, output only the alias. */ - std::set> printed_asts_with_alias; + std::set> printed_asts_with_alias; }; /// The state that is copied when each node is formatted. For example, nesting level. diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference new file mode 100644 index 00000000000..8feb70c2fc4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference @@ -0,0 +1,3 @@ +SELECT a + b AS x, x +SELECT a + b AS x, a + c AS x +SELECT a + b AS x, x diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh new file mode 100755 index 00000000000..cad1083ad60 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "SELECT a + b AS x, a + b AS x" | $format +echo "SELECT a + b AS x, a + c AS x" | $format +echo "SELECT a + b AS x, x" | $format From 6566bb7088ef341926d04abbc9bc3997fd99f03e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:18:59 +0300 Subject: [PATCH 139/191] Updated tests --- .../gtest_transform_query_for_external_database.cpp | 10 +++++----- .../00731_long_merge_tree_select_opened_files.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 4a25bff5d87..bcee0b8d8e1 100644 --- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -54,22 +54,22 @@ void check(const std::string & query, const std::string & expected, const Contex TEST(TransformQueryForExternalDatabase, InWithSingleElement) { check("SELECT column FROM test.table WHERE 1 IN (1)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", state().context, state().columns); check("SELECT column FROM test.table WHERE column IN (1, 2)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT IN ('hello', 'world')", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", state().context, state().columns); } TEST(TransformQueryForExternalDatabase, Like) { check("SELECT column FROM test.table WHERE column LIKE '%hello%'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE 
\"column\" LIKE '%hello%'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" LIKE '%hello%'", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT LIKE 'w%rld'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", state().context, state().columns); } diff --git a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index bb67ae9fa83..350c9b05ea8 100755 --- a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT $settings -q "INSERT INTO merge_tree_table SELECT (intHash64( $CLICKHOUSE_CLIENT $settings -q "OPTIMIZE TABLE merge_tree_table FINAL;" -toching_many_parts_query="SELECT count() from (SELECT toDayOfWeek(date) as m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" +toching_many_parts_query="SELECT count() FROM (SELECT toDayOfWeek(date) AS m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" $CLICKHOUSE_CLIENT $settings -q "$toching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" From 2638bb79f72a865865868f04df5105492f6030b0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:22:02 +0300 Subject: [PATCH 140/191] Updated test --- .../00826_cross_to_inner_join.reference | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 04c21a1e29a..24649ea3acb 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference 
+++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -56,26 +56,26 @@ comma nullable 1 1 1 1 2 2 1 2 cross -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \n, \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable vs not nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = 
t2_00826.b\nWHERE a = t2_00826.b cross self -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nCROSS JOIN t1_00826 AS y \nWHERE (a = y.a) AND (b = y.b) -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nCROSS JOIN t1_00826 AS y\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) cross one table expr -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b cross multiple ands -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) cross and inside and -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS 
JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) From 498a2072b5c07091f896cf72771af6c108ae481b Mon Sep 17 
00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:58:32 +0300 Subject: [PATCH 141/191] Fixed bug in query formatting with TEMPORARY tables --- dbms/programs/client/Client.cpp | 13 ++++--- .../Parsers/ASTQueryWithTableAndOutput.cpp | 14 +++++++ dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 10 ++--- dbms/src/Parsers/ASTShowTablesQuery.cpp | 37 +++++++++++++++++++ dbms/src/Parsers/ASTShowTablesQuery.h | 28 +------------- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 4 ++ 6 files changed, 68 insertions(+), 38 deletions(-) create mode 100644 dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp create mode 100644 dbms/src/Parsers/ASTShowTablesQuery.cpp diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index f0f1c0379f3..2da1c4a987d 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -903,12 +903,15 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(query_parameters); - visitor.visit(parsed_query); + /// We will always rewrite query (even if there are no query_parameters) because it will help to find errors in query formatter. + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); - /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. - query = serializeAST(*parsed_query); + /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. 
+ query = serializeAST(*parsed_query); + } connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp new file mode 100644 index 00000000000..1e16fb6f0ee --- /dev/null +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp @@ -0,0 +1,14 @@ +#include + + +namespace DB +{ + +void ASTQueryWithTableAndOutput::formatHelper(const FormatSettings & settings, const char * name) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : ""); + settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); +} + +} + diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index 3f3fd036d78..594876ace7b 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -9,7 +9,7 @@ namespace DB /** Query specifying table name and, possibly, the database and the FORMAT section. - */ + */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput { public: @@ -18,11 +18,7 @@ public: bool temporary{false}; protected: - void formatHelper(const FormatSettings & settings, const char * name) const - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : "") - << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - } + void formatHelper(const FormatSettings & settings, const char * name) const; }; @@ -43,7 +39,7 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { - formatHelper(settings, AstIDAndQueryNames::Query); + formatHelper(settings, temporary ? 
AstIDAndQueryNames::QueryTemporary : AstIDAndQueryNames::Query); } }; diff --git a/dbms/src/Parsers/ASTShowTablesQuery.cpp b/dbms/src/Parsers/ASTShowTablesQuery.cpp new file mode 100644 index 00000000000..dd7b0d013ad --- /dev/null +++ b/dbms/src/Parsers/ASTShowTablesQuery.cpp @@ -0,0 +1,37 @@ +#include +#include + + +namespace DB +{ + +ASTPtr ASTShowTablesQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; +} + +void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << "TABLES" << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") + << std::quoted(like, '\''); + } +} + +} + diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 58915df0e60..9b994b6e31f 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -23,34 +23,10 @@ public: /** Get the text that identifies this element. */ String getID(char) const override { return "ShowTables"; } - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; - } + ASTPtr clone() const override; protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - if (databases) - { - settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); - } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW TABLES" << (settings.hilite ? hilite_none : ""); - - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); - - if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") - << std::quoted(like, '\''); - } - } + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index e68a3b46e4a..f2fa7c506a6 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -10,24 +10,28 @@ struct ASTExistsQueryIDAndQueryNames { static constexpr auto ID = "ExistsQuery"; static constexpr auto Query = "EXISTS TABLE"; + static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; }; struct ASTShowCreateTableQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateTableQuery"; static constexpr auto Query = "SHOW CREATE TABLE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE"; }; struct ASTShowCreateDatabaseQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateDatabaseQuery"; static constexpr auto Query = "SHOW CREATE DATABASE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE"; }; struct ASTDescribeQueryExistsQueryIDAndQueryNames { static constexpr auto ID = "DescribeQuery"; static constexpr auto Query = "DESCRIBE TABLE"; + static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE"; }; using ASTExistsQuery = ASTQueryWithTableAndOutputImpl; From 3f67572075c0682a309ba72b6f26a290c8a665f5 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Sat, 29 Jun 2019 13:58:46 -0300 Subject: 
[PATCH 142/191] =?UTF-8?q?=D0=B7=D0=B0=D0=BC=D0=B5=D1=87=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D1=87=D1=82=D0=BE=20MV=20=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=B5=D1=82=20=D0=B1=D1=8B=D1=82=D1=8C=20=D0=B1=D0=BE?= =?UTF-8?q?=D0=BB=D0=B5=D0=B5=20=D0=BE=D0=B4=D0=BD=D0=BE=D0=B3=D0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/ru/operations/table_engines/kafka.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/operations/table_engines/kafka.md index bdbc13e171a..3fe2e4d5cba 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/operations/table_engines/kafka.md @@ -97,6 +97,7 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format 3. Создайте материализованное представление, которое преобразует данные от движка и помещает их в ранее созданную таблицу. Когда к движку присоединяется материализованное представление (`MATERIALIZED VIEW`), оно начинает в фоновом режиме собирать данные. Это позволяет непрерывно получать сообщения от Kafka и преобразовывать их в необходимый формат с помощью `SELECT`. +Материализованных представлений у одной kafka таблицы может быть сколько угодно, они не считывают данные из таблицы kafka непосредственно, а получают новые записи (блоками), таким образом можно писать в несколько таблиц с разным уровнем детализации (с группировкой - агрегацией и без). 
Пример: From b3e8e397cbacc7769eaff945d557267908504924 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Sat, 29 Jun 2019 14:09:39 -0300 Subject: [PATCH 143/191] note about several MV to one kafka table --- docs/en/operations/table_engines/kafka.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 22d0384fd42..0c9a10c63fc 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -100,6 +100,7 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 3. Create a materialized view that converts data from the engine and puts it into a previously created table. When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. +One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without). Example: From de8a15b5f4ba440510c8262346d3ba27bba7f75c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 20:19:46 +0300 Subject: [PATCH 144/191] Removed unused method --- dbms/src/Interpreters/ExternalLoader.cpp | 18 ------------------ dbms/src/Interpreters/ExternalLoader.h | 1 - 2 files changed, 19 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 658f17b531d..da40bdfbb5b 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -343,19 +343,6 @@ public: enable_async_loading = enable; } - /// Returns the names of all the objects in the configuration (loaded or not). 
- std::vector getNames() const - { - std::lock_guard lock{mutex}; - std::vector all_names; - for (const auto & name_and_info : infos) - { - const String & name = name_and_info.first; - all_names.emplace_back(name); - } - return all_names; - } - size_t getNumberOfNames() const { std::lock_guard lock{mutex}; @@ -1008,11 +995,6 @@ void ExternalLoader::enablePeriodicUpdates(bool enable_, const ExternalLoaderUpd periodic_updater->enable(enable_, settings_); } -std::vector ExternalLoader::getNames() const -{ - return loading_dispatcher->getNames(); -} - size_t ExternalLoader::getNumberOfNames() const { return loading_dispatcher->getNumberOfNames(); diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 8fe565c7667..d14506371e6 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -108,7 +108,6 @@ public: void enablePeriodicUpdates(bool enable, const ExternalLoaderUpdateSettings & settings = {}); /// Returns the names of all the objects in the configuration (loaded or not). - std::vector getNames() const; size_t getNumberOfNames() const; /// Returns the status of the object. 
From 90898905e0644352fedee8a87016c0ea17f12bc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 20:27:32 +0300 Subject: [PATCH 145/191] Allow to DROP database with Dictionary engine --- dbms/src/Databases/DatabaseDictionary.cpp | 2 +- dbms/src/Interpreters/ExternalLoader.cpp | 19 +++++++++++-------- dbms/src/Interpreters/ExternalLoader.h | 6 +++--- .../00080_show_tables_and_system_tables.sql | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 01aa397148f..b11f4de88b8 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -86,7 +86,7 @@ DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context, con bool DatabaseDictionary::empty(const Context & context) const { - return context.getExternalDictionaries().getNumberOfNames() == 0; + return !context.getExternalDictionaries().hasCurrentlyLoadedObjects(); } StoragePtr DatabaseDictionary::detachTable(const String & /*table_name*/) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index da40bdfbb5b..1bccad41b7a 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -343,12 +343,6 @@ public: enable_async_loading = enable; } - size_t getNumberOfNames() const - { - std::lock_guard lock{mutex}; - return infos.size(); - } - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -406,6 +400,15 @@ public: return count; } + bool hasCurrentlyLoadedObjects() const + { + std::lock_guard lock{mutex}; + for (auto & [name, info] : infos) + if (info.loaded()) + return true; + return false; + } + /// Starts loading of a specified object. 
void load(const String & name) { @@ -995,9 +998,9 @@ void ExternalLoader::enablePeriodicUpdates(bool enable_, const ExternalLoaderUpd periodic_updater->enable(enable_, settings_); } -size_t ExternalLoader::getNumberOfNames() const +bool ExternalLoader::hasCurrentlyLoadedObjects() const { - return loading_dispatcher->getNumberOfNames(); + return loading_dispatcher->hasCurrentlyLoadedObjects(); } ExternalLoader::Status ExternalLoader::getCurrentStatus(const String & name) const diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index d14506371e6..da999bfe21a 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -107,9 +107,6 @@ public: /// Sets settings for periodic updates. void enablePeriodicUpdates(bool enable, const ExternalLoaderUpdateSettings & settings = {}); - /// Returns the names of all the objects in the configuration (loaded or not). - size_t getNumberOfNames() const; - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -132,6 +129,9 @@ public: Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const; size_t getNumberOfCurrentlyLoadedObjects() const; + /// Returns true if any object was loaded. + bool hasCurrentlyLoadedObjects() const; + static constexpr Duration NO_TIMEOUT = Duration::max(); /// Starts loading of a specified object. 
diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index d3295f086e8..a6261b69967 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -30,5 +30,5 @@ CREATE DATABASE test_DatabaseDictionary ENGINE = Dictionary; SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables; -DROP DATABASE test_DatabaseDictionary; -- { serverError 48 } +DROP DATABASE test_DatabaseDictionary; DROP DATABASE test_DatabaseMemory; From 1d3658662a14bd15839176c85e1ce672c384a5f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:30:53 +0300 Subject: [PATCH 146/191] Updated test --- .../0_stateless/00957_format_with_clashed_aliases.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference index c97c2d66b51..b1ce10e8b07 100644 --- a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -1,7 +1,7 @@ SELECT 1 AS x, x.y -FROM +FROM ( SELECT 'Hello, world' AS y ) AS x From 16cfce7ab3621c701be5825df3e5003cfd81a842 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:32:07 +0300 Subject: [PATCH 147/191] Updated test --- .../0_stateless/00916_create_or_replace_view.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference index a0313be86ff..30d14bf1e41 100644 --- a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ 
b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers -CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers +CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers +CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers From 22bb0b5ca73e35c9d8600dcafb3385387cb64caa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:32:48 +0300 Subject: [PATCH 148/191] Updated test --- dbms/tests/queries/0_stateless/00908_analyze_query.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00908_analyze_query.reference b/dbms/tests/queries/0_stateless/00908_analyze_query.reference index a10c36ca4dd..a8619cfcd4b 100644 --- a/dbms/tests/queries/0_stateless/00908_analyze_query.reference +++ b/dbms/tests/queries/0_stateless/00908_analyze_query.reference @@ -1 +1 @@ -SELECT \n a, \n b\nFROM a +SELECT \n a, \n b\nFROM a From 3adfbff78c60329888801c44ed94c4b176807891 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:33:56 +0300 Subject: [PATCH 149/191] Updated test --- .../00849_multiple_comma_join.reference | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 6a3ccd22249..868f3cecaae 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ -SELECT a\nFROM t1_00849 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 -SELECT a\nFROM t1_00849 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a -SELECT a\nFROM t1_00849 \nALL 
INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS 
`--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN 
\n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 
\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) \nCROSS JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 +SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE 
(`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = (b AS `--t3_00849.b`)\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER 
JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (t2_00849.a AS `--t2_00849.a`) = (a AS `--t1_00849.a`)\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON ((a AS `--t3_00849.a`) = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n 
`--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N From b8e6cd0311625671c85c0513d136f4e8c1dd914f Mon Sep 17 
00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:35:23 +0300 Subject: [PATCH 150/191] Updated test --- .../0_stateless/00751_default_databasename_for_view.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 35217410c2d..e45dde1921e 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751 )) AND (platform = (SELECT (SELECT min(platform) FROM test.v_00751 ))) +CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751)) AND (platform = (SELECT (SELECT min(platform) FROM test.v_00751))) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a From d29ab639d968d36f75dc9dd5cfa99b4c0333f880 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:36:34 +0300 Subject: [PATCH 151/191] Updated test --- .../00597_push_down_predicate.reference | 58 +++++++++---------- .../00599_create_view_with_subquery.reference | 2 +- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index ee84060db57..4e1cc35bd62 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ 
b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -4,59 +4,59 @@ 1 2000-01-01 1 test string 1 1 -------Forbid push down------- -SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers \n LIMIT 1\n) AS t \nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) +SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers\n LIMIT 1\n) AS t\nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) 1 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- -SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n) \nWHERE value = \'1\' +SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE (toString(value) AS value) = \'1\' 1 -SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE (1 AS id) = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE (2 
AS `2`) = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n) \nWHERE subquery = 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE (CAST(1, \'UInt8\') AS subquery) = 1\n)\nWHERE subquery = 1 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 \n HAVING a = 3\n) \nWHERE a = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597 \n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING (min(id) AS id) = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias \n HAVING b = 3\n) AS outer_table_alias \nWHERE b = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING (sum(id) AS b) = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT 
\n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS b \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS b\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) AS b \nWHERE 
id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n WHERE id = 1\n) AS b \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597 \n WHERE id = 1\n GROUP BY \n id, \n date\n) \nWHERE id = 1 +SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597\n WHERE id = 1\n GROUP BY \n id, \n date\n)\nWHERE id = 1 1 2000-01-01 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) USING (id)\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM 
test_00597\n) USING (id)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS test_00597 USING (id)\nWHERE value = 1 +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS test_00597 USING (id)\nWHERE value = 1 1 2000-01-01 test string 1 1 -SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 +SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) USING (id)\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) USING (id)\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (id)\nWHERE b.id = 1 +SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (id)\nWHERE b.id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n 
FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers\n LIMIT 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) AS b ON id = b.id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) AS b ON id = b.id\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 311c1ed53a4..13e0f35b075 100644 --- a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) +CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) From c7e70df5fc07f80f3a0f103cb012c61b97e9a863 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:59:07 +0300 
Subject: [PATCH 152/191] Fixed error with formatting aliases --- dbms/src/Parsers/ASTWithAlias.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 916d8c7346c..e793e7264fb 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -19,12 +19,6 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta /// We will compare formatting result with previously formatted nodes. std::stringstream temporary_buffer; FormatSettings temporary_settings(temporary_buffer, settings); - - /// If there is an alias, then parentheses are required around the entire expression, including the alias. - /// Because a record of the form `0 AS x + 0` is syntactically invalid. - if (frame.need_parens && !alias.empty()) - temporary_buffer << '('; - formatImplWithoutAlias(temporary_settings, state, frame); /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. @@ -35,6 +29,11 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta } else { + /// If there is an alias, then parentheses are required around the entire expression, including the alias. + /// Because a record of the form `0 AS x + 0` is syntactically invalid. 
+ if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + settings.ostr << temporary_buffer.rdbuf(); if (!alias.empty()) From e2431a571f4f115cc8e42270ba51e583f05b8b91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:05:11 +0300 Subject: [PATCH 153/191] Updated test --- .../00849_multiple_comma_join.reference | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 868f3cecaae..e1256053739 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE 
(`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = (b AS `--t3_00849.b`)\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER 
JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (t2_00849.a AS `--t2_00849.a`) = (a AS `--t1_00849.a`)\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON ((a AS `--t3_00849.a`) = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = 
`--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS 
`--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS 
`t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N From 3fd3cc3ff410d4ef40cc6a59f15847094fc2453b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:05:41 +0300 Subject: [PATCH 154/191] Updated test --- .../0_stateless/00957_format_with_clashed_aliases.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference index b1ce10e8b07..d3f7a9aa18b 100644 --- a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -1,7 +1,7 @@ SELECT 1 AS x, x.y -FROM +FROM ( SELECT 'Hello, world' AS y -) AS x +) AS x From b78b000ec1a7ac7e151ae183aabe013627b71b41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:06:53 +0300 Subject: [PATCH 
155/191] Updated test --- .../00597_push_down_predicate.reference | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index 4e1cc35bd62..f1d76a3c0bd 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -11,21 +11,21 @@ SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- -SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE (toString(value) AS value) = \'1\' +SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE value = \'1\' 1 -SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE (1 AS id) = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE (2 AS `2`) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE (CAST(1, \'UInt8\') AS subquery) = 1\n)\nWHERE subquery = 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS 
subquery\n WHERE subquery = 1\n)\nWHERE subquery = 1 1 1 SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING (min(id) AS id) = 1\n)\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING (sum(id) AS b) = 3\n) AS outer_table_alias\nWHERE b = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING b = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 From af1e3b97eadd10306fff2d5429c906d536f0cf45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:08:57 +0300 Subject: [PATCH 156/191] Updated test --- .../0_stateless/00080_show_tables_and_system_tables.sql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index a6261b69967..88facac19e1 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -24,11 +24,6 @@ DROP DATABASE IF EXISTS test_DatabaseMemory; CREATE DATABASE test_DatabaseMemory ENGINE = Memory; CREATE TABLE test_DatabaseMemory.A (A UInt8) ENGINE = Null; --- Just in case -DROP DATABASE IF EXISTS 
test_DatabaseDictionary; -CREATE DATABASE test_DatabaseDictionary ENGINE = Dictionary; - SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables; -DROP DATABASE test_DatabaseDictionary; DROP DATABASE test_DatabaseMemory; From 299607a301eac2f8fa6469d150cb59d40c034f78 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 01:37:46 +0300 Subject: [PATCH 157/191] ThreadPool: more informative error message if exception is thrown before we schedule a next thread #5305 --- dbms/src/Common/ThreadPool.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index 6ed350240c6..91ec29dc188 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -30,10 +30,18 @@ template template ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::optional wait_microseconds) { - auto on_error = [] + auto on_error = [&] { if constexpr (std::is_same_v) + { + if (first_exception) + { + std::exception_ptr exception; + std::swap(exception, first_exception); + std::rethrow_exception(exception); + } throw DB::Exception("Cannot schedule a task", DB::ErrorCodes::CANNOT_SCHEDULE_TASK); + } else return false; }; From b76d5a7cf074800337aa072ea04756e1d69d9c88 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 01:45:57 +0300 Subject: [PATCH 158/191] Added a test (not gtest though) --- dbms/src/Common/tests/CMakeLists.txt | 3 +++ .../tests/thread_pool_schedule_exception.cpp | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 dbms/src/Common/tests/thread_pool_schedule_exception.cpp diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 1c6c7e9f504..11e2d59660e 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -62,6 +62,9 @@ target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) 
add_executable (thread_pool_3 thread_pool_3.cpp) target_link_libraries (thread_pool_3 PRIVATE clickhouse_common_io) +add_executable (thread_pool_schedule_exception thread_pool_schedule_exception.cpp) +target_link_libraries (thread_pool_schedule_exception PRIVATE clickhouse_common_io) + add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) add_check(multi_version) diff --git a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp new file mode 100644 index 00000000000..8f4b84ff180 --- /dev/null +++ b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp @@ -0,0 +1,25 @@ +#include +#include +#include + + +int main(int, char **) +{ + ThreadPool pool(10); + + pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + + try + { + while (true) + pool.schedule([]{}); /// An exception will be rethrown from this method. + } + catch (const std::runtime_error & e) + { + std::cerr << e.what() << "\n"; + } + + pool.wait(); + + return 0; +} From f25b96bed268ec0ae679f136c940e1435f2dfb99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 02:23:53 +0300 Subject: [PATCH 159/191] Moved a few tests to gtest --- dbms/src/Common/tests/CMakeLists.txt | 15 ---- dbms/src/Common/tests/gtest_shell_command.cpp | 72 +++++++++++++++++++ ... 
=> gtest_thread_pool_concurrent_wait.cpp} | 11 ++- .../Common/tests/gtest_thread_pool_limit.cpp | 32 +++++++++ ..._pool_2.cpp => gtest_thread_pool_loop.cpp} | 14 ++-- .../gtest_thread_pool_schedule_exception.cpp | 38 ++++++++++ dbms/src/Common/tests/shell_command_test.cpp | 63 ---------------- dbms/src/Common/tests/thread_pool_3.cpp | 27 ------- .../tests/thread_pool_schedule_exception.cpp | 25 ------- 9 files changed, 160 insertions(+), 137 deletions(-) create mode 100644 dbms/src/Common/tests/gtest_shell_command.cpp rename dbms/src/Common/tests/{thread_pool.cpp => gtest_thread_pool_concurrent_wait.cpp} (73%) create mode 100644 dbms/src/Common/tests/gtest_thread_pool_limit.cpp rename dbms/src/Common/tests/{thread_pool_2.cpp => gtest_thread_pool_loop.cpp} (50%) create mode 100644 dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp delete mode 100644 dbms/src/Common/tests/shell_command_test.cpp delete mode 100644 dbms/src/Common/tests/thread_pool_3.cpp delete mode 100644 dbms/src/Common/tests/thread_pool_schedule_exception.cpp diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 11e2d59660e..23b1614e704 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -41,9 +41,6 @@ target_link_libraries (compact_array PRIVATE clickhouse_common_io ${Boost_FILESY add_executable (radix_sort radix_sort.cpp) target_link_libraries (radix_sort PRIVATE clickhouse_common_io) -add_executable (shell_command_test shell_command_test.cpp) -target_link_libraries (shell_command_test PRIVATE clickhouse_common_io) - add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE clickhouse_compression clickhouse_common_io) @@ -53,18 +50,6 @@ target_link_libraries (pod_array PRIVATE clickhouse_common_io) add_executable (thread_creation_latency thread_creation_latency.cpp) target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) 
-add_executable (thread_pool thread_pool.cpp) -target_link_libraries (thread_pool PRIVATE clickhouse_common_io) - -add_executable (thread_pool_2 thread_pool_2.cpp) -target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) - -add_executable (thread_pool_3 thread_pool_3.cpp) -target_link_libraries (thread_pool_3 PRIVATE clickhouse_common_io) - -add_executable (thread_pool_schedule_exception thread_pool_schedule_exception.cpp) -target_link_libraries (thread_pool_schedule_exception PRIVATE clickhouse_common_io) - add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) add_check(multi_version) diff --git a/dbms/src/Common/tests/gtest_shell_command.cpp b/dbms/src/Common/tests/gtest_shell_command.cpp new file mode 100644 index 00000000000..2378cda2ee7 --- /dev/null +++ b/dbms/src/Common/tests/gtest_shell_command.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +using namespace DB; + + +TEST(ShellCommand, Execute) +{ + auto command = ShellCommand::execute("echo 'Hello, world!'"); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteDirect) +{ + auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteWithInput) +{ + auto command = ShellCommand::execute("cat"); + + String in_str = "Hello, world!\n"; + ReadBufferFromString in(in_str); + copyData(in, command->in); + command->in.close(); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + 
EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, AutoWait) +{ + // hunting: + for (int i = 0; i < 1000; ++i) + { + auto command = ShellCommand::execute("echo " + std::to_string(i)); + //command->wait(); // now automatic + } + + // std::cerr << "inspect me: ps auxwwf" << "\n"; + // std::this_thread::sleep_for(std::chrono::seconds(100)); +} diff --git a/dbms/src/Common/tests/thread_pool.cpp b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp similarity index 73% rename from dbms/src/Common/tests/thread_pool.cpp rename to dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp index 23dba2aadec..1e38e418a22 100644 --- a/dbms/src/Common/tests/thread_pool.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp @@ -1,11 +1,18 @@ #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + /** Reproduces bug in ThreadPool. * It get stuck if we call 'wait' many times from many other threads simultaneously. */ -int main(int, char **) +TEST(ThreadPool, ConcurrentWait) { auto worker = [] { @@ -29,6 +36,4 @@ int main(int, char **) waiting_pool.schedule([&pool]{ pool.wait(); }); waiting_pool.wait(); - - return 0; } diff --git a/dbms/src/Common/tests/gtest_thread_pool_limit.cpp b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp new file mode 100644 index 00000000000..2bd38f34d10 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + +/// Test for thread self-removal when number of free threads in pool is too large. +/// Just checks that nothing weird happens. 
+ +template +int test() +{ + Pool pool(10, 2, 10); + + std::atomic counter{0}; + for (size_t i = 0; i < 10; ++i) + pool.schedule([&]{ ++counter; }); + pool.wait(); + + return counter; +} + +TEST(ThreadPool, ThreadRemoval) +{ + EXPECT_EQ(test(), 10); + EXPECT_EQ(test(), 10); +} diff --git a/dbms/src/Common/tests/thread_pool_2.cpp b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp similarity index 50% rename from dbms/src/Common/tests/thread_pool_2.cpp rename to dbms/src/Common/tests/gtest_thread_pool_loop.cpp index 029c3695e36..80b7b94d988 100644 --- a/dbms/src/Common/tests/thread_pool_2.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp @@ -2,10 +2,17 @@ #include #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include -int main(int, char **) + +TEST(ThreadPool, Loop) { - std::atomic res{0}; + std::atomic res{0}; for (size_t i = 0; i < 1000; ++i) { @@ -16,6 +23,5 @@ int main(int, char **) pool.wait(); } - std::cerr << res << "\n"; - return 0; + EXPECT_EQ(res, 16000); } diff --git a/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp new file mode 100644 index 00000000000..001d9c30b27 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +bool check() +{ + ThreadPool pool(10); + + pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + + try + { + for (size_t i = 0; i < 100; ++i) + pool.schedule([]{}); /// An exception will be rethrown from this method. 
+ } + catch (const std::runtime_error &) + { + return true; + } + + pool.wait(); + + return false; +} + + +TEST(ThreadPool, ExceptionFromSchedule) +{ + EXPECT_TRUE(check()); +} diff --git a/dbms/src/Common/tests/shell_command_test.cpp b/dbms/src/Common/tests/shell_command_test.cpp deleted file mode 100644 index 7de6c18bfdf..00000000000 --- a/dbms/src/Common/tests/shell_command_test.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace DB; - - -int main(int, char **) -try -{ - { - auto command = ShellCommand::execute("echo 'Hello, world!'"); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::execute("cat"); - - String in_str = "Hello, world!\n"; - ReadBufferFromString in(in_str); - copyData(in, command->in); - command->in.close(); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - // hunting: - for (int i = 0; i < 1000; ++i) - { - auto command = ShellCommand::execute("echo " + std::to_string(i)); - //command->wait(); // now automatic - } - - // std::cerr << "inspect me: ps auxwwf" << "\n"; - // std::this_thread::sleep_for(std::chrono::seconds(100)); -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/dbms/src/Common/tests/thread_pool_3.cpp b/dbms/src/Common/tests/thread_pool_3.cpp deleted file mode 100644 index 924895de308..00000000000 --- a/dbms/src/Common/tests/thread_pool_3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -/// Test for thread self-removal when number of free threads in pool is too large. -/// Just checks that nothing weird happens. 
- -template -void test() -{ - Pool pool(10, 2, 10); - - std::mutex mutex; - for (size_t i = 0; i < 10; ++i) - pool.schedule([&]{ std::lock_guard lock(mutex); std::cerr << '.'; }); - pool.wait(); -} - -int main(int, char **) -{ - test(); - std::cerr << '\n'; - test(); - std::cerr << '\n'; - - return 0; -} diff --git a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp deleted file mode 100644 index 8f4b84ff180..00000000000 --- a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include - - -int main(int, char **) -{ - ThreadPool pool(10); - - pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); - - try - { - while (true) - pool.schedule([]{}); /// An exception will be rethrown from this method. - } - catch (const std::runtime_error & e) - { - std::cerr << e.what() << "\n"; - } - - pool.wait(); - - return 0; -} From fd6998951d7b1eafd9737ced8b4cc302128beace Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sun, 30 Jun 2019 03:35:48 +0300 Subject: [PATCH 160/191] Update Platform.cmake --- contrib/libhdfs3-cmake/CMake/Platform.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index ea00fa3f401..d9bc760ee3f 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -15,9 +15,14 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) + LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) - + if (GCC_COMPILER_VERSION_LENGTH GREATER 1) + LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) + else () + set (GCC_COMPILER_VERSION_MINOR 0) + endif () + 
SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") From 1a8695ffd940ff64f9009e56cf4ff8ac467b721e Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Sun, 30 Jun 2019 04:56:16 +0300 Subject: [PATCH 161/191] fix shutdown of system_logs --- dbms/src/Interpreters/Context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0abf34c5170..3642418061a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -278,6 +278,7 @@ struct ContextShared /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). /// TODO: Get rid of this. + system_logs.reset(); embedded_dictionaries.reset(); external_dictionaries.reset(); external_models.reset(); From 6ad07172eea9b86898d2d489f526fd75dfecfe57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 15:49:06 +0300 Subject: [PATCH 162/191] Fixed error in query formatting --- dbms/src/Parsers/ASTWithAlias.cpp | 9 ++------- dbms/src/Parsers/IAST.h | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index e793e7264fb..0239d0b34cd 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -16,14 +16,9 @@ void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settin void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// We will compare formatting result with previously formatted nodes. 
- std::stringstream temporary_buffer; - FormatSettings temporary_settings(temporary_buffer, settings); - formatImplWithoutAlias(temporary_settings, state, frame); - /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. /// This is needed because the query can become extraordinary large after substitution of aliases. - if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, temporary_buffer.str()).second) + if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second) { settings.writeIdentifier(alias); } @@ -34,7 +29,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (frame.need_parens && !alias.empty()) settings.ostr << '('; - settings.ostr << temporary_buffer.rdbuf(); + formatImplWithoutAlias(settings, state, frame); if (!alias.empty()) { diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 04656816133..a2aa9f2b23e 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -180,7 +180,7 @@ public: std::set> printed_asts_with_alias; + Hash /* printed content */>> printed_asts_with_alias; }; /// The state that is copied when each node is formatted. For example, nesting level. 
From a69990ce2741e7f7f129f0f2dd24614b48754570 Mon Sep 17 00:00:00 2001 From: proller Date: Sun, 30 Jun 2019 16:17:27 +0300 Subject: [PATCH 163/191] CLICKHOUSE-4514 Unique query_id among all users (#5430) * CLICKHOUSE-4514 Unique query_id among all users * try 1 * Fix * fix * use condvar * fix style * Update ProcessList.cpp --- dbms/src/Interpreters/ProcessList.cpp | 50 ++++++++++++------- dbms/src/Interpreters/ProcessList.h | 2 +- .../00600_replace_running_query.reference | 4 ++ .../00600_replace_running_query.sh | 13 +++++ 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index a4fe438af8f..def39d4d91c 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -87,10 +87,9 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as { std::unique_lock lock(mutex); + const auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) { - auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); - if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&]{ return processes.size() < max_size; })) throw Exception("Too many simultaneous queries. 
Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); } @@ -117,20 +116,41 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as + ", maximum: " + settings.max_concurrent_queries_for_user.toString(), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); - auto range = user_process_list->second.queries.equal_range(client_info.current_query_id); - if (range.first != range.second) + auto running_query = user_process_list->second.queries.find(client_info.current_query_id); + + if (running_query != user_process_list->second.queries.end()) { if (!settings.replace_running_query) throw Exception("Query with id = " + client_info.current_query_id + " is already running.", ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); /// Ask queries to cancel. They will check this flag. - for (auto it = range.first; it != range.second; ++it) - it->second->is_killed.store(true, std::memory_order_relaxed); - } + running_query->second->is_killed.store(true, std::memory_order_relaxed); + + if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&] + { + running_query = user_process_list->second.queries.find(client_info.current_query_id); + if (running_query == user_process_list->second.queries.end()) + return true; + running_query->second->is_killed.store(true, std::memory_order_relaxed); + return false; + })) + throw Exception("Query with id = " + client_info.current_query_id + " is already running and can't be stopped", + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } } } + /// Check other users running query with our query_id + for (const auto & user_process_list : user_to_queries) + { + if (user_process_list.first == client_info.current_user) + continue; + if (auto running_query = user_process_list.second.queries.find(client_info.current_query_id); running_query != user_process_list.second.queries.end()) + throw Exception("Query with id = " + client_info.current_query_id + " is already running by user " 
+ user_process_list.first, + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } + auto process_it = processes.emplace(processes.end(), query_, client_info, settings.max_memory_usage, settings.memory_tracker_fault_probability, priorities.insert(settings.priority)); @@ -226,17 +246,12 @@ ProcessListEntry::~ProcessListEntry() bool found = false; - auto range = user_process_list.queries.equal_range(query_id); - if (range.first != range.second) + if (auto running_query = user_process_list.queries.find(query_id); running_query != user_process_list.queries.end()) { - for (auto jt = range.first; jt != range.second; ++jt) + if (running_query->second == process_list_element_ptr) { - if (jt->second == process_list_element_ptr) - { - user_process_list.queries.erase(jt); - found = true; - break; - } + user_process_list.queries.erase(running_query->first); + found = true; } } @@ -245,8 +260,7 @@ ProcessListEntry::~ProcessListEntry() LOG_ERROR(&Logger::get("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); std::terminate(); } - - parent.have_space.notify_one(); + parent.have_space.notify_all(); /// If there are no more queries for the user, then we will reset memory tracker and network throttler. if (user_process_list.queries.empty()) diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 32f59749450..b75a4e7a730 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -203,7 +203,7 @@ struct ProcessListForUser ProcessListForUser(); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. 
- using QueryToElement = std::unordered_multimap; + using QueryToElement = std::unordered_map; QueryToElement queries; ProfileEvents::Counters user_performance_counters{VariableContext::User, &ProfileEvents::global_counters}; diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 573541ac970..237dd6b5309 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -1 +1,5 @@ 0 +1 0 +3 0 +2 0 +44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 6778bbce149..abe5dd69b8f 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -9,3 +9,16 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d sleep 0.1 # First query (usually) should be received by the server after this sleep. 
$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 0' wait + +${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 1, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' ||: ) 2>&1 | grep -F 'is already running by user' > /dev/null +wait + +${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' & +sleep 0.1 +${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F 'cant be stopped' > /dev/null +${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' +wait From 0bba515f8f82e59afeaac07cc92a05070f6d72fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 17:29:24 +0300 Subject: [PATCH 164/191] Updated test --- .../queries/0_stateless/00597_push_down_predicate.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index f1d76a3c0bd..c71e5c1cdd9 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -21,7 +21,7 @@ SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\ 1 SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n)\nWHERE subquery = 1 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING a = 3\n)\nWHERE a = 3 3 3 SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n 
HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 From 34e82485b2111ac07baa8ad0117ae3bc1cf7ccf9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 19:30:06 +0300 Subject: [PATCH 165/191] Better check for OS in miscellaneous CI scripts --- ci/install-os-packages.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-os-packages.sh b/ci/install-os-packages.sh index fe5b4f84833..38fa6dbba15 100755 --- a/ci/install-os-packages.sh +++ b/ci/install-os-packages.sh @@ -7,9 +7,9 @@ WHAT=$1 [[ $EUID -ne 0 ]] && SUDO=sudo -command -v apt-get && PACKAGE_MANAGER=apt command -v yum && PACKAGE_MANAGER=yum command -v pkg && PACKAGE_MANAGER=pkg +command -v apt-get && PACKAGE_MANAGER=apt case $PACKAGE_MANAGER in From 008f3a247e6c6e5729508b570083b7ee73b69e9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:20:32 +0300 Subject: [PATCH 166/191] Merging H3 integration --- dbms/src/Functions/CMakeLists.txt | 1 - dbms/src/Functions/geoToH3.cpp | 2 +- dbms/src/Functions/geohashDecode.cpp | 99 +++++++++ dbms/src/Functions/geohashEncode.cpp | 136 +++++++++++++ dbms/src/Functions/greatCircleDistance.cpp | 166 +++++++++++++++ .../{FunctionsGeo.h => pointInEllipses.cpp} | 152 +------------- .../{FunctionsGeo.cpp => pointInPolygon.cpp} | 190 +----------------- dbms/src/Functions/registerFunctions.cpp | 11 - dbms/src/Functions/registerFunctionsGeo.cpp | 32 +++ 9 files changed, 444 insertions(+), 345 deletions(-) create mode 100644 dbms/src/Functions/geohashDecode.cpp create mode 100644 dbms/src/Functions/geohashEncode.cpp create mode 100644 dbms/src/Functions/greatCircleDistance.cpp rename dbms/src/Functions/{FunctionsGeo.h => pointInEllipses.cpp} (54%) rename dbms/src/Functions/{FunctionsGeo.cpp => pointInPolygon.cpp} (55%) create mode 100644 dbms/src/Functions/registerFunctionsGeo.cpp diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 75c01782aaf..a584bd14a7d 100644 --- 
a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -18,7 +18,6 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash - m ${BASE64_LIBRARY} ) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 41ca3cd31e2..65a94d1401d 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -173,7 +173,7 @@ public: void registerFunctionGeoToH3(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp new file mode 100644 index 00000000000..866bc81bb07 --- /dev/null +++ b/dbms/src/Functions/geohashDecode.cpp @@ -0,0 +1,99 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +// geohashDecode(string) => (lon float64, lat float64) +class FunctionGeohashDecode : public IFunction +{ +public: + static constexpr auto name = "geohashDecode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + template + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) + { + const auto * encoded = checkAndGetColumn(encoded_column); + if (!encoded) + return false; + + const size_t count = encoded->size(); + + auto latitude = 
ColumnFloat64::create(count); + auto longitude = ColumnFloat64::create(count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + for (size_t i = 0; i < count; ++i) + { + StringRef encoded_string = encoded->getDataAt(i); + GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); + } + + MutableColumns result; + result.emplace_back(std::move(longitude)); + result.emplace_back(std::move(latitude)); + result_column = ColumnTuple::create(std::move(result)); + + return true; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(encoded, res_column) || + tryExecute(encoded, res_column)) + return; + + throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp new file mode 100644 index 00000000000..9079580aaa3 --- /dev/null +++ b/dbms/src/Functions/geohashEncode.cpp @@ -0,0 +1,136 @@ +#include +#include +#include + +#include +#include + +#include + +#define GEOHASH_MAX_TEXT_LENGTH 16 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; +} + +// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string +class FunctionGeohashEncode : public IFunction +{ +public: + static constexpr auto name = "geohashEncode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String 
getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isFloat, "float"); + validateArgumentType(*this, arguments, 1, isFloat, "float"); + if (arguments.size() == 3) + { + validateArgumentType(*this, arguments, 2, isInteger, "integer"); + } + if (arguments.size() > 3) + { + throw Exception("Too many arguments for function " + getName() + + " expected at most 3", + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + } + + return std::make_shared(); + } + + template + bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + + const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw 
Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + result = std::move(col_str); + + return true; + + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); + const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); + + const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, + arguments.size() == 3 ? block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column)) + return; + + std::string arguments_description; + for (size_t i = 0; i < arguments.size(); ++i) + { + if (i != 0) + arguments_description += ", "; + arguments_description += block.getByPosition(arguments[i]).column->getName(); + } + + throw Exception("Unsupported argument types: " + arguments_description + + + " for function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeohashEncode(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp new file mode 100644 index 00000000000..593334c6cfb --- /dev/null +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEGREES_IN_RADIANS (M_PI / 180.0) +#define EARTH_RADIUS_IN_METERS 6372797.560856 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int 
LOGICAL_ERROR; +} + +static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } + +/** + * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. + * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. + * Throws exception when one or several input values are not within reasonable bounds. + * Latitude must be in [-90, 90], longitude must be [-180, 180] + * + */ +class FunctionGreatCircleDistance : public IFunction +{ +public: + + static constexpr auto name = "greatCircleDistance"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + +private: + + enum class instr_type : uint8_t + { + get_float_64, + get_const_float_64 + }; + + using instr_t = std::pair; + using instrs_t = std::array; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 4; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto arg = arguments[arg_idx].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); + } + + instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) + { + instrs_t result; + out_const = true; + + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto column = block.getByPosition(arguments[arg_idx]).column.get(); + + if (const auto col = checkAndGetColumn>(column)) + { + out_const = false; + result[arg_idx] = instr_t{instr_type::get_float_64, col}; + } + else if (const auto col_const = checkAndGetColumnConst>(column)) + { + result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; + } + else + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return result; + } + + /// https://en.wikipedia.org/wiki/Great-circle_distance + Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) + { + if (lon1Deg < -180 || lon1Deg > 180 || + lon2Deg < -180 || lon2Deg > 180 || + lat1Deg < -90 || lat1Deg > 90 || + lat2Deg < -90 || lat2Deg > 90) + { + throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + Float64 lon1Rad = degToRad(lon1Deg); + Float64 lat1Rad = degToRad(lat1Deg); + Float64 lon2Rad = degToRad(lon2Deg); + Float64 lat2Rad = degToRad(lat2Deg); + Float64 u = sin((lat2Rad - lat1Rad) / 2); + Float64 v = sin((lon2Rad - lon1Rad) / 2); + return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); + } + + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto size = input_rows_count; + + bool result_is_const{}; + auto instrs = getInstructions(block, arguments, result_is_const); + + if (result_is_const) + { + const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
+ const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); + const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); + const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); + + Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); + } + else + { + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + Float64 vals[instrs.size()]; + for (const auto row : ext::range(0, size)) + { + for (const auto idx : ext::range(0, instrs.size())) + { + if (instr_type::get_float_64 == instrs[idx].first) + vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; + else if (instr_type::get_const_float_64 == instrs[idx].first) + vals[idx] = static_cast(instrs[idx].second)->getValue(); + else + throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; + } + dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); + } + block.getByPosition(result).column = std::move(dst); + } + } +}; + + +void registerFunctionGreatCircleDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/pointInEllipses.cpp similarity index 54% rename from dbms/src/Functions/FunctionsGeo.h rename to dbms/src/Functions/pointInEllipses.cpp index 1f351633dd7..2958d6171f1 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/pointInEllipses.cpp @@ -1,17 +1,11 @@ -#pragma once - #include #include #include #include #include #include +#include #include -#include -#include - -#define DEGREES_IN_RADIANS (M_PI / 180.0) -#define EARTH_RADIUS_IN_METERS 6372797.560856 namespace DB @@ -19,148 +13,11 @@ namespace DB namespace ErrorCodes { - extern const int 
ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; - extern const int LOGICAL_ERROR; } -static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } -static inline Float64 radToDeg(Float64 angle) { return angle / DEGREES_IN_RADIANS; } - -/** - * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. - * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. - * Throws exception when one or several input values are not within reasonable bounds. - * Latitude must be in [-90, 90], longitude must be [-180, 180] - * - */ -class FunctionGreatCircleDistance : public IFunction -{ -public: - - static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - -private: - - enum class instr_type : uint8_t - { - get_float_64, - get_const_float_64 - }; - - using instr_t = std::pair; - using instrs_t = std::array; - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 4; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto arg = arguments[arg_idx].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(); - } - - instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) - { - instrs_t result; - out_const = true; - - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto column = block.getByPosition(arguments[arg_idx]).column.get(); - - if (const auto col = checkAndGetColumn>(column)) - { - out_const = false; - result[arg_idx] = instr_t{instr_type::get_float_64, col}; - } - else if (const auto col_const = checkAndGetColumnConst>(column)) - { - result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; - } - else - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - return result; - } - - /// https://en.wikipedia.org/wiki/Great-circle_distance - Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) - { - if (lon1Deg < -180 || lon1Deg > 180 || - lon2Deg < -180 || lon2Deg > 180 || - lat1Deg < -90 || lat1Deg > 90 || - lat2Deg < -90 || lat2Deg > 90) - { - throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - - Float64 lon1Rad = degToRad(lon1Deg); - Float64 lat1Rad = degToRad(lat1Deg); - Float64 lon2Rad = degToRad(lon2Deg); - Float64 lat2Rad = degToRad(lat2Deg); - Float64 u = sin((lat2Rad - lat1Rad) / 2); - Float64 v = sin((lon2Rad - lon1Rad) / 2); - return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); - } - - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - const auto size = input_rows_count; - - bool result_is_const{}; - auto instrs = getInstructions(block, arguments, result_is_const); - - if (result_is_const) - { - const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
- const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); - const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); - const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); - - Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); - } - else - { - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - Float64 vals[instrs.size()]; - for (const auto row : ext::range(0, size)) - { - for (const auto idx : ext::range(0, instrs.size())) - { - if (instr_type::get_float_64 == instrs[idx].first) - vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; - else if (instr_type::get_const_float_64 == instrs[idx].first) - vals[idx] = static_cast(instrs[idx].second)->getValue(); - else - throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; - } - dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); - } - block.getByPosition(result).column = std::move(dst); - } - } -}; - - /** * The function checks if a point is in one of ellipses in set. * The number of arguments must be 2 + 4*N where N is the number of ellipses. 
@@ -177,7 +34,6 @@ private: class FunctionPointInEllipses : public IFunction { public: - static constexpr auto name = "pointInEllipses"; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -330,6 +186,10 @@ private: } }; + +void registerFunctionPointInEllipses(FunctionFactory & factory) +{ + factory.registerFunction(); } -#undef DEGREES_IN_RADIANS +} diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/pointInPolygon.cpp similarity index 55% rename from dbms/src/Functions/FunctionsGeo.cpp rename to dbms/src/Functions/pointInPolygon.cpp index 05ed8db2969..fc94be6c343 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/pointInPolygon.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,6 +15,7 @@ #include #include #include +#include #include #include @@ -37,6 +37,7 @@ namespace ErrorCodes extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } namespace FunctionPointInPolygonDetail @@ -251,185 +252,6 @@ private: }; -const size_t GEOHASH_MAX_TEXT_LENGTH = 16; - -// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string -class FunctionGeohashEncode : public IFunction -{ -public: - static constexpr auto name = "geohashEncode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - 
validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception("Too many arguments for function " + getName() + - " expected at most 3", - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); - } - - return std::make_shared(); - } - - template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - result = std::move(col_str); - - return true; - - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); - const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); - - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? 
block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return; - - const char sep[] = ", "; - std::string arguments_description = ""; - for (size_t i = 0; i < arguments.size(); ++i) - { - arguments_description += block.getByPosition(arguments[i]).column->getName() + sep; - } - if (arguments_description.size() > sizeof(sep)) - { - arguments_description.erase(arguments_description.size() - sizeof(sep) - 1); - } - - throw Exception("Unsupported argument types: " + arguments_description + - + " for function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -// geohashDecode(string) => (lon float64, lat float64) -class FunctionGeohashDecode : public IFunction -{ -public: - static constexpr auto name = "geohashDecode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); - - return std::make_shared( - DataTypes{std::make_shared(), std::make_shared()}, - Strings{"longitude", "latitude"}); - } - - template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) - { - const auto * encoded = checkAndGetColumn(encoded_column); - if (!encoded) - return false; - - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); - - 
ColumnFloat64::Container & lon_data = longitude->getData(); - ColumnFloat64::Container & lat_data = latitude->getData(); - - for (size_t i = 0; i < count; ++i) - { - StringRef encoded_string = encoded->getDataAt(i); - GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); - } - - MutableColumns result; - result.emplace_back(std::move(longitude)); - result.emplace_back(std::move(latitude)); - result_column = ColumnTuple::create(std::move(result)); - - return true; - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) - return; - - throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; template using Point = boost::geometry::model::d2::point_xy; @@ -440,13 +262,9 @@ using PointInPolygonWithGrid = GeoUtils::PointInPolygonWithGrid; template <> const char * FunctionPointInPolygon::name = "pointInPolygon"; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionPointInPolygon(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction(); - factory.registerFunction(); } + } diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 3e7f9c7136d..1e76eb3032b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,9 +1,6 @@ #include #include -#include "config_core.h" -#include "config_functions.h" - namespace DB { /** These functions are defined in a separate translation units. 
@@ -43,10 +40,6 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); -#if USE_H3 -void registerFunctionGeoToH3(FunctionFactory &); -#endif - void registerFunctions() { auto & factory = FunctionFactory::instance(); @@ -84,10 +77,6 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); - -#if USE_H3 - registerFunctionGeoToH3(factory); -#endif } } diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp new file mode 100644 index 00000000000..15f399b026d --- /dev/null +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -0,0 +1,32 @@ +#include "config_functions.h" + +namespace DB +{ + +class FunctionFactory; + +void registerFunctionGreatCircleDistance(FunctionFactory & factory); +void registerFunctionPointInEllipses(FunctionFactory & factory); +void registerFunctionPointInPolygon(FunctionFactory & factory); +void registerFunctionGeohashEncode(FunctionFactory & factory); +void registerFunctionGeohashDecode(FunctionFactory & factory); + +#if USE_H3 +void registerFunctionGeoToH3(FunctionFactory &); +#endif + +void registerFunctionsArithmetic(FunctionFactory & factory) +{ + registerFunctionGreatCircleDistance(factory); + registerFunctionPointInEllipses(factory); + registerFunctionPointInPolygon(factory); + registerFunctionGeohashEncode(factory); + registerFunctionGeohashDecode(factory); + +#if USE_H3 + registerFunctionGeoToH3(factory); +#endif +} + +} + From 7181ecabb82d73c70445b4eeba5bd0496ced45b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:22:51 +0300 Subject: [PATCH 167/191] Removed wrong instruction from Dockerfile --- docker/packager/deb/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index c3c4bc3c0d6..7651d4f1f24 100644 --- 
a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -71,8 +71,5 @@ RUN apt-get --allow-unauthenticated update -y \ gperf \ alien - -RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. && rm -rf h3 - COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 1777313821a755a4097b6e88d9dede041404d1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:49:57 +0300 Subject: [PATCH 168/191] Own CMakeLists for H3 because otherwise "m" library does not link correctly --- contrib/CMakeLists.txt | 2 +- contrib/h3-cmake/CMakeLists.txt | 27 +++++++++++++++++++++++++++ dbms/src/Functions/geoToH3.cpp | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 contrib/h3-cmake/CMakeLists.txt diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 78ddc692b3d..ba75615aadc 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -107,7 +107,7 @@ if (USE_INTERNAL_CPUID_LIBRARY) endif () if (USE_INTERNAL_H3_LIBRARY) - add_subdirectory(h3) + add_subdirectory(h3-cmake) endif () if (USE_INTERNAL_SSL_LIBRARY) diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5df0a205a34 --- /dev/null +++ b/contrib/h3-cmake/CMakeLists.txt @@ -0,0 +1,27 @@ +set(H3_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib) +set(H3_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib) + +set(SRCS +${H3_SOURCE_DIR}/lib/algos.c +${H3_SOURCE_DIR}/lib/baseCells.c +${H3_SOURCE_DIR}/lib/bbox.c +${H3_SOURCE_DIR}/lib/coordijk.c +${H3_SOURCE_DIR}/lib/faceijk.c +${H3_SOURCE_DIR}/lib/geoCoord.c +${H3_SOURCE_DIR}/lib/h3Index.c +${H3_SOURCE_DIR}/lib/h3UniEdge.c +${H3_SOURCE_DIR}/lib/linkedGeo.c +${H3_SOURCE_DIR}/lib/localij.c +${H3_SOURCE_DIR}/lib/mathExtensions.c +${H3_SOURCE_DIR}/lib/polygon.c +${H3_SOURCE_DIR}/lib/vec2d.c +${H3_SOURCE_DIR}/lib/vec3d.c +${H3_SOURCE_DIR}/lib/vertexGraph.c +) + 
+configure_file(${H3_SOURCE_DIR}/include/h3api.h.in ${H3_BINARY_DIR}/include/h3api.h) + +add_library(h3 ${SRCS}) +target_include_directories(h3 SYSTEM PUBLIC ${H3_SOURCE_DIR}/include) +target_include_directories(h3 SYSTEM PUBLIC ${H3_BINARY_DIR}/include) +target_compile_definitions(h3 PRIVATE H3_HAVE_VLA) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 65a94d1401d..1dd809b349e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -111,10 +111,10 @@ public: } } - const auto col_lat = block.getByPosition(arguments[0]).column.get(); const auto col_lon = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) { const auto col_vec_lat = static_cast *>(col_lat); From a7fc631de700a034f0cc872ae1b91b26f97e05e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:55:08 +0300 Subject: [PATCH 169/191] Style --- dbms/src/Functions/geoToH3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 1dd809b349e..19c3d8e5193 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -11,7 +11,8 @@ #include -extern "C" { +extern "C" +{ #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" From 7ca7d6c77440bac9582ea0c0abd6308a0c3ee9e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:56:53 +0300 Subject: [PATCH 170/191] Initial support for clang-tidy (not yet useful) --- CMakeLists.txt | 18 ++++++++++++++++++ dbms/CMakeLists.txt | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 283e19247af..85e0bae50ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -273,6 +273,24 @@ if (USE_INCLUDE_WHAT_YOU_USE) endif() endif () +# Using clang-tidy static analyzer 
http://mariobadr.com/using-clang-tidy-with-cmake-36.html https://cmake.org/cmake/help/v3.6/prop_tgt/LANG_CLANG_TIDY.html +option (ENABLE_CLANG_TIDY "Use 'clang-tidy' static analyzer" OFF) +if (ENABLE_CLANG_TIDY) + if (${CMAKE_VERSION} VERSION_LESS "3.6.0") + message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") + endif() + find_program (CLANG_TIDY_EXE NAMES "clang-tidy" DOC "Path to clang-tidy executable") + if (NOT CLANG_TIDY_EXE) + set (USE_CLANG_TIDY 0) + message (STATUS "clang-tidy not found.") + else () + set (USE_CLANG_TIDY 1) + message (STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") + set (DO_CLANG_TIDY "${CLANG_TIDY_EXE}" "-checks=*,-clang-analyzer-alpha.*") + # You can enable it within a directory by: set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") + endif () +endif () + if (ENABLE_TESTS) message (STATUS "Tests are enabled") endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 4b47b77dec2..18c169211d9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -2,6 +2,10 @@ if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) endif () +if (USE_CLANG_TIDY) + set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") +endif () + if(COMPILER_PIPE) set(MAX_COMPILER_MEMORY 2500) else() From 9127c8b27c93463ac7a4fb6b6d8cd2b5874c23c8 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sun, 30 Jun 2019 22:34:17 +0300 Subject: [PATCH 171/191] inverting ngramSearch to be more intuitive --- .../Functions/FunctionsStringSimilarity.cpp | 34 +- ...reference => 00951_ngram_search.reference} | 1524 ++++++++--------- ...ngram_entry.sql => 00951_ngram_search.sql} | 0 .../functions/string_search_functions.md | 2 +- .../functions/string_search_functions.md | 2 +- 5 files changed, 790 insertions(+), 772 deletions(-) rename dbms/tests/queries/0_stateless/{00951_ngram_entry.reference => 00951_ngram_search.reference} (68%) rename dbms/tests/queries/0_stateless/{00951_ngram_entry.sql => 00951_ngram_search.sql} (100%) diff --git 
a/dbms/src/Functions/FunctionsStringSimilarity.cpp b/dbms/src/Functions/FunctionsStringSimilarity.cpp index 9a9dd01a972..d5632b136e4 100644 --- a/dbms/src/Functions/FunctionsStringSimilarity.cpp +++ b/dbms/src/Functions/FunctionsStringSimilarity.cpp @@ -271,11 +271,17 @@ struct NgramDistanceImpl { size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance, nullptr); /// For !Symmetric version we should not use first_size. - res = distance * 1.f / std::max(Symmetric * first_size + second_size, size_t(1)); + if constexpr (Symmetric) + res = distance * 1.f / std::max(first_size + second_size, size_t(1)); + else + res = 1.f - distance * 1.f / std::max(second_size, size_t(1)); } else { - res = 1.f; + if constexpr (Symmetric) + res = 1.f; + else + res = 0.f; } } @@ -333,13 +339,19 @@ struct NgramDistanceImpl /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// Strings are too big, we are assuming they are not the same. This is done because of limiting number /// of bigrams added and not allocating too much memory. 
- res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } prev_needle_offset = needle_offsets[i]; @@ -399,11 +411,11 @@ struct NgramDistanceImpl for (size_t j = 0; j < needle_stats_size; ++j) --common_stats[needle_ngram_storage[j]]; - res[i] = distance * 1.f / std::max(needle_stats_size, size_t(1)); + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { - res[i] = 1.f; + res[i] = 0.f; } prev_offset = needle_offsets[i]; @@ -446,12 +458,18 @@ struct NgramDistanceImpl distance, ngram_storage.get()); /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// if the strings are too big, we say they are completely not the same - res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } distance = needle_stats_size; prev_offset = offsets[i]; diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference b/dbms/tests/queries/0_stateless/00951_ngram_search.reference similarity index 68% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.reference rename to dbms/tests/queries/0_stateless/00951_ngram_search.reference index d6d97eaaab9..1b845b6015d 100644 --- a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference +++ b/dbms/tests/queries/0_stateless/00951_ngram_search.reference @@ -1,13 +1,8 @@ -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -18,98 +13,202 @@ 0 0 0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 500 500 500 500 500 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 1000 0 -0 +1000 +1000 500 -1000 +0 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -117,10 +216,232 @@ привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +500 +500 +500 +500 +500 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +0 +571 +1000 +500 +0 +привет как дела?... 
Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -128,108 +449,46 @@ http://metrika.ru/ 0 привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +http://metrica.yandex.com/ 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... 
Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 +привет 121 +привет как дела?... Херсон 394 +привет братан как дела - Яндекс.Видео 788 +пап привет как дела - Яндекс.Видео 818 привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -240,616 +499,357 @@ http://metrika.ru/ 1000 0 0 0 -0 -0 -0 -0 -0 -500 -500 -500 -500 -500 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 1000 1000 1000 1000 1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 1000 -429 -0 -500 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +привет 360 +привет братан как дела - Яндекс.Видео 960 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... 
Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 - 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 182 -привет братан как дела - Яндекс.Видео 212 -привет как дела?... Херсон 606 -привет 879 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 40 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 80 -привет как дела?... 
Херсон 120 -привет как дела клип - Яндекс.Видео 120 -привет братан как дела - Яндекс.Видео 120 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 +привет 360 привет как дела?... Херсон 880 привет как дела клип - Яндекс.Видео 880 -пап привет как дела - Яндекс.Видео 880 -привет братан как дела - Яндекс.Видео 920 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела?... Херсон 560 -привет как дела клип - Яндекс.Видео 560 -пап привет как дела - Яндекс.Видео 560 -привет братан как дела - Яндекс.Видео 560 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 +привет братан как дела - Яндекс.Видео 880 +пап привет как дела - Яндекс.Видео 920 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 +http://metrika.ru/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет братан как дела - Яндекс.Видео 80 +привет как дела?... Херсон 120 +привет как дела клип - Яндекс.Видео 120 +пап привет как дела - Яндекс.Видео 120 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет как дела?... Херсон 440 +привет как дела клип - Яндекс.Видео 440 +пап привет как дела - Яндекс.Видео 440 +привет братан как дела - Яндекс.Видео 440 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 http://metrika.ru/ 1000 - 1000 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.sql b/dbms/tests/queries/0_stateless/00951_ngram_search.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.sql rename to dbms/tests/queries/0_stateless/00951_ngram_search.sql diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 71df498d994..fb02a13c3a0 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -108,7 +108,7 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramDistanceC ## ngramSearch(haystack, needle) -Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Can be useful for fuzzy string search. +Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. The closer to one, the more likely `needle` is in the `haystack`. Can be useful for fuzzy string search. For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. 
diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 0301b094c86..7e94e378814 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -97,7 +97,7 @@ ## ngramSearch(haystack, needle) -То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Может быть использовано для приближенного поиска. +То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Чем ближе результат к единице, тем вероятнее, что `needle` внутри `haystack`. Может быть использовано для приближенного поиска. Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. 
From 86093a474ff707bd0a2c3e074c97233c0c558758 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:45:23 +0300 Subject: [PATCH 172/191] Fixed error --- dbms/src/Functions/registerFunctionsGeo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp index 15f399b026d..0f436811874 100644 --- a/dbms/src/Functions/registerFunctionsGeo.cpp +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -15,7 +15,7 @@ void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeoToH3(FunctionFactory &); #endif -void registerFunctionsArithmetic(FunctionFactory & factory) +void registerFunctionsGeo(FunctionFactory & factory) { registerFunctionGreatCircleDistance(factory); registerFunctionPointInEllipses(factory); From 7c98327e4df76e7fcc9020058f32bb7a42f5c449 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:46:23 +0300 Subject: [PATCH 173/191] Fixed error --- dbms/src/Functions/geohashDecode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp index 866bc81bb07..9774ecdee40 100644 --- a/dbms/src/Functions/geohashDecode.cpp +++ b/dbms/src/Functions/geohashDecode.cpp @@ -91,7 +91,7 @@ public: }; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionGeohashDecode(FunctionFactory & factory) { factory.registerFunction(); } From 7591c3b7b2903a1222620caa2c789d8c9d58930c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:47:20 +0300 Subject: [PATCH 174/191] Fixed error --- dbms/src/Functions/geohashEncode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp index 9079580aaa3..9f4ccddd0f4 100644 --- a/dbms/src/Functions/geohashEncode.cpp +++ b/dbms/src/Functions/geohashEncode.cpp @@ -128,7 +128,7 
@@ public: }; -void registerFunctionsGeohashEncode(FunctionFactory & factory) +void registerFunctionGeohashEncode(FunctionFactory & factory) { factory.registerFunction(); } From 663aab6f5b5296f5f02332bd8d9ebc960f8ecebf Mon Sep 17 00:00:00 2001 From: Maxim Sabyanin Date: Sat, 29 Jun 2019 17:09:30 +0300 Subject: [PATCH 175/191] complete ExternalLoader method's implementations --- dbms/src/Interpreters/ExternalLoader.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 1bccad41b7a..018565e0a2c 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -1038,6 +1038,11 @@ size_t ExternalLoader::getNumberOfCurrentlyLoadedObjects() const return loading_dispatcher->getNumberOfCurrentlyLoadedObjects(); } +void ExternalLoader::load(const String & name) const +{ + loading_dispatcher->load(name); +} + void ExternalLoader::load(const String & name, LoadablePtr & loaded_object, Duration timeout) const { loading_dispatcher->load(name, loaded_object, timeout); @@ -1058,6 +1063,11 @@ void ExternalLoader::loadStrict(const String & name, LoadResult & load_result) c loading_dispatcher->loadStrict(name, load_result); } +void ExternalLoader::load(const FilterByNameFunction & filter_by_name) const +{ + loading_dispatcher->load(filter_by_name); +} + void ExternalLoader::load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout) const { if (filter_by_name) @@ -1074,6 +1084,11 @@ void ExternalLoader::load(const FilterByNameFunction & filter_by_name, LoadResul loading_dispatcher->load(load_results, timeout); } +void ExternalLoader::load() const +{ + loading_dispatcher->load(); +} + void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const { return loading_dispatcher->load(loaded_objects, timeout); From e541deb5ca9f365623f556dcf1a686fcb2c14b0d Mon Sep 17 00:00:00 2001 From: Alexey 
Milovidov Date: Sun, 30 Jun 2019 23:06:04 +0300 Subject: [PATCH 176/191] Added performance test --- dbms/tests/performance/h3.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dbms/tests/performance/h3.xml diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml new file mode 100644 index 00000000000..f5a9f784e18 --- /dev/null +++ b/dbms/tests/performance/h3.xml @@ -0,0 +1,14 @@ + + once + + + + + 2000 + 10000 + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + From 6eae511b6ed5378c0f6f05523ece0b3aa2ff6d83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:13:32 +0300 Subject: [PATCH 177/191] Changed order of (lat, lon) to (lon, lat) to be consistent with "greatCircleDistance" function and PostGIS --- dbms/src/Functions/geoToH3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 19c3d8e5193..4d34446197e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -127,8 +127,8 @@ public: for (const auto row : ext::range(0, size)) { - const double lat = col_vec_lat->getData()[row]; - const double lon = col_vec_lon->getData()[row]; + const double lon = col_vec_lat->getData()[row]; + const double lat = col_vec_lon->getData()[row]; if (!is_const_resulution) { const auto col_vec_res = static_cast *>(col_res); From 65ce94bb56931080493456fc714c968a3407d6ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:14:45 +0300 Subject: [PATCH 178/191] Updated performance test --- dbms/tests/performance/h3.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml index f5a9f784e18..7381f559a0f 100644 --- a/dbms/tests/performance/h3.xml +++ b/dbms/tests/performance/h3.xml @@ -10,5 +10,5 @@ - SELECT count() FROM 
system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) From 49ce1cc29b90452a9a2d4677de8d6f96bc401bd1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:39:23 +0300 Subject: [PATCH 179/191] Updated test --- dbms/tests/queries/0_stateless/00926_geo_to_h3.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql index 38a60c0061e..d3ce898c56a 100644 --- a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -10,10 +10,10 @@ INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); -select geoToH3(55.77922738, 37.63098076, 15); -select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; -select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); +select geoToH3(37.63098076, 55.77922738, 15); +select geoToH3(lon, lat, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select lat, lon, geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select geoToH3(lon, lat, resolution) AS k, count(*) from table1 group by k order by k; DROP TABLE table1 From 1d2008bf10348003eb996e9c907da10738f6dc07 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:46:29 +0300 Subject: [PATCH 180/191] Update geo.md --- 
docs/ru/query_language/functions/geo.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index cf230185f5e..c23f2e806ec 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -154,19 +154,19 @@ SELECT geohashDecode('ezs42') AS res ## geoToH3 -Получает H3 индекс точки (lat, lon) с заданным разрешением +Получает H3 индекс точки (lon, lat) с заданным разрешением ``` -geoToH3(lat, lon, resolution) +geoToH3(lon, lat, resolution) ``` **Входные значения** -- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. +Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. 
**Возвращаемые значения** @@ -177,7 +177,7 @@ geoToH3(lat, lon, resolution) **Пример** ``` sql -SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index ``` ``` ┌────────────h3Index─┐ From 4a2d3fe90ae98d10c22b3ffa8f3183ca83f5dbee Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:47:12 +0300 Subject: [PATCH 181/191] Update Dockerfile --- docker/packager/deb/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 7651d4f1f24..0c9c82a5e1f 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ - libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list From 76b0a290461b8a05a1833445a0673a89140e3d55 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:53:08 +0300 Subject: [PATCH 182/191] Update geo.md --- docs/ru/query_language/functions/geo.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index c23f2e806ec..33092cf804b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -166,8 +166,6 @@ geoToH3(lon, lat, resolution) - `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. - **Возвращаемые значения** Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). 
From fd2f90488e732f8729225d4738f19420ce0fc590 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:54:06 +0300 Subject: [PATCH 183/191] Simplification --- dbms/src/Functions/geoToH3.cpp | 105 +++++---------------------------- 1 file changed, 16 insertions(+), 89 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 4d34446197e..6d3a7197ee0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -71,103 +71,30 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - int const_cnt = 0; - const auto size = input_rows_count; - - for (const auto idx : ext::range(0, 2)) - { - const auto column = block.getByPosition(arguments[idx]).column.get(); - if (typeid_cast(column)) - { - ++const_cnt; - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - } - - double resolution = 0; - bool is_const_resulution = false; - { - const auto column = block.getByPosition(arguments[2]).column.get(); - if (typeid_cast(column)) - { - is_const_resulution = true; - const auto col_const_res = static_cast(column); - resolution = col_const_res->getValue(); - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - else if (const_cnt == 2) - { - throw Exception( - "Illegal type " + column->getName() + " of arguments 3 of function " + getName() - + ". 
It must be const if arguments 1 and 2 are consts.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } - - const auto col_lat = block.getByPosition(arguments[0]).column.get(); - const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_lon = block.getByPosition(arguments[0]).column.get(); + const auto col_lat = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); - if (const_cnt == 0) + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : ext::range(0, input_rows_count)) { - const auto col_vec_lat = static_cast *>(col_lat); - const auto col_vec_lon = static_cast *>(col_lon); - - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - - for (const auto row : ext::range(0, size)) - { - const double lon = col_vec_lat->getData()[row]; - const double lat = col_vec_lon->getData()[row]; - if (!is_const_resulution) - { - const auto col_vec_res = static_cast *>(col_res); - resolution = col_vec_res->getData()[row]; - } - - GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); - coord.lon = H3_EXPORT(degsToRads)(lon); - - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); - - dst_data[row] = hindex; - } - - block.getByPosition(result).column = std::move(dst); - } - else if (const_cnt == 2) - { - const auto col_const_lat = static_cast(col_lat); - const auto col_const_lon = static_cast(col_lon); - - const double lat = col_const_lat->getValue(); - const double lon = col_const_lon->getValue(); + const double lon = col_lon->getFloat64(row); + const double lat = col_lat->getFloat64(row); + const UInt8 res = col_res->getUInt(row); GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); coord.lon = H3_EXPORT(degsToRads)(lon); - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + coord.lat = H3_EXPORT(degsToRads)(lat); - block.getByPosition(result).column = 
DataTypeUInt64().createColumnConst(size, hindex); - } - else - { - throw Exception( - "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() - + ". All must be either const or vector", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, res); + + dst_data[row] = hindex; } + + block.getByPosition(result).column = std::move(dst); } }; From 22948ba50822129424b56dc2b4cf0a58b2495ea8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 02:53:56 +0300 Subject: [PATCH 184/191] Fixed test --- dbms/tests/queries/0_stateless/00600_replace_running_query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index abe5dd69b8f..ce0a4e185ad 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -19,6 +19,6 @@ ${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' & sleep 0.1 ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' & sleep 0.1 -( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F 'cant be stopped' > /dev/null +( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F "can't be stopped" > /dev/null ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' wait From 5fb7bf685442e51974ff10fa74155b060e591792 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 1 Jul 2019 03:15:14 +0300 Subject: [PATCH 185/191] Update ParallelInputsProcessor.h --- dbms/src/DataStreams/ParallelInputsProcessor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 
43e66f4a894..813dec594e4 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -95,12 +95,11 @@ public: { active_threads = max_threads; threads.reserve(max_threads); - auto thread_group = CurrentThread::getGroup(); try { for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back(&ParallelInputsProcessor::thread, this, std::move(thread_group), i); + threads.emplace_back(&ParallelInputsProcessor::thread, this, CurrentThread::getGroup(), i); } catch (...) { From c0a63801fc6b7e021dc398641a49cd55df62c4d7 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 1 Jul 2019 15:50:50 +0300 Subject: [PATCH 186/191] fix segfault in ttl merge with non-physical columns in block --- dbms/src/DataStreams/TTLBlockInputStream.cpp | 18 ++++++++---------- dbms/src/DataStreams/TTLBlockInputStream.h | 4 +++- .../0_stateless/00933_ttl_simple.reference | 1 + .../queries/0_stateless/00933_ttl_simple.sql | 11 +++++++++++ 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 482a3ff4814..1e765f8bb3c 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -26,6 +26,7 @@ TTLBlockInputStream::TTLBlockInputStream( , date_lut(DateLUT::instance()) { children.push_back(input_); + header = children.at(0)->getHeader(); const auto & column_defaults = storage.getColumns().getDefaults(); ASTPtr default_expr_list = std::make_shared(); @@ -58,11 +59,6 @@ TTLBlockInputStream::TTLBlockInputStream( } -Block TTLBlockInputStream::getHeader() const -{ - return children.at(0)->getHeader(); -} - Block TTLBlockInputStream::readImpl() { Block block = children.at(0)->read(); @@ -108,11 +104,13 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) const auto & current = block.getByName(storage.ttl_table_entry.result_column); const IColumn * ttl_column = current.column.get(); + 
const auto & column_names = header.getNames(); MutableColumns result_columns; - result_columns.reserve(getHeader().columns()); - for (const auto & name : storage.getColumns().getNamesOfPhysical()) + result_columns.reserve(column_names.size()); + + for (auto it = column_names.begin(); it != column_names.end(); ++it) { - auto & column_with_type = block.getByName(name); + auto & column_with_type = block.getByName(*it); const IColumn * values_column = column_with_type.column.get(); MutableColumnPtr result_column = values_column->cloneEmpty(); result_column->reserve(block.rows()); @@ -125,13 +123,13 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) new_ttl_infos.table_ttl.update(cur_ttl); result_column->insertFrom(*values_column, i); } - else + else if (it == column_names.begin()) ++rows_removed; } result_columns.emplace_back(std::move(result_column)); } - block = getHeader().cloneWithColumns(std::move(result_columns)); + block = header.cloneWithColumns(std::move(result_columns)); } void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index a95cd627bc9..6fcdd7400f2 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { return "TTLBlockInputStream"; } - Block getHeader() const override; + Block getHeader() const override { return header; }; protected: Block readImpl() override; @@ -47,6 +47,8 @@ private: std::unordered_map defaults_result_column; ExpressionActionsPtr defaults_expression; + + Block header; private: /// Removes values with expired ttl and computes new min_ttl and empty_columns for part void removeValuesWithExpiredColumnTTL(Block & block); diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference index f1377e3d220..09e5d7d1f02 100644 
--- a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference @@ -1,5 +1,6 @@ 0 0 0 0 +5 6 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql index 62b320cc0b0..11f0055a377 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql @@ -9,6 +9,17 @@ select a, b from ttl_00933_1; drop table if exists ttl_00933_1; +create table ttl_00933_1 (d DateTime, a Int, b Int) engine = MergeTree order by toDate(d) partition by tuple() ttl d + interval 1 second; +insert into ttl_00933_1 values (now(), 1, 2); +insert into ttl_00933_1 values (now(), 3, 4); +insert into ttl_00933_1 values (now() + 1000, 5, 6); +optimize table ttl_00933_1 final; -- check ttl merge for part with both expired and unexpired values +select sleep(1.1) format Null; -- wait if very fast merge happen +optimize table ttl_00933_1 final; +select a, b from ttl_00933_1; + +drop table if exists ttl_00933_1; + create table ttl_00933_1 (d DateTime, a Int ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 1); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 2); From fa5431a524c3e947235df4bdeb2f5285c7042ed4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 1 Jul 2019 16:49:00 +0300 Subject: [PATCH 187/191] Don't re-new docker volumes in all tests --- dbms/tests/integration/helpers/cluster.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 5743625a8cd..d8e4e9e506e 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -338,7 +338,7 @@ class ClickHouseCluster: 
self.docker_client = docker.from_env(version=self.docker_api_version) - common_opts = ['up', '-d', '--force-recreate', '--renew-anon-volumes'] + common_opts = ['up', '-d', '--force-recreate'] if self.with_zookeeper and self.base_zookeeper_cmd: subprocess_check_call(self.base_zookeeper_cmd + common_opts) @@ -347,23 +347,23 @@ class ClickHouseCluster: self.wait_zookeeper_to_start(120) if self.with_mysql and self.base_mysql_cmd: - subprocess_check_call(self.base_mysql_cmd+ common_opts) + subprocess_check_call(self.base_mysql_cmd + common_opts) self.wait_mysql_to_start(120) if self.with_postgres and self.base_postgres_cmd: - subprocess_check_call(self.base_postgres_cmd+ common_opts) + subprocess_check_call(self.base_postgres_cmd + common_opts) self.wait_postgres_to_start(120) if self.with_kafka and self.base_kafka_cmd: - subprocess_check_call(self.base_kafka_cmd+ common_opts) + subprocess_check_call(self.base_kafka_cmd + common_opts + ['--renew-anon-volumes']) self.kafka_docker_id = self.get_instance_docker_id('kafka1') if self.with_hdfs and self.base_hdfs_cmd: - subprocess_check_call(self.base_hdfs_cmd+ common_opts) + subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) if self.with_mongo and self.base_mongo_cmd: - subprocess_check_call(self.base_mongo_cmd+ common_opts) + subprocess_check_call(self.base_mongo_cmd + common_opts) self.wait_mongo_to_start(30) subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) From cb8be105d51d73cde6526534557ad36fc5e867fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 16:49:50 +0300 Subject: [PATCH 188/191] Added missing implementations of IColumn::getFloat64, IColumn::getBool --- dbms/src/Columns/ColumnConst.h | 5 +++++ dbms/src/Columns/ColumnLowCardinality.h | 2 ++ dbms/src/Columns/ColumnUnique.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 05a9562e549..be9e9ff839a 100644 --- 
a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -99,6 +99,11 @@ public: return data->getBool(0); } + Float64 getFloat64(size_t) const override + { + return data->getFloat64(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index d36b91b0c40..60a332b22b9 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -57,6 +57,8 @@ public: UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } + Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); } + bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); } bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); } ColumnPtr cut(size_t start, size_t length) const override { diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 11344a23a1f..322d61081d2 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -64,6 +64,8 @@ public: UInt64 get64(size_t n) const override { return getNestedColumn()->get64(n); } UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); } Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); } + Float64 getFloat64(size_t n) const override { return getNestedColumn()->getFloat64(n); } + bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const 
override; void updateHashWithValue(size_t n, SipHash & hash_func) const override From ecf9feab9c83a86cf6088e72ff4085e0fe6f966a Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 1 Jul 2019 17:09:22 +0300 Subject: [PATCH 189/191] remove extra semicolon --- dbms/src/DataStreams/TTLBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index 6fcdd7400f2..de0d4f9156b 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { return "TTLBlockInputStream"; } - Block getHeader() const override { return header; }; + Block getHeader() const override { return header; } protected: Block readImpl() override; From 2acaebb28833826c63a96d8359b2bd183c22299f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 17:28:39 +0300 Subject: [PATCH 190/191] Added test --- .../queries/0_stateless/00960_eval_ml_method_const.reference | 1 + dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference create mode 100644 dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql new file mode 100644 index 00000000000..401c83af917 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql @@ -0,0 +1 @@ +WITH (SELECT stochasticLinearRegressionState(1, 2, 3)) AS model 
SELECT evalMLMethod(model, toFloat64(1), toFloat64(1)); From 735eb8eecac1417bf8db993e8aef4a641c6a78b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 22:55:35 +0300 Subject: [PATCH 191/191] Removed `emacs' style of quotes --- libs/libcommon/src/DateLUT.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/libcommon/src/DateLUT.cpp b/libs/libcommon/src/DateLUT.cpp index 66ca8e6d201..cac38634a26 100644 --- a/libs/libcommon/src/DateLUT.cpp +++ b/libs/libcommon/src/DateLUT.cpp @@ -13,12 +13,12 @@ Poco::DigestEngine::Digest calcSHA1(const std::string & path) { std::ifstream stream(path); if (!stream) - throw Poco::Exception("Error while opening file: `" + path + "'."); + throw Poco::Exception("Error while opening file: '" + path + "'."); Poco::SHA1Engine digest_engine; Poco::DigestInputStream digest_stream(digest_engine, stream); digest_stream.ignore(std::numeric_limits::max()); if (!stream.eof()) - throw Poco::Exception("Error while reading file: `" + path + "'."); + throw Poco::Exception("Error while reading file: '" + path + "'."); return digest_engine.digest(); } @@ -39,7 +39,7 @@ std::string determineDefaultTimeZone() if (tz_env_var) { - error_prefix = std::string("Could not determine time zone from TZ variable value: `") + tz_env_var + "': "; + error_prefix = std::string("Could not determine time zone from TZ variable value: '") + tz_env_var + "': "; if (*tz_env_var == ':') ++tz_env_var;