Merge branch 'master' into some

commit 37bee1104d by chertus, 2019-07-12 18:16:03 +03:00
275 changed files with 21202 additions and 352 deletions

.gitmodules

@ -67,6 +67,12 @@
[submodule "contrib/libgsasl"]
path = contrib/libgsasl
url = https://github.com/ClickHouse-Extras/libgsasl.git
[submodule "contrib/libcxx"]
path = contrib/libcxx
url = https://github.com/llvm-mirror/libcxx.git
[submodule "contrib/libcxxabi"]
path = contrib/libcxxabi
url = https://github.com/llvm-mirror/libcxxabi.git
[submodule "contrib/snappy"]
path = contrib/snappy
url = https://github.com/google/snappy

CMakeLists.txt

@ -104,7 +104,6 @@ if (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "8.3.0")
endif ()
if (COMPILER_CLANG)
# clang: warning: argument unused during compilation: '-stdlib=libc++'
# clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument]
set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument")
endif ()
@ -203,19 +202,72 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fn
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
# Uses MAKE_STATIC_LIBRARIES
option (UNBUNDLED "Try to find all libraries in the system. We recommend avoiding this mode for production builds, because we cannot guarantee the exact versions and variants of libraries your system has installed. This mode exists for enthusiastic developers who are looking for trouble. It is also useful for maintainers of OS packages." OFF)
if (UNBUNDLED)
set(NOT_UNBUNDLED 0)
else ()
set(NOT_UNBUNDLED 1)
endif ()
# Using system libs can cause a lot of warnings in includes.
if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
option (NO_WERROR "Disable -Werror compiler option" ON)
endif ()
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads)
include (cmake/find_cxxabi.cmake)
include (cmake/find_cxx.cmake)
include (cmake/test_compiler.cmake)
if (OS_LINUX AND COMPILER_CLANG AND USE_STATIC_LIBRARIES)
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only makes sense on Linux)" ${HAVE_LIBCXX})
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
endif ()
endif ()
if (USE_LIBCXX)
set (STATIC_STDLIB_FLAGS "")
else ()
set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++")
endif ()
if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD))
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
# Along with executables, we also build an example of a shared library for the "library dictionary source"; it also should be self-contained.
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
endif ()
if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}")
endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
include (cmake/use_libcxx.cmake)
include (cmake/find_unwind.cmake)
if (USE_INTERNAL_UNWIND_LIBRARY)
option (USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING "Use internal unwind library for exception handling" ${USE_STATIC_LIBRARIES})
endif ()
# Set standard, system and compiler libraries explicitly.
# This is intended for more control of what we are linking.
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
set (DEFAULT_LIBS "")
if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING OR USE_LIBCXX))
# Note: this probably has no effect, but I'm not an expert in CMake.
@ -229,6 +281,8 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_L
set (BUILTINS_LIB_PATH "")
if (COMPILER_CLANG)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
set (BUILTINS_LIB_PATH "-lgcc")
endif ()
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
@ -254,9 +308,15 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_L
message (STATUS "Using exception handling library: ${EXCEPTION_HANDLING_LIBRARY}")
if (USE_LIBCXX)
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lc++ -lc++abi ${EXCEPTION_HANDLING_LIBRARY} ${BUILTINS_LIB_PATH} -Wl,-Bdynamic")
if (USE_INTERNAL_LIBCXX_LIBRARY)
set (LIBCXX_LIBS "${ClickHouse_BINARY_DIR}/contrib/libcxx-cmake/libcxx_static${${CMAKE_POSTFIX_VARIABLE}}.a ${ClickHouse_BINARY_DIR}/contrib/libcxxabi-cmake/libcxxabi_static${${CMAKE_POSTFIX_VARIABLE}}.a")
else ()
set (LIBCXX_LIBS "-lc++ -lc++abi")
endif ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic ${LIBCXX_LIBS} ${EXCEPTION_HANDLING_LIBRARY} ${BUILTINS_LIB_PATH} -Wl,-Bdynamic")
else ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lstdc++ ${EXCEPTION_HANDLING_LIBRARY} ${COVERAGE_OPTION} -lgcc ${BUILTINS_LIB_PATH} -Wl,-Bdynamic")
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lstdc++ ${EXCEPTION_HANDLING_LIBRARY} ${COVERAGE_OPTION} ${BUILTINS_LIB_PATH} -Wl,-Bdynamic")
endif ()
# Linking with GLIBC prevents portability of binaries to older systems.
@ -343,11 +403,16 @@ if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
option (NO_WERROR "Disable -Werror compiler option" ON)
endif ()
if (USE_LIBCXX AND USE_INTERNAL_LIBCXX_LIBRARY)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdinc++ -isystem ${LIBCXX_INCLUDE_DIR} -isystem ${LIBCXXABI_INCLUDE_DIR}")
endif ()
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
include(GNUInstallDirs)
include (cmake/find_contrib_lib.cmake)
find_contrib_lib(double-conversion) # Must be before parquet
include (cmake/find_ssl.cmake)
include (cmake/lib_name.cmake)
include (cmake/find_icu.cmake)
@ -382,16 +447,16 @@ include (cmake/find_pdqsort.cmake)
include (cmake/find_hdfs3.cmake) # uses protobuf
include (cmake/find_consistent-hashing.cmake)
include (cmake/find_base64.cmake)
include (cmake/find_parquet.cmake)
include (cmake/find_hyperscan.cmake)
include (cmake/find_mimalloc.cmake)
include (cmake/find_simdjson.cmake)
include (cmake/find_rapidjson.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)
find_contrib_lib(btrie)
find_contrib_lib(double-conversion)
include (cmake/find_parquet.cmake)
if (ENABLE_TESTS)
include (cmake/find_gtest.cmake)
@ -421,6 +486,11 @@ if (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING)
if (GLIBC_COMPATIBILITY)
add_dependencies(${target_name} glibc-compatibility)
endif ()
if (USE_LIBCXX AND USE_INTERNAL_LIBCXX_LIBRARY)
add_dependencies(${target_name} cxx_static cxxabi_static)
endif ()
if (USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING)
add_dependencies(${target_name} unwind_static)
endif ()

cmake/find_cxx.cmake (new file)

@ -0,0 +1,23 @@
if (NOT APPLE)
option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx library instead of bundled" ${NOT_UNBUNDLED})
endif ()
if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/include/vector")
message (WARNING "submodule contrib/libcxx is missing. To fix, try running: \n git submodule update --init --recursive")
set (USE_INTERNAL_LIBCXX_LIBRARY 0)
endif ()
if (NOT USE_INTERNAL_LIBCXX_LIBRARY)
find_library (LIBCXX_LIBRARY c++)
find_path (LIBCXX_INCLUDE_DIR NAMES vector PATHS ${LIBCXX_INCLUDE_PATHS})
endif ()
if (NOT (LIBCXX_LIBRARY AND LIBCXX_INCLUDE_DIR))
set (LIBCXX_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx/include)
set (USE_INTERNAL_LIBCXX_LIBRARY 1)
set (LIBCXX_LIBRARY cxx_static)
set (HAVE_LIBCXX 1)
endif ()
message (STATUS "Using libcxx: ${LIBCXX_INCLUDE_DIR} : ${LIBCXX_LIBRARY}")
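
Aside: once a binary links, the quickest way to confirm which standard library the USE_INTERNAL_LIBCXX_LIBRARY switch actually produced is to inspect the standard library's version macros. A minimal standalone sketch (plain C++, not part of this commit):

#include <cstdio>
#include <ciso646>  // pre-C++20 trick: pulls in the standard library's version macros

int main()
{
#if defined(_LIBCPP_VERSION)
    std::printf("built against libc++ %d\n", _LIBCPP_VERSION);
#elif defined(__GLIBCXX__)
    std::printf("built against libstdc++ (date stamp %lu)\n", static_cast<unsigned long>(__GLIBCXX__));
#else
    std::printf("unknown standard library\n");
#endif
}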

cmake/find_cxxabi.cmake (new file)

@ -0,0 +1,22 @@
if (NOT APPLE)
option (USE_INTERNAL_LIBCXXABI_LIBRARY "Set to FALSE to use system libcxxabi library instead of bundled" ${NOT_UNBUNDLED})
endif ()
if (USE_INTERNAL_LIBCXXABI_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/src")
message (WARNING "submodule contrib/libcxxabi is missing. To fix, try running: \n git submodule update --init --recursive")
set (USE_INTERNAL_LIBCXXABI_LIBRARY 0)
endif ()
if (NOT USE_INTERNAL_LIBCXXABI_LIBRARY)
find_library (LIBCXXABI_LIBRARY cxxabi)
find_path (LIBCXXABI_INCLUDE_DIR NAMES vector PATHS ${LIBCXXABI_INCLUDE_PATHS})
endif ()
if (NOT (LIBCXXABI_LIBRARY AND LIBCXXABI_INCLUDE_DIR))
set (LIBCXXABI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/include)
set (USE_INTERNAL_LIBCXXABI_LIBRARY 1)
set (LIBCXXABI_LIBRARY cxxabi_static)
endif ()
message (STATUS "Using libcxxabi: ${LIBCXXABI_INCLUDE_DIR} : ${LIBCXXABI_LIBRARY}")

cmake/sanitize.cmake

@ -6,28 +6,39 @@ if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
endif ()
elseif (SANITIZE STREQUAL "memory")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=memory")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=memory")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
elseif (SANITIZE STREQUAL "thread")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=thread")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=thread")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
endif ()
elseif (SANITIZE STREQUAL "undefined")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
endif ()
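
The GNU-only guards keep the sanitizer runtime flags off clang's link line, where the driver already handles them. To confirm a sanitizer actually made it into the binary, a deliberate-UB smoke test is enough; a minimal sketch for an -fsanitize=undefined build (not part of this commit):

#include <climits>

int main(int argc, char **)
{
    int x = INT_MAX;
    return (x + argc) & 1;  // signed overflow for any argc >= 1; UBSan should report it at runtime
}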

cmake/use_libcxx.cmake (deleted)

@ -1,24 +0,0 @@
# Uses MAKE_STATIC_LIBRARIES
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads)
include (cmake/test_compiler.cmake)
include (cmake/arch.cmake)
if (OS_LINUX AND COMPILER_CLANG)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX})
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
endif ()
endif ()
if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}")
endif ()

contrib/CMakeLists.txt

@ -15,6 +15,14 @@ if (USE_INTERNAL_UNWIND_LIBRARY)
add_subdirectory (libunwind-cmake)
endif ()
if (USE_LIBCXX AND USE_INTERNAL_LIBCXXABI_LIBRARY)
add_subdirectory(libcxxabi-cmake)
endif()
if (USE_LIBCXX AND USE_INTERNAL_LIBCXX_LIBRARY)
add_subdirectory(libcxx-cmake)
endif()
if (USE_INTERNAL_BOOST_LIBRARY)
add_subdirectory (boost-cmake)
@ -35,8 +43,7 @@ if (USE_INTERNAL_RE2_LIBRARY)
endif ()
if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
set (BUILD_TESTING 0 CACHE INTERNAL "")
add_subdirectory (double-conversion)
add_subdirectory (double-conversion-cmake)
endif ()
if (USE_INTERNAL_CITYHASH_LIBRARY)
@ -156,7 +163,7 @@ if (ENABLE_ODBC AND USE_INTERNAL_ODBC_LIBRARY)
add_library(ODBC::ODBC ALIAS ${ODBC_LIBRARIES})
endif ()
if (USE_INTERNAL_CAPNP_LIBRARY)
if (ENABLE_CAPNP AND USE_INTERNAL_CAPNP_LIBRARY)
set (BUILD_TESTING 0 CACHE INTERNAL "")
set (_save ${CMAKE_CXX_EXTENSIONS})
set (CMAKE_CXX_EXTENSIONS)
@ -284,7 +291,7 @@ if (USE_INTERNAL_LLVM_LIBRARY)
endif ()
if (USE_INTERNAL_LIBGSASL_LIBRARY)
add_subdirectory(libgsasl)
endif()
if (USE_INTERNAL_LIBXML2_LIBRARY)

contrib/double-conversion-cmake/CMakeLists.txt (new file)

@ -0,0 +1,14 @@
SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion)
add_library(double-conversion
${LIBRARY_DIR}/double-conversion/bignum.cc
${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc
${LIBRARY_DIR}/double-conversion/cached-powers.cc
${LIBRARY_DIR}/double-conversion/diy-fp.cc
${LIBRARY_DIR}/double-conversion/double-conversion.cc
${LIBRARY_DIR}/double-conversion/fast-dtoa.cc
${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc
${LIBRARY_DIR}/double-conversion/strtod.cc)
target_include_directories(double-conversion SYSTEM PUBLIC "${LIBRARY_DIR}")

contrib/libcxx (new submodule)

@ -0,0 +1 @@
Subproject commit 9807685d51db467e097ad5eb8d5c2c16922794b2

contrib/libcxx-cmake/CMakeLists.txt (new file)

@ -0,0 +1,51 @@
set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
#set(LIBCXX_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/libcxx)
set(SRCS
${LIBCXX_SOURCE_DIR}/src/optional.cpp
${LIBCXX_SOURCE_DIR}/src/variant.cpp
${LIBCXX_SOURCE_DIR}/src/chrono.cpp
${LIBCXX_SOURCE_DIR}/src/thread.cpp
${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp
${LIBCXX_SOURCE_DIR}/src/iostream.cpp
${LIBCXX_SOURCE_DIR}/src/strstream.cpp
${LIBCXX_SOURCE_DIR}/src/ios.cpp
${LIBCXX_SOURCE_DIR}/src/future.cpp
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp
${LIBCXX_SOURCE_DIR}/src/hash.cpp
${LIBCXX_SOURCE_DIR}/src/string.cpp
${LIBCXX_SOURCE_DIR}/src/debug.cpp
#${LIBCXX_SOURCE_DIR}/src/support/win32/support.cpp
#${LIBCXX_SOURCE_DIR}/src/support/win32/locale_win32.cpp
#${LIBCXX_SOURCE_DIR}/src/support/win32/thread_win32.cpp
#${LIBCXX_SOURCE_DIR}/src/support/solaris/xlocale.cpp
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp
${LIBCXX_SOURCE_DIR}/src/utility.cpp
${LIBCXX_SOURCE_DIR}/src/any.cpp
${LIBCXX_SOURCE_DIR}/src/exception.cpp
${LIBCXX_SOURCE_DIR}/src/memory.cpp
${LIBCXX_SOURCE_DIR}/src/new.cpp
${LIBCXX_SOURCE_DIR}/src/valarray.cpp
${LIBCXX_SOURCE_DIR}/src/vector.cpp
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
${LIBCXX_SOURCE_DIR}/src/functional.cpp
${LIBCXX_SOURCE_DIR}/src/regex.cpp
${LIBCXX_SOURCE_DIR}/src/bind.cpp
${LIBCXX_SOURCE_DIR}/src/mutex.cpp
${LIBCXX_SOURCE_DIR}/src/charconv.cpp
${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp
${LIBCXX_SOURCE_DIR}/src/locale.cpp
${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp
${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp
${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp
${LIBCXX_SOURCE_DIR}/src/system_error.cpp
${LIBCXX_SOURCE_DIR}/src/random.cpp
)
add_library(cxx_static ${SRCS})
target_include_directories(cxx_static PUBLIC ${LIBCXX_SOURCE_DIR}/include)
target_compile_definitions(cxx_static PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
target_compile_options(cxx_static PRIVATE -nostdinc++)

contrib/libcxxabi (new submodule)

@ -0,0 +1 @@
Subproject commit d56efcc7a52739518dbe7df9e743073e00951fa1

contrib/libcxxabi-cmake/CMakeLists.txt (new file)

@ -0,0 +1,34 @@
set(LIBCXXABI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxxabi)
set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
#set(LIBCXXABI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/libcxxabi)
set(SRCS
${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp
${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp
#${LIBCXXABI_SOURCE_DIR}/src/cxa_noexception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp
${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp
)
add_library(cxxabi_static ${SRCS})
target_include_directories(cxxabi_static PUBLIC ${LIBCXXABI_SOURCE_DIR}/include ${LIBCXX_SOURCE_DIR}/include)
target_compile_definitions(cxxabi_static PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_options(cxxabi_static PRIVATE -nostdinc++ -fno-sanitize=undefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.

contrib/librdkafka-cmake/config.h

@ -77,8 +77,6 @@
#define HAVE_PTHREAD_SETNAME_GNU 1
// python
//#define HAVE_PYTHON 1
// C11 threads
#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
# define WITH_C11THREADS 1
#endif
// disable C11 threads for compatibility with old libc
#define WITH_C11THREADS 0
#endif /* _CONFIG_H_ */

contrib/libunwind (submodule removed)

@ -1 +0,0 @@
Subproject commit ec86b1c6a2c6b8ba316f429db9a6d4122dd12710

dbms/CMakeLists.txt

@ -108,6 +108,7 @@ add_headers_and_sources(clickhouse_common_io src/Common/HashTable)
add_headers_and_sources(clickhouse_common_io src/IO)
add_headers_and_sources(dbms src/Core)
add_headers_and_sources(dbms src/Compression/)
add_headers_and_sources(dbms src/DataStreams)
add_headers_and_sources(dbms src/DataTypes)
add_headers_and_sources(dbms src/Databases)
@ -119,6 +120,13 @@ add_headers_and_sources(dbms src/Storages/Distributed)
add_headers_and_sources(dbms src/Storages/MergeTree)
add_headers_and_sources(dbms src/Client)
add_headers_and_sources(dbms src/Formats)
add_headers_and_sources(dbms src/Processors)
add_headers_and_sources(dbms src/Processors/Executors)
add_headers_and_sources(dbms src/Processors/Formats)
add_headers_and_sources(dbms src/Processors/Formats/Impl)
add_headers_and_sources(dbms src/Processors/Transforms)
add_headers_and_sources(dbms src/Processors/Sources)
add_headers_only(dbms src/Server)
if(USE_RDKAFKA)
add_headers_and_sources(dbms src/Storages/Kafka)
@ -233,6 +241,10 @@ target_link_libraries(clickhouse_common_io
roaring
)
if(ZSTD_LIBRARY)
target_link_libraries(clickhouse_common_io PRIVATE ${ZSTD_LIBRARY})
endif()
if (USE_RDKAFKA)
target_link_libraries(dbms PRIVATE ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY})
if(NOT USE_INTERNAL_RDKAFKA_LIBRARY)
@ -259,8 +271,6 @@ if(CPUINFO_LIBRARY)
endif()
target_link_libraries (dbms
PUBLIC
clickhouse_compression
PRIVATE
clickhouse_parsers
clickhouse_common_config
@ -270,6 +280,7 @@ target_link_libraries (dbms
clickhouse_common_io
PRIVATE
clickhouse_dictionaries_embedded
${LZ4_LIBRARY}
PUBLIC
${MYSQLXX_LIBRARY}
PRIVATE
@ -281,9 +292,22 @@ target_link_libraries (dbms
Threads::Threads
)
if(ZSTD_LIBRARY)
target_link_libraries(clickhouse_common_io PRIVATE ${ZSTD_LIBRARY})
endif()
target_include_directories(dbms PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include)
target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include) # uses some includes from core
target_include_directories(dbms SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR})
target_include_directories(dbms SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
if (NOT USE_INTERNAL_LZ4_LIBRARY)
target_include_directories(dbms SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})
endif ()
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
target_include_directories(dbms SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()
if (NOT USE_INTERNAL_BOOST_LIBRARY)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})

dbms/cmake/version.cmake

@ -1,11 +1,11 @@
# These strings are auto-changed by release_lib.sh:
set(VERSION_REVISION 54423)
set(VERSION_REVISION 54424)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 11)
set(VERSION_PATCH 0)
set(VERSION_GITHASH badb6ab8310ed94e20f43ac9a9a227f7a2590009)
set(VERSION_DESCRIBE v19.11.0-testing)
set(VERSION_STRING 19.11.0)
set(VERSION_MINOR 12)
set(VERSION_PATCH 1)
set(VERSION_GITHASH a584f0ca6cb5df9b0d9baf1e2e1eaa7d12a20a44)
set(VERSION_DESCRIBE v19.12.1.1-prestable)
set(VERSION_STRING 19.12.1.1)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")

dbms/programs/compressor/CMakeLists.txt

@ -1,7 +1,7 @@
# Also in utils
set(CLICKHOUSE_COMPRESSOR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Compressor.cpp)
set(CLICKHOUSE_COMPRESSOR_LINK PRIVATE clickhouse_compression clickhouse_common_io clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY})
set(CLICKHOUSE_COMPRESSOR_LINK PRIVATE dbms clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY})
#set(CLICKHOUSE_COMPRESSOR_INCLUDE SYSTEM PRIVATE ...)
clickhouse_program_add(compressor)

dbms/programs/server/CMakeLists.txt

@ -8,10 +8,16 @@ set(CLICKHOUSE_SERVER_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/RootRequestHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Server.cpp
${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp
)
if (USE_POCO_NETSSL)
set(CLICKHOUSE_SERVER_SOURCES
${CLICKHOUSE_SERVER_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp
)
endif ()
set(CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_dictionaries clickhouse_common_io clickhouse_common_config clickhouse_common_zookeeper clickhouse_parsers string_utils PUBLIC daemon PRIVATE clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY})
if (USE_POCO_NETSSL)
set(CLICKHOUSE_SERVER_LINK ${CLICKHOUSE_SERVER_LINK} PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})

dbms/programs/server/MySQLHandler.cpp

@ -18,13 +18,19 @@
#include <limits>
#include <ext/scope_guard.h>
#include <openssl/rsa.h>
namespace DB
{
using namespace MySQLProtocol;
using Poco::Net::SecureStreamSocket;
using Poco::Net::SSLManager;
namespace ErrorCodes
{
extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES;

dbms/programs/server/MySQLHandler.h

@ -4,7 +4,6 @@
#include <Poco/Net/SecureStreamSocket.h>
#include <Common/getFQDNOrHostName.h>
#include <Core/MySQLProtocol.h>
#include <openssl/rsa.h>
#include "IServer.h"

dbms/programs/server/Server.cpp

@ -746,6 +746,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (config().has("mysql_port"))
{
#if USE_POCO_NETSSL
Poco::Net::ServerSocket socket;
auto address = socket_bind_listen(socket, listen_host, config().getInt("mysql_port"), /* secure = */ true);
socket.setReceiveTimeout(Poco::Timespan());
@ -757,6 +758,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for MySQL compatibility protocol: " + address.toString());
#else
throw Exception{"SSL support for MySQL protocol is disabled because Poco library was built without NetSSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
}
}
catch (const Poco::Exception & e)

dbms/programs/server/TCPHandler.cpp

@ -29,6 +29,8 @@
#include <DataTypes/DataTypeLowCardinality.h>
#include <Compression/CompressionFactory.h>
#include <Processors/Formats/LazyOutputFormat.h>
#include "TCPHandler.h"
@ -207,6 +209,8 @@ void TCPHandler::runImpl()
/// Does the request require receiving data from the client?
if (state.need_receive_data_for_insert)
processInsertQuery(global_settings);
else if (state.io.pipeline.initialized())
processOrdinaryQueryWithProcessors(query_context->getSettingsRef().max_threads);
else
processOrdinaryQuery();
@ -447,9 +451,9 @@ void TCPHandler::processOrdinaryQuery()
*/
if (!block && !isQueryCancelled())
{
sendTotals();
sendExtremes();
sendProfileInfo();
sendTotals(state.io.in->getTotals());
sendExtremes(state.io.in->getExtremes());
sendProfileInfo(state.io.in->getProfileInfo());
sendProgress();
sendLogs();
}
@ -465,6 +469,129 @@ void TCPHandler::processOrdinaryQuery()
state.io.onFinish();
}
void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
{
auto & pipeline = state.io.pipeline;
/// Send the header block, so the client can prepare an output format for the data to come.
{
auto & header = pipeline.getHeader();
if (header)
sendData(header);
}
auto lazy_format = std::make_shared<LazyOutputFormat>(pipeline.getHeader());
pipeline.setOutput(lazy_format);
{
auto thread_group = CurrentThread::getGroup();
ThreadPool pool(1);
auto executor = pipeline.execute();
std::atomic_bool exception = false;
pool.schedule([&]()
{
/// ThreadStatus thread_status;
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};
setThreadName("QueryPipelineEx");
try
{
executor->execute(num_threads);
}
catch (...)
{
exception = true;
throw;
}
});
/// Wait in case of exception. Delete pipeline to release memory.
SCOPE_EXIT(
/// Clear the queue in case somebody is still waiting for lazy_format to push.
lazy_format->finish();
lazy_format->clearQueue();
pool.wait();
pipeline = QueryPipeline()
);
while (true)
{
Block block;
while (true)
{
if (isQueryCancelled())
{
/// A packet was received requesting to stop execution of the request.
executor->cancel();
break;
}
else
{
if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay)
{
/// Some time has passed and there is progress to report.
after_send_progress.restart();
sendProgress();
}
sendLogs();
if ((block = lazy_format->getBlock(query_context->getSettingsRef().interactive_delay / 1000)))
break;
if (lazy_format->isFinished())
break;
if (exception)
{
pool.wait();
break;
}
}
}
/** If the data has run out, we send the profiling data and totals up to
 * the final zero block, so that this information can be used
 * in the suffix output of the stream.
 * If the request was interrupted, `sendTotals` and similar methods must not be called,
 * because we have not read all the data yet,
 * and there could be ongoing calculations in other threads at the same time.
 */
if (!block && !isQueryCancelled())
{
pool.wait();
pipeline.finalize();
sendTotals(lazy_format->getTotals());
sendExtremes(lazy_format->getExtremes());
sendProfileInfo(lazy_format->getProfileInfo());
sendProgress();
sendLogs();
}
sendData(block);
if (!block)
break;
}
}
state.io.onFinish();
}
void TCPHandler::processTablesStatusRequest()
{
@ -495,18 +622,16 @@ void TCPHandler::processTablesStatusRequest()
}
void TCPHandler::sendProfileInfo()
void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info)
{
writeVarUInt(Protocol::Server::ProfileInfo, *out);
state.io.in->getProfileInfo().write(*out);
info.write(*out);
out->next();
}
void TCPHandler::sendTotals()
void TCPHandler::sendTotals(const Block & totals)
{
const Block & totals = state.io.in->getTotals();
if (totals)
{
initBlockOutput(totals);
@ -521,10 +646,8 @@ void TCPHandler::sendTotals()
}
void TCPHandler::sendExtremes()
void TCPHandler::sendExtremes(const Block & extremes)
{
Block extremes = state.io.in->getExtremes();
if (extremes)
{
initBlockOutput(extremes);
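
In short, the processors path runs the pipeline on a background thread and turns the connection thread into a consumer that polls LazyOutputFormat with a timeout, interleaving progress and log packets with data. A condensed sketch of that consumer loop (names as in the code above; sendProgressIfNeeded is a hypothetical stand-in for the interactive_delay bookkeeping):

while (true)
{
    if (isQueryCancelled())
    {
        executor->cancel();      // the client asked to stop; unwind the pipeline
        break;
    }

    sendProgressIfNeeded();      // hypothetical: progress + logs, rate-limited by interactive_delay

    if (Block block = lazy_format->getBlock(timeout_ms))
    {
        sendData(block);         // a result block is ready
        continue;
    }

    if (lazy_format->isFinished())
        break;                   // pipeline drained; totals, extremes and profile info follow
}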

dbms/programs/server/TCPHandler.h

@ -145,6 +145,8 @@ private:
/// Process a request that does not require the receiving of data blocks from the client
void processOrdinaryQuery();
void processOrdinaryQueryWithProcessors(size_t num_threads);
void processTablesStatusRequest();
void sendHello();
@ -155,9 +157,9 @@ private:
void sendProgress();
void sendLogs();
void sendEndOfStream();
void sendProfileInfo();
void sendTotals();
void sendExtremes();
void sendProfileInfo(const BlockStreamProfileInfo & info);
void sendTotals(const Block & totals);
void sendExtremes(const Block & extremes);
/// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled.
void initBlockInput();

dbms/src/CMakeLists.txt

@ -12,5 +12,6 @@ add_subdirectory (Interpreters)
add_subdirectory (AggregateFunctions)
add_subdirectory (Client)
add_subdirectory (TableFunctions)
add_subdirectory (Processors)
add_subdirectory (Formats)
add_subdirectory (Compression)

dbms/src/Columns/Collator.cpp

@ -6,9 +6,9 @@
#include <unicode/ucol.h>
#else
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#pragma clang diagnostic ignored "-Wmissing-noreturn"
#endif
#include <Common/Exception.h>
@ -26,7 +26,6 @@ namespace DB
}
}
Collator::Collator(const std::string & locale_) : locale(Poco::toLower(locale_))
{
#if USE_ICU

dbms/src/Common/EventCounter.h (new file)

@ -0,0 +1,47 @@
#pragma once
#include <cstdint>
#include <mutex>
#include <condition_variable>
/** Allows subscribing to multiple events and waiting for them one by one, in arbitrary order.
*/
class EventCounter
{
private:
size_t events_happened = 0;
size_t events_waited = 0;
mutable std::mutex mutex;
std::condition_variable condvar;
public:
void notify()
{
{
std::lock_guard lock(mutex);
++events_happened;
}
condvar.notify_all();
}
void wait()
{
std::unique_lock lock(mutex);
condvar.wait(lock, [&]{ return events_happened > events_waited; });
++events_waited;
}
template <typename Duration>
bool waitFor(Duration && duration)
{
std::unique_lock lock(mutex);
if (condvar.wait_for(lock, std::forward<Duration>(duration), [&]{ return events_happened > events_waited; }))
{
++events_waited;
return true;
}
return false;
}
};
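
A minimal standalone usage sketch (assuming the header above is reachable as Common/EventCounter.h):

#include <chrono>
#include <iostream>
#include <thread>

#include <Common/EventCounter.h>

int main()
{
    EventCounter counter;

    std::thread producer([&]
    {
        for (int i = 0; i < 3; ++i)
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            counter.notify();    // may fire before or after the consumer starts waiting
        }
    });

    for (int i = 0; i < 3; ++i)
    {
        counter.wait();          // consumes exactly one event per call, in any order
        std::cout << "observed event " << i + 1 << '\n';
    }

    producer.join();
}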

dbms/src/Common/Stopwatch.h

@ -189,3 +189,16 @@ private:
Timestamp stop_ts;
bool is_running = false;
};
template <typename TStopwatch>
class StopwatchGuard : public TStopwatch
{
public:
explicit StopwatchGuard(UInt64 & elapsed_ns_) : elapsed_ns(elapsed_ns_) {}
~StopwatchGuard() { elapsed_ns += TStopwatch::elapsedNanoseconds(); }
private:
UInt64 & elapsed_ns;
};
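
A usage sketch for the new guard, assuming the in-tree Stopwatch (which starts timing on construction and exposes elapsedNanoseconds()); processChunk and num_chunks stand in for a hypothetical workload:

UInt64 elapsed_ns = 0;

for (size_t i = 0; i < num_chunks; ++i)
{
    StopwatchGuard<Stopwatch> watch(elapsed_ns);  // starts timing here ...
    processChunk(i);
}                                                 // ... and accumulates this iteration's time on scope exit

std::cerr << "processChunk: " << elapsed_ns / 1000000 << " ms total\n";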

dbms/src/Common/tests/CMakeLists.txt

@ -23,10 +23,10 @@ add_executable (small_table small_table.cpp)
target_link_libraries (small_table PRIVATE clickhouse_common_io)
add_executable (parallel_aggregation parallel_aggregation.cpp)
target_link_libraries (parallel_aggregation PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (parallel_aggregation PRIVATE dbms)
add_executable (parallel_aggregation2 parallel_aggregation2.cpp)
target_link_libraries (parallel_aggregation2 PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (parallel_aggregation2 PRIVATE dbms)
add_executable (int_hashes_perf int_hashes_perf.cpp AvalancheTest.cpp Random.cpp)
target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io)
@ -42,7 +42,7 @@ add_executable (radix_sort radix_sort.cpp)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
target_link_libraries (arena_with_free_lists PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (arena_with_free_lists PRIVATE dbms)
add_executable (pod_array pod_array.cpp)
target_link_libraries (pod_array PRIVATE clickhouse_common_io)
@ -62,7 +62,7 @@ target_link_libraries (space_saving PRIVATE clickhouse_common_io)
add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
target_include_directories (integer_hash_tables_and_hashes SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (integer_hash_tables_and_hashes PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms)
add_executable (allocator allocator.cpp)
target_link_libraries (allocator PRIVATE clickhouse_common_io)

dbms/src/Compression/CMakeLists.txt

@ -1,21 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_compression .)
add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
target_link_libraries(clickhouse_compression PRIVATE clickhouse_parsers clickhouse_common_io ${LZ4_LIBRARY} ${CITYHASH_LIBRARIES})
if(ZSTD_LIBRARY)
target_link_libraries(clickhouse_compression PRIVATE ${ZSTD_LIBRARY})
endif()
target_include_directories(clickhouse_compression PUBLIC ${DBMS_INCLUDE_DIR})
target_include_directories(clickhouse_compression SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
if (NOT USE_INTERNAL_LZ4_LIBRARY)
target_include_directories(clickhouse_compression SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})
endif ()
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
target_include_directories(clickhouse_compression SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()
if(ENABLE_TESTS)
add_subdirectory(tests)
endif()

dbms/src/Compression/LZ4_decompress_faster.cpp

@ -154,7 +154,7 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset)
*/
inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
#ifdef __SSSE3__
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
static constexpr UInt8 __attribute__((__aligned__(8))) masks[] =
{
@ -268,7 +268,7 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset)
inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
#ifdef __SSSE3__
#if defined(__SSSE3__) && !defined(MEMORY_SANITIZER)
static constexpr UInt8 __attribute__((__aligned__(16))) masks[] =
{

dbms/src/Compression/tests/CMakeLists.txt

@ -1,5 +1,5 @@
add_executable (compressed_buffer compressed_buffer.cpp)
target_link_libraries (compressed_buffer PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (compressed_buffer PRIVATE dbms)
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp)
target_link_libraries (cached_compressed_read_buffer PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (cached_compressed_read_buffer PRIVATE dbms)

dbms/src/Core/Block.cpp

@ -370,6 +370,11 @@ Block Block::cloneWithColumns(const Columns & columns) const
Block res;
size_t num_columns = data.size();
if (num_columns != columns.size())
throw Exception("Cannot clone block with columns because block has " + toString(num_columns) + " columns, "
"but " + toString(columns.size()) + " columns given.", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < num_columns; ++i)
res.insert({ columns[i], data[i].type, data[i].name });

dbms/src/Core/Defines.h

@ -108,6 +108,14 @@
#define THREAD_SANITIZER 1
#endif
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define MEMORY_SANITIZER 1
#endif
#elif defined(__MEMORY_SANITIZER__)
#define MEMORY_SANITIZER 1
#endif
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can.
/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input.

dbms/src/Core/MySQLProtocol.cpp

@ -30,7 +30,7 @@ String PacketSender::packetToText(String payload)
uint64_t readLengthEncodedNumber(std::istringstream & ss)
{
char c;
char c{};
uint64_t buf = 0;
ss.get(c);
auto cc = static_cast<uint8_t>(c);
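
The char c{} change value-initializes the byte, so a failed get() cannot leave c indeterminate (which MemorySanitizer would flag). For reference, a standalone sketch of the length-encoded integer framing this function decodes, assuming the usual MySQL wire rules and a little-endian host:

#include <cstdint>
#include <sstream>

uint64_t readLengthEncodedNumberSketch(std::istringstream & ss)
{
    char c{};                       // value-initialized: no indeterminate read on stream failure
    uint64_t buf = 0;
    ss.get(c);
    auto cc = static_cast<uint8_t>(c);
    if (cc < 0xfb)
        return cc;                  // one-byte integer stored inline
    size_t bytes = (cc == 0xfc) ? 2 : (cc == 0xfd) ? 3 : 8;  // 0xfc, 0xfd and 0xfe length prefixes
    ss.read(reinterpret_cast<char *>(&buf), bytes);          // little-endian payload
    return buf;
}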

dbms/src/Core/Settings.h

@ -326,8 +326,10 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \
M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
\
M(SettingBool, allow_hyperscan, 1, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
M(SettingBool, allow_simdjson, 1, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
\
M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
\
M(SettingUInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.")

dbms/src/Core/SortCursor.h

@ -3,6 +3,7 @@
#include <Common/typeid_cast.h>
#include <Core/SortDescription.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnString.h>
@ -47,26 +48,44 @@ struct SortCursorImpl
reset(block);
}
SortCursorImpl(const Columns & columns, const SortDescription & desc_, size_t order_ = 0)
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
{
for (auto & column_desc : desc)
{
if (!column_desc.column_name.empty())
throw Exception("SortDescription should contain column positions if SortCursor is used without a header.",
ErrorCodes::LOGICAL_ERROR);
}
reset(columns, {});
}
bool empty() const { return rows == 0; }
/// Set the cursor to the beginning of the new block.
void reset(const Block & block)
{
reset(block.getColumns(), block);
}
/// Set the cursor to the beginning of the new block.
void reset(const Columns & columns, const Block & block)
{
all_columns.clear();
sort_columns.clear();
size_t num_columns = block.columns();
size_t num_columns = columns.size();
for (size_t j = 0; j < num_columns; ++j)
all_columns.push_back(block.safeGetByPosition(j).column.get());
all_columns.push_back(columns[j].get());
for (size_t j = 0, size = desc.size(); j < size; ++j)
{
size_t column_number = !desc[j].column_name.empty()
? block.getPositionByName(desc[j].column_name)
: desc[j].column_number;
sort_columns.push_back(block.safeGetByPosition(column_number).column.get());
auto & column_desc = desc[j];
size_t column_number = !column_desc.column_name.empty()
? block.getPositionByName(column_desc.column_name)
: column_desc.column_number;
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String)
has_collation |= need_collation[j];
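
The new constructor makes it possible to sort raw Columns without a Block header, which is why it insists on positional sort descriptions. A short in-tree usage sketch (assumes ClickHouse headers; positions index into columns):

SortDescription description;
description.emplace_back(/* column_number = */ 0, /* direction = */ 1, /* nulls_direction = */ 1);

SortCursorImpl cursor(columns, description);  // throws LOGICAL_ERROR if an entry names a column instead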

dbms/src/DataStreams/BlockIO.h

@ -4,6 +4,8 @@
#include <functional>
#include <Processors/QueryPipeline.h>
namespace DB
{
@ -25,6 +27,8 @@ struct BlockIO
BlockOutputStreamPtr out;
BlockInputStreamPtr in;
QueryPipeline pipeline;
/// Callbacks for query logging could be set here.
std::function<void(IBlockInputStream *, IBlockOutputStream *)> finish_callback;
std::function<void()> exception_callback;
@ -54,6 +58,7 @@ struct BlockIO
process_list_entry = rhs.process_list_entry;
in = rhs.in;
out = rhs.out;
pipeline = rhs.pipeline;
finish_callback = rhs.finish_callback;
exception_callback = rhs.exception_callback;

dbms/src/DataStreams/BlockStreamProfileInfo.cpp

@ -71,6 +71,9 @@ void BlockStreamProfileInfo::update(Block & block)
void BlockStreamProfileInfo::collectInfosForStreamsWithName(const char * name, BlockStreamProfileInfos & res) const
{
if (!parent)
return;
if (parent->getName() == name)
{
res.push_back(this);

dbms/src/DataStreams/BlockStreamProfileInfo.h

@ -50,6 +50,13 @@ struct BlockStreamProfileInfo
/// If skip_block_size_info is true, then rows, bytes and block fields are ignored.
void setFrom(const BlockStreamProfileInfo & rhs, bool skip_block_size_info);
/// Only for Processors.
void setRowsBeforeLimit(size_t rows_before_limit_)
{
applied_limit = true;
rows_before_limit = rows_before_limit_;
}
private:
void calculateRowsBeforeLimit() const;

dbms/src/DataStreams/LimitBlockInputStream.cpp

@ -23,7 +23,7 @@ Block LimitBlockInputStream::readImpl()
Block res;
UInt64 rows = 0;
/// pos - how many lines were read, including the last read block
/// pos - how many rows were read, including the last read block
if (pos >= offset + limit)
{
@ -46,7 +46,7 @@ Block LimitBlockInputStream::readImpl()
pos += rows;
} while (pos <= offset);
/// give away the whole block
/// return the whole block
if (pos >= offset + rows && pos <= offset + limit)
return res;
@ -61,7 +61,7 @@ Block LimitBlockInputStream::readImpl()
static_cast<Int64>(limit) + static_cast<Int64>(offset) - static_cast<Int64>(pos) + static_cast<Int64>(rows)));
for (size_t i = 0; i < res.columns(); ++i)
res.safeGetByPosition(i).column = res.safeGetByPosition(i).column->cut(start, length);
res.getByPosition(i).column = res.getByPosition(i).column->cut(start, length);
// TODO: we should provide feedback to child-block, so it will know how many rows are actually consumed.
// It's crucial for streaming engines like Kafka.

dbms/src/DataStreams/MergeSortingBlockInputStream.cpp

@ -9,7 +9,7 @@ void removeConstantsFromBlock(Block & block)
size_t i = 0;
while (i < columns)
{
if (isColumnConst(*block.getByPosition(i).column))
if (block.getByPosition(i).column && isColumnConst(*block.getByPosition(i).column))
{
block.erase(i);
--columns;
@ -22,13 +22,14 @@ void removeConstantsFromBlock(Block & block)
void removeConstantsFromSortDescription(const Block & header, SortDescription & description)
{
/// Note: This code is not correct if column description contains column numbers instead of column names.
/// Hopefully, everywhere it is used, the column description contains names.
description.erase(std::remove_if(description.begin(), description.end(),
[&](const SortColumnDescription & elem)
{
if (!elem.column_name.empty())
return isColumnConst(*header.getByName(elem.column_name).column);
else
return isColumnConst(*header.safeGetByPosition(elem.column_number).column);
auto & column = !elem.column_name.empty() ? header.getByName(elem.column_name)
: header.safeGetByPosition(elem.column_number);
return column.column && isColumnConst(*column.column);
}), description.end());
}
@ -41,7 +42,7 @@ void enrichBlockWithConstants(Block & block, const Block & header)
for (size_t i = 0; i < columns; ++i)
{
const auto & col_type_name = header.getByPosition(i);
if (isColumnConst(*col_type_name.column))
if (col_type_name.column && isColumnConst(*col_type_name.column))
block.insert(i, {col_type_name.column->cloneResized(rows), col_type_name.type, col_type_name.name});
}
}

dbms/src/Dictionaries/ClickHouseDictionarySource.cpp

@ -74,6 +74,8 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
{
/// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication).
context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1", 0), {});
/// Processors are not supported here yet.
context.getSettingsRef().experimental_use_processors = false;
}

dbms/src/Formats/FormatFactory.cpp

@ -4,6 +4,7 @@
#include <DataStreams/MaterializingBlockOutputStream.h>
#include <Formats/FormatSettings.h>
#include <Formats/FormatFactory.h>
#include <Processors/Formats/IRowInputFormat.h>
namespace DB
@ -26,6 +27,50 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name)
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
}
const FormatFactory::ProcessorCreators & FormatFactory::getProcessorCreators(const String & name) const
{
auto it = processors_dict.find(name);
if (processors_dict.end() != it)
return it->second;
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
}
static FormatSettings getInputFormatSetting(const Settings & settings)
{
FormatSettings format_settings;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.with_names_use_header = settings.input_format_with_names_use_header;
format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
return format_settings;
}
static FormatSettings getOutputFormatSetting(const Settings & settings)
{
FormatSettings format_settings;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.write_statistics = settings.output_format_write_statistics;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
return format_settings;
}
BlockInputStreamPtr FormatFactory::getInput(
const String & name,
@ -41,19 +86,7 @@ BlockInputStreamPtr FormatFactory::getInput(
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.with_names_use_header = settings.input_format_with_names_use_header;
format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
FormatSettings format_settings = getInputFormatSetting(settings);
return input_getter(
buf, sample, context, max_block_size, rows_portion_size, callback ? callback : ReadCallback(), format_settings);
@ -67,19 +100,7 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.write_statistics = settings.output_format_write_statistics;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
FormatSettings format_settings = getOutputFormatSetting(settings);
/** Materialization is needed, because formats can use functions of `IDataType`
* which only work with full columns.
@ -89,12 +110,46 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
}
InputFormatPtr FormatFactory::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, const Context & context, UInt64 max_block_size) const
{
const auto & input_getter = getProcessorCreators(name).first;
if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getInputFormatSetting(settings);
RowInputFormatParams params;
params.max_block_size = max_block_size;
params.allow_errors_num = format_settings.input_allow_errors_num;
params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
return input_getter(buf, sample, context, params, format_settings);
}
OutputFormatPtr FormatFactory::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const
{
const auto & output_getter = getProcessorCreators(name).second;
if (!output_getter)
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getOutputFormatSetting(settings);
/** TODO: Materialization is needed, because formats can use functions of `IDataType`
* which only work with full columns.
*/
return output_getter(buf, sample, context, format_settings);
}
void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator)
{
auto & target = dict[name].first;
if (target)
throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = input_creator;
target = std::move(input_creator);
}
void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator)
@ -102,7 +157,23 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp
auto & target = dict[name].second;
if (target)
throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = output_creator;
target = std::move(output_creator);
}
void FormatFactory::registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator)
{
auto & target = processors_dict[name].first;
if (target)
throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = std::move(input_creator);
}
void FormatFactory::registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator)
{
auto & target = processors_dict[name].second;
if (target)
throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = std::move(output_creator);
}
@ -127,6 +198,25 @@ void registerOutputFormatParquet(FormatFactory & factory);
void registerInputFormatProtobuf(FormatFactory & factory);
void registerOutputFormatProtobuf(FormatFactory & factory);
void registerInputFormatProcessorNative(FormatFactory & factory);
void registerOutputFormatProcessorNative(FormatFactory & factory);
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
void registerInputFormatProcessorValues(FormatFactory & factory);
void registerOutputFormatProcessorValues(FormatFactory & factory);
void registerInputFormatProcessorCSV(FormatFactory & factory);
void registerOutputFormatProcessorCSV(FormatFactory & factory);
void registerInputFormatProcessorTSKV(FormatFactory & factory);
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerInputFormatProcessorParquet(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
/// Output only (presentational) formats.
void registerOutputFormatPretty(FormatFactory & factory);
@ -141,9 +231,22 @@ void registerOutputFormatODBCDriver2(FormatFactory & factory);
void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatMySQLWire(FormatFactory & factory);
void registerOutputFormatProcessorPretty(FormatFactory & factory);
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
void registerOutputFormatProcessorVertical(FormatFactory & factory);
void registerOutputFormatProcessorJSON(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory);
/// Input only formats.
void registerInputFormatCapnProto(FormatFactory & factory);
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
FormatFactory::FormatFactory()
@ -168,6 +271,28 @@ FormatFactory::FormatFactory()
registerInputFormatParquet(*this);
registerOutputFormatParquet(*this);
registerOutputFormatMySQLWire(*this);
registerInputFormatProcessorNative(*this);
registerOutputFormatProcessorNative(*this);
registerInputFormatProcessorRowBinary(*this);
registerOutputFormatProcessorRowBinary(*this);
registerInputFormatProcessorTabSeparated(*this);
registerOutputFormatProcessorTabSeparated(*this);
registerInputFormatProcessorValues(*this);
registerOutputFormatProcessorValues(*this);
registerInputFormatProcessorCSV(*this);
registerOutputFormatProcessorCSV(*this);
registerInputFormatProcessorTSKV(*this);
registerOutputFormatProcessorTSKV(*this);
registerInputFormatProcessorJSONEachRow(*this);
registerOutputFormatProcessorJSONEachRow(*this);
registerInputFormatProcessorProtobuf(*this);
registerOutputFormatProcessorProtobuf(*this);
registerInputFormatProcessorCapnProto(*this);
registerInputFormatProcessorParquet(*this);
registerOutputFormatProcessorParquet(*this);
registerOutputFormatPretty(*this);
registerOutputFormatPrettyCompact(*this);
registerOutputFormatPrettySpace(*this);
@ -178,7 +303,18 @@ FormatFactory::FormatFactory()
registerOutputFormatODBCDriver(*this);
registerOutputFormatODBCDriver2(*this);
registerOutputFormatNull(*this);
registerOutputFormatMySQLWire(*this);
registerOutputFormatProcessorPretty(*this);
registerOutputFormatProcessorPrettyCompact(*this);
registerOutputFormatProcessorPrettySpace(*this);
registerOutputFormatProcessorVertical(*this);
registerOutputFormatProcessorJSON(*this);
registerOutputFormatProcessorJSONCompact(*this);
registerOutputFormatProcessorXML(*this);
registerOutputFormatProcessorODBCDriver(*this);
registerOutputFormatProcessorODBCDriver2(*this);
registerOutputFormatProcessorNull(*this);
registerOutputFormatProcessorMySQLWrite(*this);
}
}
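
The registerInputFormatProcessor* / registerOutputFormatProcessor* functions referenced above all follow one shape; a hedged sketch, with MyOutputFormat standing in for a real IOutputFormat subclass:

void registerOutputFormatProcessorMyFormat(FormatFactory & factory)
{
    factory.registerOutputFormatProcessor("MyFormat", [](
        WriteBuffer & buf,
        const Block & sample,
        const Context & /* context */,
        const FormatSettings & settings)
    {
        return std::make_shared<MyOutputFormat>(buf, sample, settings);  // MyOutputFormat is hypothetical
    });
}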

dbms/src/Formats/FormatFactory.h

@ -19,6 +19,18 @@ struct FormatSettings;
class ReadBuffer;
class WriteBuffer;
class IProcessor;
using ProcessorPtr = std::shared_ptr<IProcessor>;
class IInputFormat;
class IOutputFormat;
struct RowInputFormatParams;
using InputFormatPtr = std::shared_ptr<IInputFormat>;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
/** Allows creating an IBlockInputStream or IBlockOutputStream from the name of the format.
* Note: format and compression are independent things.
*/
@ -45,9 +57,24 @@ private:
const Context & context,
const FormatSettings & settings)>;
using InputProcessorCreator = std::function<InputFormatPtr(
ReadBuffer & buf,
const Block & header,
const Context & context,
const RowInputFormatParams & params,
const FormatSettings & settings)>;
using OutputProcessorCreator = std::function<OutputFormatPtr(
WriteBuffer & buf,
const Block & sample,
const Context & context,
const FormatSettings & settings)>;
using Creators = std::pair<InputCreator, OutputCreator>;
using ProcessorCreators = std::pair<InputProcessorCreator, OutputProcessorCreator>;
using FormatsDictionary = std::unordered_map<String, Creators>;
using FormatProcessorsDictionary = std::unordered_map<String, ProcessorCreators>;
public:
BlockInputStreamPtr getInput(
@ -62,10 +89,19 @@ public:
BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context) const;
InputFormatPtr getInputFormat(const String & name, ReadBuffer & buf,
const Block & sample, const Context & context, UInt64 max_block_size) const;
OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context) const;
/// Register format by its name.
void registerInputFormat(const String & name, InputCreator input_creator);
void registerOutputFormat(const String & name, OutputCreator output_creator);
void registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator);
void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator);
const FormatsDictionary & getAllFormats() const
{
return dict;
@ -73,11 +109,13 @@ public:
private:
FormatsDictionary dict;
FormatProcessorsDictionary processors_dict;
FormatFactory();
friend class ext::singleton<FormatFactory>;
const Creators & getCreators(const String & name) const;
const ProcessorCreators & getProcessorCreators(const String & name) const;
};
}
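
A minimal sketch of registering an output format against the new processor interface; the "MyFormat" name and the MyOutputFormat class are illustrative, not part of this change:

void registerOutputFormatProcessorMyFormat(FormatFactory & factory)
{
    factory.registerOutputFormatProcessor("MyFormat", [](
        WriteBuffer & buf,
        const Block & sample,
        const Context & /*context*/,
        const FormatSettings & settings) -> OutputFormatPtr
    {
        /// MyOutputFormat would be an IOutputFormat implementation for this format.
        return std::make_shared<MyOutputFormat>(buf, sample, settings);
    });
}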

View File

@ -0,0 +1,137 @@
#pragma once
#include <regex>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Core/Types.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Common/Exception.h>
#include <common/DateLUTImpl.h>
/// The default mode value to use for the WEEK() function
#define DEFAULT_WEEK_MODE 0
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
}
/**
* CustomWeek Transformations.
*/
static inline UInt32 dateIsNotSupported(const char * name)
{
throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
/// This factor transformation will say that the function is monotonic everywhere.
struct ZeroTransform
{
static inline UInt16 execute(UInt32, UInt8, const DateLUTImpl &) { return 0; }
static inline UInt16 execute(UInt16, UInt8, const DateLUTImpl &) { return 0; }
};
struct ToWeekImpl
{
static constexpr auto name = "toWeek";
static inline UInt8 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static inline UInt8 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode);
return yw.second;
}
using FactorTransform = ZeroTransform;
};
struct ToYearWeekImpl
{
static constexpr auto name = "toYearWeek";
static inline UInt32 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
using FactorTransform = ZeroTransform;
};
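
The yw.first * 100 + yw.second expression above packs the week-year and the week number into one integer; an illustrative check:

/// E.g. year 2019, week 28 is encoded as 2019 * 100 + 28 = 201928.
static_assert(2019u * 100 + 28 == 201928, "YYYYWW packing illustration");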
struct ToStartOfWeekImpl
{
static constexpr auto name = "toStartOfWeek";
static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(DayNum(d), week_mode);
}
using FactorTransform = ZeroTransform;
};
template <typename FromType, typename ToType, typename Transform>
struct Transformer
{
static void
vector(const PaddedPODArray<FromType> & vec_from, PaddedPODArray<ToType> & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone)
{
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
vec_to[i] = Transform::execute(vec_from[i], week_mode, time_zone);
}
};
template <typename FromType, typename ToType, typename Transform>
struct CustomWeekTransformImpl
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
using Op = Transformer<FromType, ToType, Transform>;
UInt8 week_mode = DEFAULT_WEEK_MODE;
if (arguments.size() > 1)
{
if (const auto week_mode_column = checkAndGetColumnConst<ColumnUInt8>(block.getByPosition(arguments[1]).column.get()))
week_mode = week_mode_column->getValue<UInt8>();
}
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
const ColumnPtr source_col = block.getByPosition(arguments[0]).column;
if (const auto * sources = checkAndGetColumn<ColumnVector<FromType>>(source_col.get()))
{
auto col_to = ColumnVector<ToType>::create();
Op::vector(sources->getData(), col_to->getData(), week_mode, time_zone);
block.getByPosition(result).column = std::move(col_to);
}
else
{
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function "
+ Transform::name,
ErrorCodes::ILLEGAL_COLUMN);
}
}
};
}

View File

@ -0,0 +1,153 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Functions/CustomWeekTransforms.h>
#include <Functions/IFunction.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/// See CustomWeekTransforms.h
template <typename ToDataType, typename Transform>
class FunctionCustomWeekToSomething : public IFunction
{
public:
static constexpr auto name = Transform::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionCustomWeekToSomething>(); }
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() == 1)
{
if (!isDateOrDateTime(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (arguments.size() == 2)
{
if (!isDateOrDateTime(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isUInt8(arguments[1].type))
throw Exception(
"Function " + getName()
+ " supports 1 or 2 or 3 arguments. The 1st argument "
"must be of type Date or DateTime. The 2nd argument (optional) must be "
"a constant UInt8 with week mode. The 3nd argument (optional) must be "
"a constant string with timezone name",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (arguments.size() == 3)
{
if (!isDateOrDateTime(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isUInt8(arguments[1].type))
throw Exception(
"Function " + getName()
+ " supports 1 or 2 or 3 arguments. The 1st argument "
"must be of type Date or DateTime. The 2nd argument (optional) must be "
"a constant UInt8 with week mode. The 3nd argument (optional) must be "
"a constant string with timezone name",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isString(arguments[2].type))
throw Exception(
"Function " + getName()
+ " supports 1 or 2 or 3 arguments. The 1st argument "
"must be of type Date or DateTime. The 2nd argument (optional) must be "
"a constant UInt8 with week mode. The 3nd argument (optional) must be "
"a constant string with timezone name",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (isDate(arguments[0].type) && std::is_same_v<ToDataType, DataTypeDate>)
throw Exception(
"The timezone argument of function " + getName() + " is allowed only when the 1st argument has the type DateTime",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be 1 or 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<ToDataType>();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
WhichDataType which(from_type);
if (which.isDate())
CustomWeekTransformImpl<DataTypeDate::FieldType, typename ToDataType::FieldType, Transform>::execute(
block, arguments, result, input_rows_count);
else if (which.isDateTime())
CustomWeekTransformImpl<DataTypeDateTime::FieldType, typename ToDataType::FieldType, Transform>::execute(
block, arguments, result, input_rows_count);
else
throw Exception(
"Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
bool hasInformationAboutMonotonicity() const override { return true; }
Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
{
IFunction::Monotonicity is_monotonic{true};
IFunction::Monotonicity is_not_monotonic;
if (std::is_same_v<typename Transform::FactorTransform, ZeroTransform>)
{
is_monotonic.is_always_monotonic = true;
return is_monotonic;
}
/// This method is called only if the function has one argument. Therefore, we do not care about the non-local time zone.
const DateLUTImpl & date_lut = DateLUT::instance();
if (left.isNull() || right.isNull())
return is_not_monotonic;
/// The function is monotonic on the [left, right] segment if the factor transformation returns the same values for them.
if (checkAndGetDataType<DataTypeDate>(&type))
{
return Transform::FactorTransform::execute(UInt16(left.get<UInt64>()), DEFAULT_WEEK_MODE, date_lut)
== Transform::FactorTransform::execute(UInt16(right.get<UInt64>()), DEFAULT_WEEK_MODE, date_lut)
? is_monotonic
: is_not_monotonic;
}
else
{
return Transform::FactorTransform::execute(UInt32(left.get<UInt64>()), DEFAULT_WEEK_MODE, date_lut)
== Transform::FactorTransform::execute(UInt32(right.get<UInt64>()), DEFAULT_WEEK_MODE, date_lut)
? is_monotonic
: is_not_monotonic;
}
}
};
}

View File

@ -22,8 +22,10 @@ struct RapidJSONParser
bool parse(const StringRef & json)
{
document.Parse(json.data);
return !document.HasParseError();
rapidjson::MemoryStream ms(json.data, json.size);
rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
document.ParseStream(is);
return !document.HasParseError() && (ms.Tell() == json.size);
}
struct Iterator

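The ms.Tell() == json.size condition is what rejects trailing garbage: rapidjson stops after the first complete value, so an input like "{} junk" would otherwise be accepted. The same idea as a standalone sketch:

#include <rapidjson/document.h>
#include <rapidjson/encodedstream.h>
#include <rapidjson/memorystream.h>

/// Returns true only if the whole buffer is a single valid JSON document.
static bool isValidWholeJSON(const char * data, size_t size)
{
    rapidjson::MemoryStream ms(data, size);
    rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
    rapidjson::Document document;
    document.ParseStream(is);
    return !document.HasParseError() && ms.Tell() == size;
}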
View File

@ -16,6 +16,7 @@ void registerFunctionToStartOfDay(FunctionFactory &);
void registerFunctionToMonday(FunctionFactory &);
void registerFunctionToISOWeek(FunctionFactory &);
void registerFunctionToISOYear(FunctionFactory &);
void registerFunctionToCustomWeek(FunctionFactory &);
void registerFunctionToStartOfMonth(FunctionFactory &);
void registerFunctionToStartOfQuarter(FunctionFactory &);
void registerFunctionToStartOfYear(FunctionFactory &);
@ -79,6 +80,7 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToMonday(factory);
registerFunctionToISOWeek(factory);
registerFunctionToISOYear(factory);
registerFunctionToCustomWeek(factory);
registerFunctionToStartOfMonth(factory);
registerFunctionToStartOfQuarter(factory);
registerFunctionToStartOfYear(factory);

View File

@ -0,0 +1,25 @@
#include <DataTypes/DataTypesNumber.h>
#include <Functions/CustomWeekTransforms.h>
#include <Functions/FunctionCustomWeekToSomething.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
namespace DB
{
using FunctionToWeek = FunctionCustomWeekToSomething<DataTypeUInt8, ToWeekImpl>;
using FunctionToYearWeek = FunctionCustomWeekToSomething<DataTypeUInt32, ToYearWeekImpl>;
using FunctionToStartOfWeek = FunctionCustomWeekToSomething<DataTypeDate, ToStartOfWeekImpl>;
void registerFunctionToCustomWeek(FunctionFactory & factory)
{
factory.registerFunction<FunctionToWeek>();
factory.registerFunction<FunctionToYearWeek>();
factory.registerFunction<FunctionToStartOfWeek>();
/// Compatibility aliases for mysql.
factory.registerAlias("week", "toWeek", FunctionFactory::CaseInsensitive);
factory.registerAlias("yearweek", "toYearWeek", FunctionFactory::CaseInsensitive);
}
}

View File

@ -34,9 +34,11 @@ class DoubleConverter
DoubleConverter() = default;
public:
/** @todo Add commentary on how this constant is deduced.
* e.g. it's minus sign, integral zero, decimal point, up to 5 leading zeros and kBase10MaximalLength digits. */
static constexpr auto MAX_REPRESENTATION_LENGTH = 26;
/// Sign (1 byte) + DigitsBeforePoint + point (1 byte) + DigitsAfterPoint + zero byte.
/// See comment to DoubleToStringConverter::ToFixed method for explanation.
static constexpr auto MAX_REPRESENTATION_LENGTH =
1 + double_conversion::DoubleToStringConverter::kMaxFixedDigitsBeforePoint +
1 + double_conversion::DoubleToStringConverter::kMaxFixedDigitsAfterPoint + 1;
using BufferType = char[MAX_REPRESENTATION_LENGTH];
static const auto & instance()

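For reference, assuming kMaxFixedDigitsBeforePoint and kMaxFixedDigitsAfterPoint are both 60 (their values in the bundled double-conversion library; an assumption, not taken from this diff), the new constant works out to 123 bytes instead of the old hard-coded 26:

/// Sign + digits before point + point + digits after point + terminating zero:
static_assert(1 + 60 + 1 + 60 + 1 == 123, "illustrative arithmetic only");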
View File

@ -26,7 +26,7 @@ add_executable (read_escaped_string read_escaped_string.cpp)
target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io)
add_executable (async_write async_write.cpp)
target_link_libraries (async_write PRIVATE clickhouse_compression clickhouse_common_io)
target_link_libraries (async_write PRIVATE dbms)
add_executable (parse_int_perf parse_int_perf.cpp)
target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io)

View File

@ -2053,7 +2053,6 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV
* Then the calculations can be parallelized by buckets.
* We decompose the blocks to the bucket numbers indicated in them.
*/
using BucketToBlocks = std::map<Int32, BlocksList>;
BucketToBlocks bucket_to_blocks;
/// Read all the data.
@ -2071,11 +2070,22 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV
bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(block));
}
LOG_TRACE(log, "Read " << total_input_blocks << " blocks of partially aggregated data, total " << total_input_rows << " rows.");
LOG_TRACE(log, "Read " << total_input_blocks << " blocks of partially aggregated data, total " << total_input_rows
<< " rows.");
mergeBlocks(bucket_to_blocks, result, max_threads);
}
void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads)
{
if (bucket_to_blocks.empty())
return;
UInt64 total_input_rows = 0;
for (auto & bucket : bucket_to_blocks)
for (auto & block : bucket.second)
total_input_rows += block.rows();
/** `minus one` means the absence of information about the bucket
* - in the case of single-level aggregation, as well as for blocks with "overflowing" values.
* If there is at least one block with a bucket number greater than or equal to zero, then there was a two-level aggregation.

View File

@ -857,6 +857,10 @@ public:
*/
void mergeStream(const BlockInputStreamPtr & stream, AggregatedDataVariants & result, size_t max_threads);
using BucketToBlocks = std::map<Int32, BlocksList>;
/// Merge partially aggregated blocks separated to buckets into one data structure.
void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads);
/// Merge several partially aggregated blocks into one.
/// Precondition: for all blocks block.info.is_overflows flag must be the same.
/// (either all blocks are from overflow data or none are).

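A hypothetical caller of the new mergeBlocks, shown only to illustrate the contract (aggregator, partial_blocks, result and max_threads are illustrative names):

/// Group partially aggregated blocks by bucket number (-1 for single-level
/// data), then merge them all in one call.
Aggregator::BucketToBlocks bucket_to_blocks;
for (Block & block : partial_blocks)
    bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(block));
aggregator.mergeBlocks(std::move(bucket_to_blocks), result, max_threads);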
View File

@ -1779,6 +1779,11 @@ BlockOutputStreamPtr Context::getOutputFormat(const String & name, WriteBuffer &
return FormatFactory::instance().getOutput(name, buf, sample, *this);
}
OutputFormatPtr Context::getOutputFormatProcessor(const String & name, WriteBuffer & buf, const Block & sample) const
{
return FormatFactory::instance().getOutputFormat(name, buf, sample, *this);
}
time_t Context::getUptimeSeconds() const
{

View File

@ -74,6 +74,8 @@ class ShellCommand;
class ICompressionCodec;
class SettingsConstraints;
class IOutputFormat;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
#if USE_EMBEDDED_COMPILER
@ -289,6 +291,8 @@ public:
BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const;
BlockOutputStreamPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const;
OutputFormatPtr getOutputFormatProcessor(const String & name, WriteBuffer & buf, const Block & sample) const;
InterserverIOHandler & getInterserverIOHandler();
/// How other servers can access this for downloading replicated data.

View File

@ -740,22 +740,27 @@ void ExpressionActions::execute(Block & block, bool dry_run) const
}
}
bool ExpressionActions::hasTotalsInJoin() const
{
bool has_totals_in_join = false;
for (const auto & action : actions)
{
if (action.join && action.join->hasTotals())
{
has_totals_in_join = true;
break;
}
}
return has_totals_in_join;
}
void ExpressionActions::executeOnTotals(Block & block) const
{
/// If there is `totals` in the subquery for JOIN, but we do not have totals, then take the block with the default values instead of `totals`.
if (!block)
{
bool has_totals_in_join = false;
for (const auto & action : actions)
{
if (action.join && action.join->hasTotals())
{
has_totals_in_join = true;
break;
}
}
if (has_totals_in_join)
if (hasTotalsInJoin())
{
for (const auto & name_and_type : input_columns)
{

View File

@ -223,6 +223,9 @@ public:
/// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns.
void execute(Block & block, bool dry_run = false) const;
/// Check if joined subquery has totals.
bool hasTotalsInJoin() const;
/** Execute the expression on the block of total values.
* Almost the same as `execute`. The difference is only when JOIN is executed.
*/

View File

@ -279,12 +279,14 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
set->setHeader(res.in->getHeader());
res.in->readPrefix();
while (Block block = res.in->read())
{
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
if (!set->insertFromBlock(block))
return;
}
res.in->readSuffix();
prepared_sets[set_key] = std::move(set);
}

View File

@ -249,7 +249,7 @@ private:
void assertAggregation() const;
/**
* Create Set from a subuquery or a table expression in the query. The created set is suitable for using the index.
* Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
* The set will not be created if its size hits the limit.
*/
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);

View File

@ -2,6 +2,7 @@
#include <DataStreams/BlockIO.h>
#include <Processors/QueryPipeline.h>
namespace DB
{
@ -17,6 +18,10 @@ public:
*/
virtual BlockIO execute() = 0;
virtual QueryPipeline executeWithProcessors() { throw Exception("executeWithProcessors not implemented", ErrorCodes::NOT_IMPLEMENTED); }
virtual bool canExecuteWithProcessors() const { return false; }
virtual ~IInterpreter() {}
};

View File

@ -265,6 +265,11 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S
if (where_expression)
select_query += " WHERE " + queryToString(where_expression);
auto use_processors = context.getSettingsRef().experimental_use_processors;
context.getSettingsRef().experimental_use_processors = false;
SCOPE_EXIT(context.getSettingsRef().experimental_use_processors = use_processors);
BlockIO block_io = executeQuery(select_query, context, true);
Block res = block_io.in->read();

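The save/override/restore idiom used above, in isolation; SCOPE_EXIT is ClickHouse's scope guard from <ext/scope_guard.h>, and the sketch assumes a mutable settings reference as in the code above:

#include <ext/scope_guard.h>

void runWithProcessorsDisabled(Context & context)
{
    auto saved = context.getSettingsRef().experimental_use_processors;
    context.getSettingsRef().experimental_use_processors = false;
    SCOPE_EXIT(context.getSettingsRef().experimental_use_processors = saved);
    /// ... run the internal query; the setting is restored even on exception.
}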
View File

@ -58,6 +58,26 @@
#include <ext/map.h>
#include <memory>
#include <Processors/Sources/NullSource.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/MergingAggregatedTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/TotalsHavingTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Transforms/MergingSortedTransform.h>
#include <Processors/Transforms/DistinctTransform.h>
#include <Processors/Transforms/LimitByTransform.h>
#include <Processors/Transforms/ExtremesTransform.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
#include <Processors/Transforms/RollupTransform.h>
#include <Processors/Transforms/CubeTransform.h>
#include <Processors/LimitTransform.h>
namespace DB
{
@ -356,6 +376,13 @@ BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
return pipeline.streams;
}
QueryPipeline InterpreterSelectQuery::executeWithProcessors()
{
QueryPipeline query_pipeline;
executeImpl(query_pipeline, input, options.only_analyze);
return query_pipeline;
}
InterpreterSelectQuery::AnalysisResult
InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage, bool dry_run, const FilterInfoPtr & filter_info)
{
@ -555,7 +582,8 @@ InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage
}
void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & prepared_input, bool dry_run)
template <typename TPipeline>
void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputStreamPtr & prepared_input, bool dry_run)
{
/** Streams of data. When the query is executed in parallel, we have several data streams.
* If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then
@ -567,6 +595,8 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
* then perform the remaining operations with one resulting stream.
*/
constexpr bool pipeline_with_processors = std::is_same<TPipeline, QueryPipeline>::value;
/// Now we will compose block streams that perform the necessary actions.
auto & query = getSelectQuery();
const Settings & settings = context.getSettingsRef();
@ -611,21 +641,42 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
if (dry_run)
{
pipeline.streams.emplace_back(std::make_shared<NullBlockInputStream>(source_header));
if constexpr (pipeline_with_processors)
pipeline.init({std::make_shared<NullSource>(source_header)});
else
pipeline.streams.emplace_back(std::make_shared<NullBlockInputStream>(source_header));
expressions = analyzeExpressions(QueryProcessingStage::FetchColumns, true, filter_info);
if (storage && expressions.filter_info && expressions.prewhere_info)
throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
if (expressions.prewhere_info)
pipeline.streams.back() = std::make_shared<FilterBlockInputStream>(
{
if constexpr (pipeline_with_processors)
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<FilterTransform>(
header,
expressions.prewhere_info->prewhere_actions,
expressions.prewhere_info->prewhere_column_name,
expressions.prewhere_info->remove_prewhere_column);
});
else
pipeline.streams.back() = std::make_shared<FilterBlockInputStream>(
pipeline.streams.back(), expressions.prewhere_info->prewhere_actions,
expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column);
}
}
else
{
if (prepared_input)
pipeline.streams.push_back(prepared_input);
{
if constexpr (pipeline_with_processors)
pipeline.init({std::make_shared<SourceFromInputStream>(prepared_input)});
else
pipeline.streams.push_back(prepared_input);
}
expressions = analyzeExpressions(from_stage, false, filter_info);
@ -662,25 +713,78 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
{
if (expressions.filter_info)
{
pipeline.transform([&](auto & stream)
if constexpr (pipeline_with_processors)
{
stream = std::make_shared<FilterBlockInputStream>(
stream,
expressions.filter_info->actions,
expressions.filter_info->column_name,
expressions.filter_info->do_remove_column);
});
pipeline.addSimpleTransform([&](const Block & block, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
return std::make_shared<FilterTransform>(
block,
expressions.filter_info->actions,
expressions.filter_info->column_name,
expressions.filter_info->do_remove_column);
});
}
else
{
pipeline.transform([&](auto & stream)
{
stream = std::make_shared<FilterBlockInputStream>(
stream,
expressions.filter_info->actions,
expressions.filter_info->column_name,
expressions.filter_info->do_remove_column);
});
}
}
if (expressions.hasJoin())
{
Block header_before_join;
if constexpr (pipeline_with_processors)
{
header_before_join = pipeline.getHeader();
/// If the joined subquery has totals and we don't, add a default chunk to totals.
bool default_totals = false;
if (!pipeline.hasTotals())
{
pipeline.addDefaultTotals();
default_totals = true;
}
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType type)
{
bool on_totals = type == QueryPipeline::StreamType::Totals;
return std::make_shared<ExpressionTransform>(header, expressions.before_join, on_totals, default_totals);
});
}
else
{
header_before_join = pipeline.firstStream()->getHeader();
/// Applies to all sources except stream_with_non_joined_data.
for (auto & stream : pipeline.streams)
stream = std::make_shared<ExpressionBlockInputStream>(stream, expressions.before_join);
}
const auto & join = query.join()->table_join->as<ASTTableJoin &>();
if (isRightOrFull(join.kind))
pipeline.stream_with_non_joined_data = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin(
pipeline.firstStream()->getHeader(), settings.max_block_size);
{
auto stream = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin(
header_before_join, settings.max_block_size);
for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data.
stream = std::make_shared<ExpressionBlockInputStream>(stream, expressions.before_join);
if constexpr (pipeline_with_processors)
{
auto source = std::make_shared<SourceFromInputStream>(std::move(stream));
pipeline.addDelayedStream(source);
}
else
pipeline.stream_with_non_joined_data = std::move(stream);
}
}
if (expressions.has_where)
@ -810,13 +914,18 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
if (need_second_distinct_pass
|| query.limitLength()
|| query.limitBy()
|| pipeline.stream_with_non_joined_data)
|| pipeline.hasDelayedStream())
{
need_merge_streams = true;
}
if (need_merge_streams)
executeUnion(pipeline);
{
if constexpr (pipeline_with_processors)
pipeline.resize(1);
else
executeUnion(pipeline);
}
/** If there was more than one stream,
* then DISTINCT needs to be performed once again after merging all streams.
@ -888,10 +997,13 @@ static UInt64 getLimitForSorting(const ASTSelectQuery & query, const Context & c
}
template <typename TPipeline>
void InterpreterSelectQuery::executeFetchColumns(
QueryProcessingStage::Enum processing_stage, Pipeline & pipeline,
QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere)
{
constexpr bool pipeline_with_processors = std::is_same<TPipeline, QueryPipeline>::value;
auto & query = getSelectQuery();
const Settings & settings = context.getSettingsRef();
@ -1105,7 +1217,7 @@ void InterpreterSelectQuery::executeFetchColumns(
throw Exception("Setting 'max_block_size' cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND);
/// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery or prepared input?
if (!pipeline.streams.empty())
if (pipeline.initialized())
{
/// Prepared input.
}
@ -1127,7 +1239,11 @@ void InterpreterSelectQuery::executeFetchColumns(
interpreter_subquery->ignoreWithTotals();
}
pipeline.streams = interpreter_subquery->executeWithMultipleStreams();
if constexpr (pipeline_with_processors)
/// Just use pipeline from subquery.
pipeline = interpreter_subquery->executeWithProcessors();
else
pipeline.streams = interpreter_subquery->executeWithMultipleStreams();
}
else if (storage)
{
@ -1146,27 +1262,23 @@ void InterpreterSelectQuery::executeFetchColumns(
query_info.sets = query_analyzer->getPreparedSets();
query_info.prewhere_info = prewhere_info;
pipeline.streams = storage->read(required_columns, query_info, context, processing_stage, max_block_size, max_streams);
auto streams = storage->read(required_columns, query_info, context, processing_stage, max_block_size, max_streams);
if (pipeline.streams.empty())
if (streams.empty())
{
pipeline.streams = {std::make_shared<NullBlockInputStream>(storage->getSampleBlockForColumns(required_columns))};
streams = {std::make_shared<NullBlockInputStream>(storage->getSampleBlockForColumns(required_columns))};
if (query_info.prewhere_info)
pipeline.transform([&](auto & stream)
{
stream = std::make_shared<FilterBlockInputStream>(
stream,
prewhere_info->prewhere_actions,
prewhere_info->prewhere_column_name,
prewhere_info->remove_prewhere_column);
});
streams.back() = std::make_shared<FilterBlockInputStream>(
streams.back(),
prewhere_info->prewhere_actions,
prewhere_info->prewhere_column_name,
prewhere_info->remove_prewhere_column);
}
pipeline.transform([&](auto & stream)
{
for (auto & stream : streams)
stream->addTableLock(table_lock);
});
/// Set the limits and quota for reading data, the speed and time of the query.
{
@ -1194,15 +1306,52 @@ void InterpreterSelectQuery::executeFetchColumns(
QuotaForIntervals & quota = context.getQuota();
pipeline.transform([&](auto & stream)
for (auto & stream : streams)
{
if (!options.ignore_limits)
stream->setLimits(limits);
if (options.to_stage == QueryProcessingStage::Complete)
stream->setQuota(quota);
});
}
}
if constexpr (pipeline_with_processors)
{
/// Unify the streams: all sources must have the same header.
if (streams.size() > 1)
{
/// Convert any stream whose header differs from the first one.
auto first_header = streams.at(0)->getHeader();
for (size_t i = 1; i < streams.size(); ++i)
{
auto & stream = streams[i];
auto header = stream->getHeader();
auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name;
if (!blocksHaveEqualStructure(first_header, header))
stream = std::make_shared<ConvertingBlockInputStream>(context, stream, first_header, mode);
}
}
Processors sources;
sources.reserve(streams.size());
for (auto & stream : streams)
{
bool force_add_agg_info = processing_stage == QueryProcessingStage::WithMergeableState;
auto source = std::make_shared<SourceFromInputStream>(stream, force_add_agg_info);
if (processing_stage == QueryProcessingStage::Complete)
source->addTotalsPort();
sources.emplace_back(std::move(source));
}
pipeline.init(std::move(sources));
}
else
pipeline.streams = std::move(streams);
}
else
throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR);
@ -1210,10 +1359,20 @@ void InterpreterSelectQuery::executeFetchColumns(
/// Aliases in table declaration.
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
{
pipeline.transform([&](auto & stream)
if constexpr (pipeline_with_processors)
{
stream = std::make_shared<ExpressionBlockInputStream>(stream, alias_actions);
});
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, alias_actions);
});
}
else
{
pipeline.transform([&](auto & stream)
{
stream = std::make_shared<ExpressionBlockInputStream>(stream, alias_actions);
});
}
}
}
@ -1226,6 +1385,13 @@ void InterpreterSelectQuery::executeWhere(Pipeline & pipeline, const ExpressionA
});
}
void InterpreterSelectQuery::executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter)
{
pipeline.addSimpleTransform([&](const Block & block)
{
return std::make_shared<FilterTransform>(block, expression, getSelectQuery().where()->getColumnName(), remove_filter);
});
}
void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
@ -1294,6 +1460,76 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre
}
void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, expression);
});
Names key_names;
AggregateDescriptions aggregates;
query_analyzer->getAggregateInfo(key_names, aggregates);
Block header_before_aggregation = pipeline.getHeader();
ColumnNumbers keys;
for (const auto & name : key_names)
keys.push_back(header_before_aggregation.getPositionByName(name));
for (auto & descr : aggregates)
if (descr.arguments.empty())
for (const auto & name : descr.argument_names)
descr.arguments.push_back(header_before_aggregation.getPositionByName(name));
const Settings & settings = context.getSettingsRef();
/** Two-level aggregation is useful in two cases:
* 1. Parallel aggregation is done, and the results should be merged in parallel.
* 2. An aggregation is done with temporary data stored on disk, and it needs to be merged in a memory-efficient way.
*/
bool allow_to_use_two_level_group_by = pipeline.getNumMainStreams() > 1 || settings.max_bytes_before_external_group_by != 0;
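/// I.e. two-level aggregation is enabled either to parallelize the final merge
/// (more than one main stream) or because external aggregation may spill to
/// disk and require a memory-efficient merge.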
Aggregator::Params params(header_before_aggregation, keys, aggregates,
overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile,
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0),
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context.getTemporaryPath(), settings.max_threads);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);
pipeline.dropTotalsIfHas();
/// If there are several sources, then we perform parallel aggregation
if (pipeline.getNumMainStreams() > 1)
{
pipeline.resize(max_streams);
auto many_data = std::make_shared<ManyAggregatedData>(max_streams);
auto merge_threads = settings.aggregation_memory_efficient_merge_threads
? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
: static_cast<size_t>(settings.max_threads);
size_t counter = 0;
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AggregatingTransform>(header, transform_params, many_data, counter++, max_streams, merge_threads);
});
pipeline.resize(1);
}
else
{
pipeline.resize(1);
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AggregatingTransform>(header, transform_params);
});
}
}
void InterpreterSelectQuery::executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final)
{
Names key_names;
@ -1345,6 +1581,67 @@ void InterpreterSelectQuery::executeMergeAggregated(Pipeline & pipeline, bool ov
}
}
void InterpreterSelectQuery::executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final)
{
Names key_names;
AggregateDescriptions aggregates;
query_analyzer->getAggregateInfo(key_names, aggregates);
Block header_before_merge = pipeline.getHeader();
ColumnNumbers keys;
for (const auto & name : key_names)
keys.push_back(header_before_merge.getPositionByName(name));
/** There are two modes of distributed aggregation.
*
* 1. Blocks are read from the remote servers in different threads.
* Save all the blocks in the RAM. Merge blocks.
* If the aggregation is two-level - parallelize to the number of buckets.
*
* 2. In one thread, read blocks from different servers in order.
* RAM stores only one block from each server.
* If the aggregation is two-level, we merge the blocks of each successive level sequentially.
*
* The second option consumes less memory (up to 256 times less)
* in the case of two-level aggregation, which is used for large results after GROUP BY,
* but it can work more slowly.
*/
const Settings & settings = context.getSettingsRef();
Aggregator::Params params(header_before_merge, keys, aggregates, overflow_row, settings.max_threads);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);
if (!settings.distributed_aggregation_memory_efficient)
{
/// We union several sources into one, parallelizing the work.
pipeline.resize(1);
/// Now merge the aggregated blocks
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<MergingAggregatedTransform>(header, transform_params, settings.max_threads);
});
}
else
{
/// pipeline.resize(max_streams); - Seems we don't need it.
auto num_merge_threads = settings.aggregation_memory_efficient_merge_threads
? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
: static_cast<size_t>(settings.max_threads);
auto pipe = createMergingAggregatedMemoryEfficientPipe(
pipeline.getHeader(),
transform_params,
pipeline.getNumStreams(),
num_merge_threads);
pipeline.addPipe(std::move(pipe));
}
}
void InterpreterSelectQuery::executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
@ -1354,6 +1651,18 @@ void InterpreterSelectQuery::executeHaving(Pipeline & pipeline, const Expression
});
}
void InterpreterSelectQuery::executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
/// TODO: do we need to save the filter there?
return std::make_shared<FilterTransform>(header, expression, getSelectQuery().having()->getColumnName(), false);
});
}
void InterpreterSelectQuery::executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
@ -1371,6 +1680,19 @@ void InterpreterSelectQuery::executeTotalsAndHaving(Pipeline & pipeline, bool ha
final);
}
void InterpreterSelectQuery::executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
const Settings & settings = context.getSettingsRef();
auto totals_having = std::make_shared<TotalsHavingTransform>(
pipeline.getHeader(), overflow_row, expression,
has_having ? getSelectQuery().having()->getColumnName() : "",
settings.totals_mode, settings.totals_auto_threshold, final);
pipeline.addTotalsHavingTransform(std::move(totals_having));
}
void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificator modificator)
{
executeUnion(pipeline);
@ -1401,6 +1723,44 @@ void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificato
pipeline.firstStream() = std::make_shared<CubeBlockInputStream>(pipeline.firstStream(), params);
}
void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modificator modificator)
{
pipeline.resize(1);
Names key_names;
AggregateDescriptions aggregates;
query_analyzer->getAggregateInfo(key_names, aggregates);
Block header_before_transform = pipeline.getHeader();
ColumnNumbers keys;
for (const auto & name : key_names)
keys.push_back(header_before_transform.getPositionByName(name));
const Settings & settings = context.getSettingsRef();
Aggregator::Params params(header_before_transform, keys, aggregates,
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile,
SettingUInt64(0), SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context.getTemporaryPath(), settings.max_threads);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, true);
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
if (modificator == Modificator::ROLLUP)
return std::make_shared<RollupTransform>(header, std::move(transform_params));
else
return std::make_shared<CubeTransform>(header, std::move(transform_params));
});
}
void InterpreterSelectQuery::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
@ -1410,6 +1770,13 @@ void InterpreterSelectQuery::executeExpression(Pipeline & pipeline, const Expres
});
}
void InterpreterSelectQuery::executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
{
return std::make_shared<ExpressionTransform>(header, expression);
});
}
static SortDescription getSortDescription(const ASTSelectQuery & query)
{
@ -1462,6 +1829,42 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline)
settings.max_bytes_before_external_sort, context.getTemporaryPath());
}
void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline)
{
auto & query = getSelectQuery();
SortDescription order_descr = getSortDescription(query);
UInt64 limit = getLimitForSorting(query, context);
const Settings & settings = context.getSettingsRef();
/// TODO: Limits on sorting
// IBlockInputStream::LocalLimits limits;
// limits.mode = IBlockInputStream::LIMITS_TOTAL;
// limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type)
{
bool do_count_rows = stream_type == QueryPipeline::StreamType::Main;
return std::make_shared<PartialSortingTransform>(header, order_descr, limit, do_count_rows);
});
/// If there are several streams, we merge them into one
pipeline.resize(1);
/// Merge the sorted blocks.
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
return std::make_shared<MergeSortingTransform>(
header, order_descr, settings.max_block_size, limit,
settings.max_bytes_before_remerge_sort,
settings.max_bytes_before_external_sort, context.getTemporaryPath());
});
}
void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline)
{
@ -1490,6 +1893,27 @@ void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline)
}
}
void InterpreterSelectQuery::executeMergeSorted(QueryPipeline & pipeline)
{
auto & query = getSelectQuery();
SortDescription order_descr = getSortDescription(query);
UInt64 limit = getLimitForSorting(query, context);
const Settings & settings = context.getSettingsRef();
/// If there are several streams, then we merge them into one
if (pipeline.getNumStreams() > 1)
{
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
order_descr,
settings.max_block_size, limit);
pipeline.addPipe({ std::move(transform) });
}
}
void InterpreterSelectQuery::executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
@ -1499,6 +1923,14 @@ void InterpreterSelectQuery::executeProjection(Pipeline & pipeline, const Expres
});
}
void InterpreterSelectQuery::executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
{
return std::make_shared<ExpressionTransform>(header, expression);
});
}
void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_order, Names columns)
{
@ -1522,6 +1954,32 @@ void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_or
}
}
void InterpreterSelectQuery::executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns)
{
auto & query = getSelectQuery();
if (query.distinct)
{
const Settings & settings = context.getSettingsRef();
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
UInt64 limit_for_distinct = 0;
/// If ORDER BY is not executed after this DISTINCT stage, then no more than limit_length + limit_offset distinct rows can be needed.
if (!query.orderBy() || !before_order)
limit_for_distinct = limit_length + limit_offset;
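/// E.g. with LIMIT 10 OFFSET 5 (illustrative values), at most 10 + 5 = 15
/// distinct rows can reach the result, so the transform may stop early.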
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
return std::make_shared<DistinctTransform>(header, limits, limit_for_distinct, columns);
});
}
}
void InterpreterSelectQuery::executeUnion(Pipeline & pipeline)
{
@ -1558,6 +2016,24 @@ void InterpreterSelectQuery::executePreLimit(Pipeline & pipeline)
}
}
/// Preliminary LIMIT - is used in every source, if there are several sources, before they are combined.
void InterpreterSelectQuery::executePreLimit(QueryPipeline & pipeline)
{
auto & query = getSelectQuery();
/// If there is LIMIT
if (query.limitLength())
{
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
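/// Each source keeps limit_length + limit_offset rows with no offset here;
/// the offset itself is applied only by the final LIMIT after merging.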
pipeline.addSimpleTransform([&, limit = limit_length + limit_offset](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
return std::make_shared<LimitTransform>(header, limit, 0);
});
}
}
void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline)
{
@ -1577,6 +2053,28 @@ void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline)
});
}
void InterpreterSelectQuery::executeLimitBy(QueryPipeline & pipeline)
{
auto & query = getSelectQuery();
if (!query.limitByLength() || !query.limitBy())
return;
Names columns;
for (const auto & elem : query.limitBy()->children)
columns.emplace_back(elem->getColumnName());
UInt64 length = getLimitUIntValue(query.limitByLength(), context);
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context) : 0);
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type == QueryPipeline::StreamType::Totals)
return nullptr;
return std::make_shared<LimitByTransform>(header, length, offset, columns);
});
}
// TODO: move to anonymous namespace
bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
@ -1636,6 +2134,44 @@ void InterpreterSelectQuery::executeLimit(Pipeline & pipeline)
}
}
void InterpreterSelectQuery::executeLimit(QueryPipeline & pipeline)
{
auto & query = getSelectQuery();
/// If there is LIMIT
if (query.limitLength())
{
/** Rare case:
* if there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels,
* then when using LIMIT, you should read the data to the end, rather than cancel the query earlier,
* because if you cancel the query, we will not get `totals` data from the remote server.
*
* Another case:
* if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
* otherwise TOTALS is counted according to incomplete data.
*/
bool always_read_till_end = false;
if (query.group_by_with_totals && !query.orderBy())
always_read_till_end = true;
if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query))
always_read_till_end = true;
UInt64 limit_length;
UInt64 limit_offset;
std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, context);
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
if (stream_type != QueryPipeline::StreamType::Main)
return nullptr;
return std::make_shared<LimitTransform>(
header, limit_length, limit_offset, always_read_till_end);
});
}
}
void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
{
@ -1648,6 +2184,15 @@ void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
});
}
void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline)
{
if (!context.getSettingsRef().extremes)
return;
auto transform = std::make_shared<ExtremesTransform>(pipeline.getHeader());
pipeline.addExtremesTransform(std::move(transform));
}
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
@ -1656,6 +2201,19 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline
pipeline.firstStream(), subqueries_for_sets, context);
}
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
const Settings & settings = context.getSettingsRef();
auto creating_sets = std::make_shared<CreatingSetsTransform>(
pipeline.getHeader(), subqueries_for_sets,
SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode),
context);
pipeline.addCreatingSetsTransform(std::move(creating_sets));
}
void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline)
{
if (pipeline.hasMoreThanOneStream())

View File

@ -13,6 +13,7 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/TableStructureLockHolder.h>
#include <Processors/QueryPipeline.h>
namespace Poco { class Logger; }
@ -69,6 +70,9 @@ public:
/// Execute the query and return multiple streams for parallel processing.
BlockInputStreams executeWithMultipleStreams();
QueryPipeline executeWithProcessors() override;
bool canExecuteWithProcessors() const override { return true; }
Block getSampleBlock();
void ignoreWithTotals();
@ -125,10 +129,13 @@ private:
{
return hasMoreThanOneStream() || union_stream;
}
bool hasDelayedStream() const { return stream_with_non_joined_data != nullptr; }
bool initialized() const { return !streams.empty(); }
};
void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & prepared_input, bool dry_run);
template <typename TPipeline>
void executeImpl(TPipeline & pipeline, const BlockInputStreamPtr & prepared_input, bool dry_run);
struct AnalysisResult
{
@ -177,7 +184,8 @@ private:
/// dry_run - don't read from table, use empty header block instead.
void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run);
void executeFetchColumns(QueryProcessingStage::Enum processing_stage, Pipeline & pipeline,
template <typename TPipeline>
void executeFetchColumns(QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere);
void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter);
@ -197,6 +205,22 @@ private:
void executeExtremes(Pipeline & pipeline);
void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
void executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter);
void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
void executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final);
void executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
void executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
void executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
void executeOrder(QueryPipeline & pipeline);
void executeMergeSorted(QueryPipeline & pipeline);
void executePreLimit(QueryPipeline & pipeline);
void executeLimitBy(QueryPipeline & pipeline);
void executeLimit(QueryPipeline & pipeline);
void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns);
void executeExtremes(QueryPipeline & pipeline);
void executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
/// If the pipeline has several streams with different headers, convert them all to the first header with ConvertingBlockInputStream.
void unifyStreams(Pipeline & pipeline);
@ -208,6 +232,8 @@ private:
void executeRollupOrCube(Pipeline & pipeline, Modificator modificator);
void executeRollupOrCube(QueryPipeline & pipeline, Modificator modificator);
/** If there is a SETTINGS section in the SELECT query, then apply settings from it.
*
* Section SETTINGS - settings for a specific query.

View File

@ -12,6 +12,9 @@
#include <Parsers/queryToString.h>
#include <Parsers/ASTExpressionList.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/QueryPipeline.h>
namespace DB
{
@ -101,16 +104,25 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
for (size_t query_num = 0; query_num < num_selects; ++query_num)
headers[query_num] = nested_interpreters[query_num]->getSampleBlock();
result_header = headers.front();
size_t num_columns = result_header.columns();
result_header = getCommonHeaderForUnion(headers);
}
}
for (size_t query_num = 1; query_num < num_selects; ++query_num)
if (headers[query_num].columns() != num_columns)
throw Exception("Different number of columns in UNION ALL elements:\n"
+ result_header.dumpNames()
+ "\nand\n"
+ headers[query_num].dumpNames() + "\n",
ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);
Block InterpreterSelectWithUnionQuery::getCommonHeaderForUnion(const Blocks & headers)
{
size_t num_selects = headers.size();
Block common_header = headers.front();
size_t num_columns = common_header.columns();
for (size_t query_num = 1; query_num < num_selects; ++query_num)
{
if (headers[query_num].columns() != num_columns)
throw Exception("Different number of columns in UNION ALL elements:\n"
+ common_header.dumpNames()
+ "\nand\n"
+ headers[query_num].dumpNames() + "\n",
ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);
for (size_t column_num = 0; column_num < num_columns; ++column_num)
{
@ -119,10 +131,12 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
for (size_t i = 0; i < num_selects; ++i)
columns.push_back(&headers[i].getByPosition(column_num));
ColumnWithTypeAndName & result_elem = result_header.getByPosition(column_num);
ColumnWithTypeAndName & result_elem = common_header.getByPosition(column_num);
result_elem = getLeastSuperColumn(columns);
}
}
return common_header;
}
@ -197,6 +211,43 @@ BlockIO InterpreterSelectWithUnionQuery::execute()
}
QueryPipeline InterpreterSelectWithUnionQuery::executeWithProcessors()
{
QueryPipeline main_pipeline;
std::vector<QueryPipeline> pipelines;
bool has_main_pipeline = false;
Blocks headers;
headers.reserve(nested_interpreters.size());
for (auto & interpreter : nested_interpreters)
{
if (!has_main_pipeline)
{
has_main_pipeline = true;
main_pipeline = interpreter->executeWithProcessors();
headers.emplace_back(main_pipeline.getHeader());
}
else
{
pipelines.emplace_back(interpreter->executeWithProcessors());
headers.emplace_back(pipelines.back().getHeader());
}
}
if (!has_main_pipeline)
main_pipeline.init({ std::make_shared<NullSource>(getSampleBlock()) });
if (!pipelines.empty())
{
auto common_header = getCommonHeaderForUnion(headers);
main_pipeline.unitePipelines(std::move(pipelines), common_header, context);
}
return main_pipeline;
}
void InterpreterSelectWithUnionQuery::ignoreWithTotals()
{
for (auto & interpreter : nested_interpreters)

View File

@ -5,6 +5,7 @@
#include <Interpreters/IInterpreter.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Processors/QueryPipeline.h>
namespace DB
{
@ -30,6 +31,9 @@ public:
/// Execute the query without union of streams.
BlockInputStreams executeWithMultipleStreams();
QueryPipeline executeWithProcessors() override;
bool canExecuteWithProcessors() const override { return true; }
Block getSampleBlock();
static Block getSampleBlock(
@ -48,6 +52,8 @@ private:
std::vector<std::unique_ptr<InterpreterSelectQuery>> nested_interpreters;
Block result_header;
static Block getCommonHeaderForUnion(const Blocks & headers);
};
}

View File

@ -28,8 +28,11 @@
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/executeQuery.h>
#include "DNSCacheUpdater.h"
#include <Interpreters/DNSCacheUpdater.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Formats/IOutputFormat.h>
namespace DB
{
@ -200,6 +203,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// Copy the query into a string. It will be written to the log and shown in the process list. For an INSERT query, the string will not include the data to be inserted.
String query(begin, query_end);
BlockIO res;
QueryPipeline & pipeline = res.pipeline;
try
{
@ -235,7 +239,13 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
context.initializeExternalTablesIfSet();
auto interpreter = InterpreterFactory::get(ast, context, stage);
res = interpreter->execute();
bool use_processors = settings.experimental_use_processors && interpreter->canExecuteWithProcessors();
if (use_processors)
pipeline = interpreter->executeWithProcessors();
else
res = interpreter->execute();
if (auto * insert_interpreter = typeid_cast<const InterpreterInsertQuery *>(&*interpreter))
context.setInsertionTable(insert_interpreter->getDatabaseTable());
@ -245,36 +255,57 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if ((*process_list_entry)->isKilled())
throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state",
ErrorCodes::QUERY_WAS_CANCELLED);
else
else if (!use_processors)
(*process_list_entry)->setQueryStreams(res);
}
/// Hold element of process list till end of query execution.
res.process_list_entry = process_list_entry;
if (res.in)
IBlockInputStream::LocalLimits limits;
limits.mode = IBlockInputStream::LIMITS_CURRENT;
limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode);
if (use_processors)
{
res.in->setProgressCallback(context.getProgressCallback());
res.in->setProcessListElement(context.getProcessListElement());
pipeline.setProgressCallback(context.getProgressCallback());
pipeline.setProcessListElement(context.getProcessListElement());
/// Limits on the result, the quota on the result, and also callback for progress.
/// Limits apply only to the final result.
if (stage == QueryProcessingStage::Complete)
{
IBlockInputStream::LocalLimits limits;
limits.mode = IBlockInputStream::LIMITS_CURRENT;
limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode);
res.in->setLimits(limits);
res.in->setQuota(quota);
pipeline.resize(1);
pipeline.addSimpleTransform([&](const Block & header)
{
auto transform = std::make_shared<LimitsCheckingTransform>(header, limits);
transform->setQuota(quota);
return transform;
});
}
}
if (res.out)
else
{
if (auto stream = dynamic_cast<CountingBlockOutputStream *>(res.out.get()))
if (res.in)
{
stream->setProcessListElement(context.getProcessListElement());
res.in->setProgressCallback(context.getProgressCallback());
res.in->setProcessListElement(context.getProcessListElement());
/// Limits on the result, the quota on the result, and also callback for progress.
/// Limits apply only to the final result.
if (stage == QueryProcessingStage::Complete)
{
res.in->setLimits(limits);
res.in->setQuota(quota);
}
}
if (res.out)
{
if (auto stream = dynamic_cast<CountingBlockOutputStream *>(res.out.get()))
{
stream->setProcessListElement(context.getProcessListElement());
}
}
}
@ -304,7 +335,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
/// Also make possible for caller to log successful query finish and exception during execution.
res.finish_callback = [elem, &context, log_queries] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable
auto finish_callback = [elem, &context, log_queries] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable
{
QueryStatus * process_list_elem = context.getProcessListElement();
@ -373,7 +404,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
};
res.exception_callback = [elem, &context, log_queries] () mutable
auto exception_callback = [elem, &context, log_queries] () mutable
{
context.getQuota().addError();
@ -416,6 +447,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
};
res.finish_callback = std::move(finish_callback);
res.exception_callback = std::move(exception_callback);
if (!internal && res.in)
{
std::stringstream log_str;
@ -498,6 +532,8 @@ void executeQuery(
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, may_have_tail);
auto & pipeline = streams.pipeline;
try
{
if (streams.out)
@ -551,6 +587,63 @@ void executeQuery(
copyData(*streams.in, *out);
}
if (pipeline.initialized())
{
const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
WriteBuffer * out_buf = &ostr;
std::optional<WriteBufferFromFile> out_file_buf;
if (ast_query_with_output && ast_query_with_output->out_file)
{
if (!allow_into_outfile)
throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);
const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>();
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
out_buf = &*out_file_buf;
}
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
? *getIdentifierName(ast_query_with_output->format)
: context.getDefaultFormat();
if (ast_query_with_output && ast_query_with_output->settings_ast)
InterpreterSetQuery(ast_query_with_output->settings_ast, context).executeForCurrentContext();
pipeline.addSimpleTransform([](const Block & header)
{
return std::make_shared<MaterializingTransform>(header);
});
auto out = context.getOutputFormatProcessor(format_name, *out_buf, pipeline.getHeader());
/// Save previous progress callback if any. TODO Do it more conveniently.
auto previous_progress_callback = context.getProgressCallback();
/// NOTE Progress callback takes shared ownership of 'out'.
pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress)
{
if (previous_progress_callback)
previous_progress_callback(progress);
out->onProgress(progress);
});
if (set_content_type)
set_content_type(out->getContentType());
if (set_query_id)
set_query_id(context.getClientInfo().current_query_id);
pipeline.setOutput(std::move(out));
{
auto executor = pipeline.execute();
executor->execute(context.getSettingsRef().max_threads);
}
pipeline.finalize();
}
}
catch (...)
{

View File

@ -3,6 +3,7 @@
#include <Core/QueryProcessingStage.h>
#include <DataStreams/BlockIO.h>
#include <Processors/QueryPipeline.h>
namespace DB
{
@ -45,4 +46,13 @@ BlockIO executeQuery(
bool may_have_embedded_data = false /// If insert query may have embedded data
);
QueryPipeline executeQueryWithProcessors(
const String & query, /// Query text without INSERT data. The latter must be written to BlockIO::out.
Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions...
bool internal = false, /// If true, this query is caused by another query and thus needn't be registered in the ProcessList.
QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, /// To which stage the query must be executed.
bool may_have_embedded_data = false /// If insert query may have embedded data
);
}
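A hedged usage sketch of the new entry point (the helper name runWithProcessors is hypothetical; it assumes a fully initialized Context and an output format processor built for the pipeline header, as done in executeQuery.cpp above):
#include <Interpreters/executeQuery.h>
/// Sketch only: drive a query through the processors pipeline.
void runWithProcessors(DB::Context & context, DB::ProcessorPtr out)
{
    DB::QueryPipeline pipeline = DB::executeQueryWithProcessors("SELECT 1", context);
    pipeline.setOutput(std::move(out)); /// 'out' is assumed to be an IOutputFormat processor
    auto executor = pipeline.execute();
    executor->execute(context.getSettingsRef().max_threads);
    pipeline.finalize();
}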

View File

@ -12,11 +12,11 @@ target_link_libraries (aggregate PRIVATE dbms)
add_executable (hash_map hash_map.cpp)
target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map PRIVATE dbms clickhouse_compression)
target_link_libraries (hash_map PRIVATE dbms)
add_executable (hash_map_lookup hash_map_lookup.cpp)
target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map_lookup PRIVATE dbms clickhouse_compression)
target_link_libraries (hash_map_lookup PRIVATE dbms)
add_executable (hash_map3 hash_map3.cpp)
target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR})
@ -24,22 +24,22 @@ target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_
add_executable (hash_map_string hash_map_string.cpp)
target_include_directories (hash_map_string SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map_string PRIVATE dbms clickhouse_compression)
target_link_libraries (hash_map_string PRIVATE dbms)
add_executable (hash_map_string_2 hash_map_string_2.cpp)
target_link_libraries (hash_map_string_2 PRIVATE dbms clickhouse_compression)
target_link_libraries (hash_map_string_2 PRIVATE dbms)
add_executable (hash_map_string_3 hash_map_string_3.cpp)
target_include_directories(hash_map_string_3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR})
target_link_libraries (hash_map_string_3 PRIVATE dbms clickhouse_compression ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})
target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})
add_executable (hash_map_string_small hash_map_string_small.cpp)
target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map_string_small PRIVATE dbms clickhouse_compression)
target_link_libraries (hash_map_string_small PRIVATE dbms)
add_executable (two_level_hash_map two_level_hash_map.cpp)
target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (two_level_hash_map PRIVATE dbms clickhouse_compression)
target_link_libraries (two_level_hash_map PRIVATE dbms)
add_executable (compiler_test compiler_test.cpp)
target_link_libraries (compiler_test PRIVATE dbms)

View File

@ -0,0 +1,4 @@
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()

View File

@ -0,0 +1,155 @@
#include <Processors/Chunk.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
namespace DB
{
namespace ErrorCodes
{
extern const int POSITION_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}
Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_)
{
checkNumRowsIsConsistent();
}
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
: columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
{
checkNumRowsIsConsistent();
}
static Columns unmuteColumns(MutableColumns && mut_columns)
{
Columns columns;
columns.reserve(mut_columns.size());
for (auto & col : mut_columns)
columns.emplace_back(std::move(col));
return columns;
}
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
: columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_)
{
}
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
: columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_))
{
}
Chunk Chunk::clone() const
{
return Chunk(getColumns(), getNumRows());
}
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
{
columns = std::move(columns_);
num_rows = num_rows_;
checkNumRowsIsConsistent();
}
void Chunk::setColumns(MutableColumns columns_, UInt64 num_rows_)
{
columns = unmuteColumns(std::move(columns_));
num_rows = num_rows_;
checkNumRowsIsConsistent();
}
void Chunk::checkNumRowsIsConsistent()
{
for (auto & column : columns)
if (column->size() != num_rows)
throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " +
toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR);
}
MutableColumns Chunk::mutateColumns()
{
size_t num_columns = columns.size();
MutableColumns mut_columns(num_columns);
for (size_t i = 0; i < num_columns; ++i)
mut_columns[i] = (*std::move(columns[i])).mutate();
columns.clear();
num_rows = 0;
return mut_columns;
}
MutableColumns Chunk::cloneEmptyColumns() const
{
size_t num_columns = columns.size();
MutableColumns mut_columns(num_columns);
for (size_t i = 0; i < num_columns; ++i)
mut_columns[i] = columns[i]->cloneEmpty();
return mut_columns;
}
Columns Chunk::detachColumns()
{
num_rows = 0;
return std::move(columns);
}
void Chunk::erase(size_t position)
{
if (columns.empty())
throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
if (position >= columns.size())
throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = "
+ toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
columns.erase(columns.begin() + position);
}
UInt64 Chunk::bytes() const
{
UInt64 res = 0;
for (const auto & column : columns)
res += column->byteSize();
return res;
}
UInt64 Chunk::allocatedBytes() const
{
UInt64 res = 0;
for (const auto & column : columns)
res += column->allocatedBytes();
return res;
}
std::string Chunk::dumpStructure() const
{
WriteBufferFromOwnString out;
for (auto & column : columns)
out << ' ' << column->dumpStructure();
return out.str();
}
void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx)
{
RowsBitMask & mask = rows_mask_by_column_id[column_idx];
mask.resize(row_idx + 1);
mask[row_idx] = true;
}
const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(size_t column_idx) const
{
static RowsBitMask none;
auto it = rows_mask_by_column_id.find(column_idx);
if (it != rows_mask_by_column_id.end())
return it->second;
return none;
}
}
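As a hedged illustration of the mutate/reset cycle implemented above: mutateColumns() empties the chunk, and setColumns() re-runs the row-count consistency check.
#include <Processors/Chunk.h>
/// Sketch only: the typical mutate-and-restore cycle on a Chunk.
void mutateInPlace(DB::Chunk & chunk)
{
    auto rows = chunk.getNumRows();
    DB::MutableColumns mutable_columns = chunk.mutateColumns(); /// chunk is now empty
    /// ... modify the columns in place; every column size must stay equal to 'rows' ...
    chunk.setColumns(std::move(mutable_columns), rows); /// consistency is re-checked
}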

117
dbms/src/Processors/Chunk.h Normal file
View File

@ -0,0 +1,117 @@
#pragma once
#include <Columns/IColumn.h>
#include <unordered_map>
namespace DB
{
class ChunkInfo
{
public:
virtual ~ChunkInfo() = default;
ChunkInfo() = default;
};
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
class Chunk
{
public:
Chunk() = default;
Chunk(const Chunk & other) = delete;
Chunk(Chunk && other) noexcept
: columns(std::move(other.columns))
, num_rows(other.num_rows)
, chunk_info(std::move(other.chunk_info))
{
other.num_rows = 0;
}
Chunk(Columns columns_, UInt64 num_rows_);
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
Chunk(MutableColumns columns_, UInt64 num_rows_);
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
Chunk & operator=(const Chunk & other) = delete;
Chunk & operator=(Chunk && other) noexcept
{
columns = std::move(other.columns);
chunk_info = std::move(other.chunk_info);
num_rows = other.num_rows;
other.num_rows = 0;
return *this;
}
Chunk clone() const;
void swap(Chunk & other)
{
columns.swap(other.columns);
chunk_info.swap(other.chunk_info);
std::swap(num_rows, other.num_rows);
}
void clear()
{
num_rows = 0;
columns.clear();
chunk_info.reset();
}
const Columns & getColumns() const { return columns; }
void setColumns(Columns columns_, UInt64 num_rows_);
void setColumns(MutableColumns columns_, UInt64 num_rows_);
Columns detachColumns();
MutableColumns mutateColumns();
/** Get empty columns with the same types as in the block. */
MutableColumns cloneEmptyColumns() const;
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
UInt64 getNumRows() const { return num_rows; }
UInt64 getNumColumns() const { return columns.size(); }
bool hasNoRows() const { return num_rows == 0; }
bool hasNoColumns() const { return columns.empty(); }
bool empty() const { return hasNoRows() && hasNoColumns(); }
operator bool() const { return !empty(); }
void erase(size_t position);
UInt64 bytes() const;
UInt64 allocatedBytes() const;
std::string dumpStructure() const;
private:
Columns columns;
UInt64 num_rows = 0;
ChunkInfoPtr chunk_info;
void checkNumRowsIsConsistent();
};
using Chunks = std::vector<Chunk>;
/// Block extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
class ChunkMissingValues : public ChunkInfo
{
public:
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
void setBit(size_t column_idx, size_t row_idx);
bool empty() const { return rows_mask_by_column_id.empty(); }
size_t size() const { return rows_mask_by_column_id.size(); }
void clear() { rows_mask_by_column_id.clear(); }
private:
using RowsMaskByColumnId = std::unordered_map<size_t, RowsBitMask>;
/// If rows_mask_by_column_id[column_id][row_id] is true, the related value in the Block should be replaced with the column default.
/// It could contain fewer columns and rows than the related block.
RowsMaskByColumnId rows_mask_by_column_id;
};
}
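A hedged construction example for the interface above (ColumnUInt64 from Columns/ColumnsNumber.h is assumed available, as elsewhere in the codebase):
#include <Processors/Chunk.h>
#include <Columns/ColumnsNumber.h>
/// Sketch only: build a 3-row Chunk with a single UInt64 column.
DB::Chunk makeExampleChunk()
{
    auto column = DB::ColumnUInt64::create();
    column->insertValue(1);
    column->insertValue(2);
    column->insertValue(3);
    DB::MutableColumns columns;
    columns.emplace_back(std::move(column));
    return DB::Chunk(std::move(columns), 3); /// num_rows must match every column size
}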

View File

@ -0,0 +1,63 @@
#include <Processors/ConcatProcessor.h>
namespace DB
{
ConcatProcessor::Status ConcatProcessor::prepare()
{
auto & output = outputs.front();
/// Check can output.
if (output.isFinished())
{
for (; current_input != inputs.end(); ++current_input)
current_input->close();
return Status::Finished;
}
if (!output.isNeeded())
{
if (current_input != inputs.end())
current_input->setNotNeeded();
return Status::PortFull;
}
if (!output.canPush())
return Status::PortFull;
/// Check can input.
if (current_input == inputs.end())
return Status::Finished;
if (current_input->isFinished())
{
++current_input;
if (current_input == inputs.end())
{
output.finish();
return Status::Finished;
}
}
auto & input = *current_input;
input.setNeeded();
if (!input.hasData())
return Status::NeedData;
/// Move data.
output.push(input.pull());
/// Now, we pushed to output, and it must be full.
return Status::PortFull;
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Processors/IProcessor.h>
namespace DB
{
/** Has an arbitrary non-zero number of inputs and one output.
* All of them have the same structure.
*
* Pulls all data from first input, then all data from second input, etc...
* Doesn't do any heavy calculations.
* Preserves the order of data.
*/
class ConcatProcessor : public IProcessor
{
public:
ConcatProcessor(const Block & header, size_t num_inputs)
: IProcessor(InputPorts(num_inputs, header), OutputPorts{header}), current_input(inputs.begin())
{
}
String getName() const override { return "Concat"; }
Status prepare() override;
OutputPort & getOutputPort() { return outputs.front(); }
private:
InputPorts::iterator current_input;
};
}
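A hedged wiring sketch (connect() is assumed to be the free function from Processors/Port.h that links an OutputPort to an InputPort):
#include <memory>
#include <Processors/ConcatProcessor.h>
#include <Processors/Port.h>
/// Sketch only: concatenate two upstream outputs into one ordered stream.
void wireConcat(DB::OutputPort & first, DB::OutputPort & second, const DB::Block & header)
{
    auto concat = std::make_shared<DB::ConcatProcessor>(header, 2);
    auto input = concat->getInputs().begin();
    DB::connect(first, *input);
    DB::connect(second, *std::next(input));
    /// concat->getOutputPort() now yields all data of 'first', then all of 'second'.
}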

View File

@ -0,0 +1,101 @@
#include <Common/EventCounter.h>
#include <Common/ThreadPool.h>
#include <Processors/Executors/ParallelPipelineExecutor.h>
#include <Processors/Executors/traverse.h>
namespace DB
{
//
//ParallelPipelineExecutor::ParallelPipelineExecutor(const std::vector<ProcessorPtr> & processors, ThreadPool & pool)
// : processors(processors), pool(pool)
//{
//}
//
//
//ParallelPipelineExecutor::Status ParallelPipelineExecutor::prepare()
//{
// current_processor = nullptr;
//
// bool has_someone_to_wait = false;
//
// for (auto & element : processors)
// {
// traverse(*element,
// [&] (IProcessor & processor)
// {
// {
// std::lock_guard lock(mutex);
// if (active_processors.count(&processor))
// {
// has_someone_to_wait = true;
// return Status::Wait;
// }
// }
//
// Status status = processor.prepare();
//
// if (status == Status::Wait)
// has_someone_to_wait = true;
//
// if (status == Status::Ready || status == Status::Async)
// {
// current_processor = &processor;
// current_status = status;
// }
//
// return status;
// });
//
// if (current_processor)
// break;
// }
//
// if (current_processor)
// return Status::Async;
//
// if (has_someone_to_wait)
// return Status::Wait;
//
// for (auto & element : processors)
// {
// if (element->prepare() == Status::NeedData)
// throw Exception("Pipeline stuck: " + element->getName() + " processor needs input data but no one is going to generate it", ErrorCodes::LOGICAL_ERROR);
// if (element->prepare() == Status::PortFull)
// throw Exception("Pipeline stuck: " + element->getName() + " processor has data in output port but no one is going to consume it", ErrorCodes::LOGICAL_ERROR);
// }
//
// return Status::Finished;
//}
//
//
//void ParallelPipelineExecutor::schedule(EventCounter & watch)
//{
// if (!current_processor)
// throw Exception("Bad pipeline", ErrorCodes::LOGICAL_ERROR);
//
// if (current_status == Status::Async)
// {
// current_processor->schedule(watch);
// }
// else
// {
// {
// std::lock_guard lock(mutex);
// active_processors.insert(current_processor);
// }
//
// pool.schedule([processor = current_processor, &watch, this]
// {
// processor->work();
// {
// std::lock_guard lock(mutex);
// active_processors.erase(processor);
// }
// watch.notify();
// });
// }
//}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <vector>
#include <set>
#include <mutex>
#include <Processors/IProcessor.h>
template <typename>
class ThreadPoolImpl;
class ThreadFromGlobalPool;
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPool>;
namespace DB
{
/** Wraps pipeline in a single processor.
* This processor has no inputs and outputs and just executes the pipeline,
* performing all synchronous work within a threadpool.
*/
//class ParallelPipelineExecutor : public IProcessor
//{
//private:
// Processors processors;
// ThreadPool & pool;
//
// std::set<IProcessor *> active_processors;
// std::mutex mutex;
//
// IProcessor * current_processor = nullptr;
// Status current_status;
//
//public:
// ParallelPipelineExecutor(const Processors & processors, ThreadPool & pool);
//
// String getName() const override { return "ParallelPipelineExecutor"; }
//
// Status prepare() override;
// void schedule(EventCounter & watch) override;
//};
}

View File

@ -0,0 +1,653 @@
#include <Processors/Executors/PipelineExecutor.h>
#include <unordered_map>
#include <queue>
#include <IO/WriteBufferFromString.h>
#include <Processors/printPipeline.h>
#include <Common/EventCounter.h>
#include <ext/scope_guard.h>
#include <Common/CurrentThread.h>
#include <boost/lockfree/queue.hpp>
#include <Common/Stopwatch.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_MANY_ROWS_OR_BYTES;
extern const int QUOTA_EXPIRED;
extern const int QUERY_WAS_CANCELLED;
}
static bool checkCanAddAdditionalInfoToException(const DB::Exception & exception)
{
/// Don't add additional info to limit and quota exceptions, or when the query is killed (to keep tests passing).
return exception.code() != ErrorCodes::TOO_MANY_ROWS_OR_BYTES
&& exception.code() != ErrorCodes::QUOTA_EXPIRED
&& exception.code() != ErrorCodes::QUERY_WAS_CANCELLED;
}
PipelineExecutor::PipelineExecutor(Processors & processors)
: processors(processors)
, cancelled(false)
, finished(false)
, num_processing_executors(0)
, expand_pipeline_task(nullptr)
{
buildGraph();
}
bool PipelineExecutor::addEdges(UInt64 node)
{
auto throwUnknownProcessor = [](const IProcessor * proc, const IProcessor * parent, bool from_input_port)
{
String msg = "Processor " + proc->getName() + " was found as " + (from_input_port ? "input" : "output")
+ " for processor " + parent->getName() + ", but not found in list of processors.";
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
};
const IProcessor * cur = graph[node].processor;
auto add_edge = [&](auto & from_port, const IProcessor * to_proc, Edges & edges)
{
auto it = processors_map.find(to_proc);
if (it == processors_map.end())
throwUnknownProcessor(to_proc, cur, true);
UInt64 proc_num = it->second;
Edge * edge_ptr = nullptr;
for (auto & edge : edges)
if (edge.to == proc_num)
edge_ptr = &edge;
if (!edge_ptr)
{
edge_ptr = &edges.emplace_back();
edge_ptr->to = proc_num;
}
from_port.setVersion(&edge_ptr->version);
};
bool was_edge_added = false;
auto & inputs = processors[node]->getInputs();
auto from_input = graph[node].backEdges.size();
if (from_input < inputs.size())
{
was_edge_added = true;
for (auto it = std::next(inputs.begin(), from_input); it != inputs.end(); ++it)
{
const IProcessor * proc = &it->getOutputPort().getProcessor();
add_edge(*it, proc, graph[node].backEdges);
}
}
auto & outputs = processors[node]->getOutputs();
auto from_output = graph[node].directEdges.size();
if (from_output < outputs.size())
{
was_edge_added = true;
for (auto it = std::next(outputs.begin(), from_output); it != outputs.end(); ++it)
{
const IProcessor * proc = &it->getInputPort().getProcessor();
add_edge(*it, proc, graph[node].directEdges);
}
}
return was_edge_added;
}
void PipelineExecutor::buildGraph()
{
UInt64 num_processors = processors.size();
graph.reserve(num_processors);
for (UInt64 node = 0; node < num_processors; ++node)
{
IProcessor * proc = processors[node].get();
processors_map[proc] = node;
graph.emplace_back(proc, node);
}
for (UInt64 node = 0; node < num_processors; ++node)
addEdges(node);
}
void PipelineExecutor::addChildlessProcessorsToStack(Stack & stack)
{
UInt64 num_processors = processors.size();
for (UInt64 proc = 0; proc < num_processors; ++proc)
{
if (graph[proc].directEdges.empty())
{
stack.push(proc);
graph[proc].status = ExecStatus::Preparing;
}
}
}
static void executeJob(IProcessor * processor)
{
try
{
processor->work();
}
catch (Exception & exception)
{
if (checkCanAddAdditionalInfoToException(exception))
exception.addMessage("While executing " + processor->getName() + " ("
+ toString(reinterpret_cast<std::uintptr_t>(processor)) + ") ");
throw;
}
}
void PipelineExecutor::addJob(ExecutionState * execution_state)
{
auto job = [execution_state]()
{
try
{
Stopwatch watch;
executeJob(execution_state->processor);
execution_state->execution_time_ns += watch.elapsed();
++execution_state->num_executed_jobs;
}
catch (...)
{
execution_state->exception = std::current_exception();
}
};
execution_state->job = std::move(job);
}
void PipelineExecutor::expandPipeline(Stack & stack, UInt64 pid)
{
auto & cur_node = graph[pid];
auto new_processors = cur_node.processor->expandPipeline();
for (const auto & processor : new_processors)
{
if (processors_map.count(processor.get()))
throw Exception("Processor " + processor->getName() + " was already added to pipeline.",
ErrorCodes::LOGICAL_ERROR);
processors_map[processor.get()] = graph.size();
graph.emplace_back(processor.get(), graph.size());
}
processors.insert(processors.end(), new_processors.begin(), new_processors.end());
UInt64 num_processors = processors.size();
for (UInt64 node = 0; node < num_processors; ++node)
{
if (addEdges(node))
{
if (graph[node].status == ExecStatus::Idle || graph[node].status == ExecStatus::New)
{
graph[node].status = ExecStatus::Preparing;
stack.push(node);
}
}
}
}
bool PipelineExecutor::tryAddProcessorToStackIfUpdated(Edge & edge, Stack & stack)
{
/// In this method we have ownership of the edge, but the node can be accessed concurrently.
auto & node = graph[edge.to];
ExecStatus status = node.status.load();
/// Don't add processor if nothing was read from port.
if (status != ExecStatus::New && edge.version == edge.prev_version)
return false;
if (status == ExecStatus::Finished)
return false;
/// Signal that the node needs to be prepared.
node.need_to_be_prepared = true;
edge.prev_version = edge.version;
/// Try to get ownership for node.
/// Assume that current status is New or Idle. Otherwise, can't prepare node.
if (status != ExecStatus::New)
status = ExecStatus::Idle;
/// Statuses other than New and Idle are not interesting because their owner will prepare the node.
/// Prepare will be called in owning thread before changing status.
while (!node.status.compare_exchange_weak(status, ExecStatus::Preparing))
if (!(status == ExecStatus::New || status == ExecStatus::Idle) || !node.need_to_be_prepared)
return false;
stack.push(edge.to);
return true;
}
bool PipelineExecutor::prepareProcessor(UInt64 pid, Stack & children, Stack & parents, size_t thread_number, bool async)
{
/// In this method we have ownership of the node.
auto & node = graph[pid];
{
/// Stopwatch watch;
/// Disable flag before prepare call. Otherwise, we can skip prepare request.
/// Prepare can be called more times than needed, but it's ok.
node.need_to_be_prepared = false;
auto status = node.processor->prepare();
/// node.execution_state->preparation_time_ns += watch.elapsed();
node.last_processor_status = status;
}
auto add_neighbours_to_prepare_queue = [&] ()
{
for (auto & edge : node.backEdges)
tryAddProcessorToStackIfUpdated(edge, parents);
for (auto & edge : node.directEdges)
tryAddProcessorToStackIfUpdated(edge, children);
};
auto try_release_ownership = [&] ()
{
/// This function can be called after expandPipeline(), when the node from the outer scope is no longer valid.
auto & node_ = graph[pid];
ExecStatus expected = ExecStatus::Idle;
node_.status = ExecStatus::Idle;
if (node_.need_to_be_prepared)
{
while (!node_.status.compare_exchange_weak(expected, ExecStatus::Preparing))
if (!(expected == ExecStatus::Idle) || !node_.need_to_be_prepared)
return;
children.push(pid);
}
};
switch (node.last_processor_status)
{
case IProcessor::Status::NeedData:
{
add_neighbours_to_prepare_queue();
try_release_ownership();
break;
}
case IProcessor::Status::PortFull:
{
add_neighbours_to_prepare_queue();
try_release_ownership();
break;
}
case IProcessor::Status::Finished:
{
add_neighbours_to_prepare_queue();
node.status = ExecStatus::Finished;
break;
}
case IProcessor::Status::Ready:
{
node.status = ExecStatus::Executing;
return true;
}
case IProcessor::Status::Async:
{
throw Exception("Async is temporary not supported.", ErrorCodes::LOGICAL_ERROR);
// node.status = ExecStatus::Executing;
// addAsyncJob(pid);
// break;
}
case IProcessor::Status::Wait:
{
if (!async)
throw Exception("Processor returned status Wait before Async.", ErrorCodes::LOGICAL_ERROR);
break;
}
case IProcessor::Status::ExpandPipeline:
{
executor_contexts[thread_number]->task_list.emplace_back(
node.execution_state.get(),
&parents
);
ExpandPipelineTask * desired = &executor_contexts[thread_number]->task_list.back();
ExpandPipelineTask * expected = nullptr;
while (!expand_pipeline_task.compare_exchange_strong(expected, desired))
{
doExpandPipeline(expected, true);
expected = nullptr;
}
doExpandPipeline(desired, true);
/// The node is no longer valid after the pipeline was expanded.
graph[pid].need_to_be_prepared = true;
try_release_ownership();
break;
}
}
return false;
}
void PipelineExecutor::doExpandPipeline(ExpandPipelineTask * task, bool processing)
{
std::unique_lock lock(task->mutex);
if (processing)
++task->num_waiting_processing_threads;
task->condvar.wait(lock, [&]()
{
return task->num_waiting_processing_threads >= num_processing_executors || expand_pipeline_task != task;
});
/// After condvar.wait(), task may be dangling. It can be changed only if it is still stored in expand_pipeline_task.
if (expand_pipeline_task == task)
{
expandPipeline(*task->stack, task->node_to_expand->processors_id);
expand_pipeline_task = nullptr;
lock.unlock();
task->condvar.notify_all();
}
}
void PipelineExecutor::finish()
{
{
std::lock_guard lock(task_queue_mutex);
finished = true;
}
task_queue_condvar.notify_all();
}
void PipelineExecutor::execute(size_t num_threads)
{
try
{
executeImpl(num_threads);
/// Execution can be stopped because of exception. Check and rethrow if any.
for (auto & node : graph)
if (node.execution_state->exception)
std::rethrow_exception(node.execution_state->exception);
}
catch (Exception & exception)
{
if (checkCanAddAdditionalInfoToException(exception))
exception.addMessage("\nCurrent state:\n" + dumpPipeline());
throw;
}
if (cancelled)
return;
bool all_processors_finished = true;
for (auto & node : graph)
if (node.status != ExecStatus::Finished)
all_processors_finished = false;
if (!all_processors_finished)
throw Exception("Pipeline stuck. Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR);
}
void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads)
{
UInt64 total_time_ns = 0;
UInt64 execution_time_ns = 0;
UInt64 processing_time_ns = 0;
UInt64 wait_time_ns = 0;
Stopwatch total_time_watch;
ExecutionState * state = nullptr;
auto prepare_processor = [&](UInt64 pid, Stack & children, Stack & parents)
{
try
{
return prepareProcessor(pid, children, parents, thread_num, false);
}
catch (...)
{
graph[pid].execution_state->exception = std::current_exception();
finish();
}
return false;
};
using Queue = std::queue<ExecutionState *>;
auto prepare_all_processors = [&](Queue & queue, Stack & stack, Stack & children, Stack & parents)
{
while (!stack.empty() && !finished)
{
auto current_processor = stack.top();
stack.pop();
if (prepare_processor(current_processor, children, parents))
queue.push(graph[current_processor].execution_state.get());
}
};
while (!finished)
{
/// First, find any processor to execute.
/// Just traverse the graph and prepare any processor.
while (!finished)
{
std::unique_lock lock(task_queue_mutex);
if (!task_queue.empty())
{
state = task_queue.front();
task_queue.pop();
break;
}
++num_waiting_threads;
if (num_waiting_threads == num_threads)
{
finished = true;
lock.unlock();
task_queue_condvar.notify_all();
break;
}
task_queue_condvar.wait(lock, [&]()
{
return finished || !task_queue.empty();
});
--num_waiting_threads;
}
if (finished)
break;
while (state)
{
if (finished)
break;
addJob(state);
{
Stopwatch execution_time_watch;
state->job();
execution_time_ns += execution_time_watch.elapsed();
}
if (state->exception)
finish();
if (finished)
break;
Stopwatch processing_time_watch;
/// Try to execute neighbour processor.
{
Stack children;
Stack parents;
Queue queue;
++num_processing_executors;
while (auto task = expand_pipeline_task.load())
doExpandPipeline(task, true);
/// Execute again if possible.
if (!prepare_processor(state->processors_id, children, parents))
state = nullptr;
/// Process all neighbours. Children will be on the top of stack, then parents.
prepare_all_processors(queue, children, children, parents);
if (!state && !queue.empty())
{
state = queue.front();
queue.pop();
}
prepare_all_processors(queue, parents, parents, parents);
if (!queue.empty())
{
std::lock_guard lock(task_queue_mutex);
while (!queue.empty() && !finished)
{
task_queue.push(queue.front());
queue.pop();
}
task_queue_condvar.notify_all();
}
--num_processing_executors;
while (auto task = expand_pipeline_task.load())
doExpandPipeline(task, false);
}
processing_time_ns += processing_time_watch.elapsed();
}
}
total_time_ns = total_time_watch.elapsed();
wait_time_ns = total_time_ns - execution_time_ns - processing_time_ns;
LOG_TRACE(log, "Thread finished."
<< " Total time: " << (total_time_ns / 1e9) << " sec."
<< " Execution time: " << (execution_time_ns / 1e9) << " sec."
<< " Processing time: " << (processing_time_ns / 1e9) << " sec."
<< " Wait time: " << (wait_time_ns / 1e9) << "sec.");
}
void PipelineExecutor::executeImpl(size_t num_threads)
{
Stack stack;
executor_contexts.reserve(num_threads);
for (size_t i = 0; i < num_threads; ++i)
executor_contexts.emplace_back(std::make_unique<ExecutorContext>());
addChildlessProcessorsToStack(stack);
while (!stack.empty())
{
UInt64 proc = stack.top();
stack.pop();
if (prepareProcessor(proc, stack, stack, 0, false))
{
auto cur_state = graph[proc].execution_state.get();
task_queue.push(cur_state);
}
}
ThreadPool pool(num_threads);
SCOPE_EXIT(
finish();
pool.wait()
);
auto thread_group = CurrentThread::getGroup();
for (size_t i = 0; i < num_threads; ++i)
{
pool.schedule([this, thread_group, thread_num = i, num_threads]
{
/// ThreadStatus thread_status;
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
);
executeSingleThread(thread_num, num_threads);
});
}
pool.wait();
}
String PipelineExecutor::dumpPipeline() const
{
for (auto & node : graph)
{
if (node.execution_state)
node.processor->setDescription(
"(" + std::to_string(node.execution_state->num_executed_jobs) + " jobs, execution time: "
+ std::to_string(node.execution_state->execution_time_ns / 1e9) + " sec., preparation time: "
+ std::to_string(node.execution_state->preparation_time_ns / 1e9) + " sec.)");
}
std::vector<IProcessor::Status> statuses;
std::vector<IProcessor *> proc_list;
statuses.reserve(graph.size());
proc_list.reserve(graph.size());
for (auto & proc : graph)
{
proc_list.emplace_back(proc.processor);
statuses.emplace_back(proc.last_processor_status);
}
WriteBufferFromOwnString out;
printPipeline(processors, statuses, out);
out.finish();
return out.str();
}
}

View File

@ -0,0 +1,195 @@
#pragma once
#include <queue>
#include <stack>
#include <Processors/IProcessor.h>
#include <mutex>
#include <Common/ThreadPool.h>
#include <Common/EventCounter.h>
#include <common/logger_useful.h>
#include <boost/lockfree/stack.hpp>
namespace DB
{
/// Executes query pipeline.
class PipelineExecutor
{
public:
/// Get pipeline as a set of processors.
/// Processors should represent the full graph. All ports must be connected, and every connected node must be present in the set.
/// The executor doesn't own the processors, it just stores a reference.
/// During pipeline execution new processors can appear. They will be added to the existing set.
///
/// Explicit graph representation is built in constructor. Throws if graph is not correct.
explicit PipelineExecutor(Processors & processors);
/// Execute pipeline in multiple threads. Must be called once.
/// If an exception occurred during execution, it is rethrown.
void execute(size_t num_threads);
String getName() const { return "PipelineExecutor"; }
const Processors & getProcessors() const { return processors; }
/// Cancel execution. May be called from another thread.
void cancel()
{
cancelled = true;
finish();
}
private:
Processors & processors;
struct Edge
{
UInt64 to = std::numeric_limits<UInt64>::max();
/// Edge version is increased when port's state is changed (e.g. when data is pushed). See Port.h for details.
/// By comparing version with prev_version we can decide whether the neighbour processor needs to be prepared.
UInt64 version = 0;
UInt64 prev_version = 0;
};
/// Use std::list because new ports can be added to processor during execution.
using Edges = std::list<Edge>;
/// Status for processor.
/// Can be owning or not. Owning means that the executor which set this status can change the node's data, and nobody else can.
enum class ExecStatus
{
New, /// prepare wasn't called yet. Initial state. Non-owning.
Idle, /// prepare returned NeedData or PortFull. Non-owning.
Preparing, /// some executor is preparing processor, or processor is in task_queue. Owning.
Executing, /// prepare returned Ready and task is executing. Owning.
Finished, /// prepare returned Finished. Non-owning.
Async /// prepare returned Async. Owning.
};
/// Small structure with context of executing job.
struct ExecutionState
{
std::exception_ptr exception;
std::function<void()> job;
IProcessor * processor;
UInt64 processors_id;
/// Counters for profiling.
size_t num_executed_jobs = 0;
UInt64 execution_time_ns = 0;
UInt64 preparation_time_ns = 0;
};
struct Node
{
IProcessor * processor = nullptr;
Edges directEdges;
Edges backEdges;
std::atomic<ExecStatus> status;
/// This flag can be set by any executor.
/// When enabled, any executor can try to atomically switch status to Preparing.
std::atomic_bool need_to_be_prepared;
/// Last state for profiling.
IProcessor::Status last_processor_status = IProcessor::Status::NeedData;
std::unique_ptr<ExecutionState> execution_state;
Node(IProcessor * processor_, UInt64 processor_id)
: processor(processor_), status(ExecStatus::New), need_to_be_prepared(false)
{
execution_state = std::make_unique<ExecutionState>();
execution_state->processor = processor;
execution_state->processors_id = processor_id;
}
Node(Node && other) noexcept
: processor(other.processor), status(other.status.load())
, need_to_be_prepared(other.need_to_be_prepared.load()), execution_state(std::move(other.execution_state))
{
}
};
using Nodes = std::vector<Node>;
Nodes graph;
using Stack = std::stack<UInt64>;
using TaskQueue = std::queue<ExecutionState *>;
/// Queue with pointers to tasks. Each thread will concurrently read from it until finished flag is set.
/// Stores processors that need to be prepared. Preparing status is already set for them.
TaskQueue task_queue;
std::mutex task_queue_mutex;
std::condition_variable task_queue_condvar;
std::atomic_bool cancelled;
std::atomic_bool finished;
Poco::Logger * log = &Poco::Logger::get("PipelineExecutor");
/// Number of threads waiting on the condvar. The last thread finishes execution if task_queue is empty.
size_t num_waiting_threads = 0;
/// Things to stop execution to expand pipeline.
struct ExpandPipelineTask
{
ExecutionState * node_to_expand;
Stack * stack;
size_t num_waiting_processing_threads = 0;
std::mutex mutex;
std::condition_variable condvar;
ExpandPipelineTask(ExecutionState * node_to_expand_, Stack * stack_)
: node_to_expand(node_to_expand_), stack(stack_) {}
};
std::atomic<size_t> num_processing_executors;
std::atomic<ExpandPipelineTask *> expand_pipeline_task;
/// Context for each thread.
struct ExecutorContext
{
/// Will store context for all expand pipeline tasks (it's easy and we don't expect many).
/// This could also be solved by using an atomic shared ptr.
std::list<ExpandPipelineTask> task_list;
};
std::vector<std::unique_ptr<ExecutorContext>> executor_contexts;
/// Processor ptr -> node number
using ProcessorsMap = std::unordered_map<const IProcessor *, UInt64>;
ProcessorsMap processors_map;
/// Graph related methods.
bool addEdges(UInt64 node);
void buildGraph();
void expandPipeline(Stack & stack, UInt64 pid);
/// Pipeline execution related methods.
void addChildlessProcessorsToStack(Stack & stack);
bool tryAddProcessorToStackIfUpdated(Edge & edge, Stack & stack);
static void addJob(ExecutionState * execution_state);
// TODO: void addAsyncJob(UInt64 pid);
/// Prepare processor with pid number.
/// Check parents and children of current processor and push them to stacks if they also need to be prepared.
/// If processor wants to be expanded, ExpandPipelineTask from thread_number's execution context will be used.
bool prepareProcessor(size_t pid, Stack & children, Stack & parents, size_t thread_number, bool async);
void doExpandPipeline(ExpandPipelineTask * task, bool processing);
void executeImpl(size_t num_threads);
void executeSingleThread(size_t thread_num, size_t num_threads);
void finish();
String dumpPipeline() const;
};
using PipelineExecutorPtr = std::shared_ptr<PipelineExecutor>;
}
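A hedged usage sketch of the executor (the processor set must already form a complete, fully connected graph, as required above):
#include <Processors/Executors/PipelineExecutor.h>
/// Sketch only: run a fully connected set of processors on 4 threads.
void runPipeline(DB::Processors & processors)
{
    DB::PipelineExecutor executor(processors); /// builds the explicit graph, throws if it is malformed
    executor.execute(4); /// rethrows any exception from a job; throws if the pipeline is stuck
}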

View File

@ -0,0 +1,79 @@
#include <Processors/Executors/SequentialPipelineExecutor.h>
#include <Processors/Executors/traverse.h>
namespace DB
{
//SequentialPipelineExecutor::SequentialPipelineExecutor(const Processors & processors)
// : processors(processors)
//{
//}
//
//
//SequentialPipelineExecutor::Status SequentialPipelineExecutor::prepare()
//{
// current_processor = nullptr;
//
// bool has_someone_to_wait = false;
// Status found_status = Status::Finished;
//
// for (auto & element : processors)
// {
// traverse(*element,
// [&] (IProcessor & processor)
// {
// Status status = processor.prepare();
//
// if (status == Status::Wait)
// has_someone_to_wait = true;
//
// if (status == Status::Ready || status == Status::Async)
// {
// current_processor = &processor;
// found_status = status;
// }
//
// return status;
// });
//
// if (current_processor)
// break;
// }
//
// if (current_processor)
// return found_status;
// if (has_someone_to_wait)
// return Status::Wait;
//
// for (auto & element : processors)
// {
// if (element->prepare() == Status::NeedData)
// throw Exception("Pipeline stuck: " + element->getName() + " processor needs input data but no one is going to generate it", ErrorCodes::LOGICAL_ERROR);
// if (element->prepare() == Status::PortFull)
// throw Exception("Pipeline stuck: " + element->getName() + " processor has data in output port but no one is going to consume it", ErrorCodes::LOGICAL_ERROR);
// }
//
// return Status::Finished;
//}
//
//
//void SequentialPipelineExecutor::work()
//{
// if (!current_processor)
// throw Exception("Bad pipeline", ErrorCodes::LOGICAL_ERROR);
//
// current_processor->work();
//}
//
//
//void SequentialPipelineExecutor::schedule(EventCounter & watch)
//{
// if (!current_processor)
// throw Exception("Bad pipeline", ErrorCodes::LOGICAL_ERROR);
//
// current_processor->schedule(watch);
//}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <vector>
#include <Processors/IProcessor.h>
namespace DB
{
/** Wraps pipeline in a single processor.
* This processor has no inputs and outputs and just executes the pipeline,
* performing all synchronous work from the current thread.
*/
//class SequentialPipelineExecutor : public IProcessor
//{
//private:
// Processors processors;
// IProcessor * current_processor = nullptr;
//
//public:
// SequentialPipelineExecutor(const Processors & processors);
//
// String getName() const override { return "SequentialPipelineExecutor"; }
//
// Status prepare() override;
// void work() override;
// void schedule(EventCounter & watch) override;
//};
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Processors/IProcessor.h>
namespace DB
{
/// Look for the first Ready or Async processor by depth-first search over needed input ports and full output ports.
/// NOTE: Pipeline must not have cycles.
//template <typename Visit>
//void traverse(IProcessor & processor, Visit && visit)
//{
// IProcessor::Status status = visit(processor);
//
// if (status == IProcessor::Status::Ready || status == IProcessor::Status::Async)
// return;
//
// if (status == IProcessor::Status::NeedData)
// for (auto & input : processor.getInputs())
// if (input.isNeeded() && !input.hasData())
// traverse(input.getOutputPort().getProcessor(), std::forward<Visit>(visit));
//
// if (status == IProcessor::Status::PortFull)
// for (auto & output : processor.getOutputs())
// if (output.hasData())
// traverse(output.getInputPort().getProcessor(), std::forward<Visit>(visit));
//}
}

View File

@ -0,0 +1,81 @@
#include <Processors/ForkProcessor.h>
namespace DB
{
ForkProcessor::Status ForkProcessor::prepare()
{
auto & input = inputs.front();
/// Check can output.
bool all_finished = true;
bool all_can_push = true;
size_t num_active_outputs = 0;
for (const auto & output : outputs)
{
if (!output.isFinished())
{
all_finished = false;
++num_active_outputs;
/// The order is important.
if (!output.canPush())
all_can_push = false;
}
}
if (all_finished)
{
input.close();
return Status::Finished;
}
if (!all_can_push)
{
input.setNotNeeded();
return Status::PortFull;
}
/// Check can input.
if (input.isFinished())
{
for (auto & output : outputs)
output.finish();
return Status::Finished;
}
input.setNeeded();
if (!input.hasData())
return Status::NeedData;
/// Move data.
auto data = input.pull();
size_t num_processed_outputs = 0;
for (auto & output : outputs)
{
if (!output.isFinished()) /// Skip finished outputs.
{
++num_processed_outputs;
if (num_processed_outputs == num_active_outputs)
output.push(std::move(data)); /// Can push because no full or unneeded outputs.
else
output.push(data.clone());
}
}
/// Now, we pulled from input. It must be empty.
return Status::NeedData;
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Processors/IProcessor.h>
namespace DB
{
/** Has one input and an arbitrary non-zero number of outputs.
* All of them have the same structure.
*
* Pulls data from the input and copies it to every output.
* You may have heard about it under the name 'tee'.
*
* Doesn't do any heavy calculations.
* Preserves an order of data.
*/
class ForkProcessor : public IProcessor
{
public:
ForkProcessor(const Block & header, size_t num_outputs)
: IProcessor(InputPorts{header}, OutputPorts(num_outputs, header))
{
}
String getName() const override { return "Fork"; }
Status prepare() override;
InputPort & getInputPort() { return inputs.front(); }
};
}
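A hedged wiring sketch mirroring the Concat example (again assuming connect() from Processors/Port.h):
#include <memory>
#include <Processors/ForkProcessor.h>
#include <Processors/Port.h>
/// Sketch only: duplicate one upstream output into two consumers ('tee').
void wireFork(DB::OutputPort & source, DB::InputPort & left, DB::InputPort & right, const DB::Block & header)
{
    auto fork = std::make_shared<DB::ForkProcessor>(header, 2);
    DB::connect(source, fork->getInputPort());
    auto output = fork->getOutputs().begin();
    DB::connect(*output, left);
    DB::connect(*std::next(output), right);
}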

View File

@ -0,0 +1,32 @@
#pragma once
#include <Processors/ISource.h>
namespace DB
{
class ReadBuffer;
/** Input format is a source that reads data from a ReadBuffer.
*/
class IInputFormat : public ISource
{
protected:
/// Skip GCC warning: maybe_unused attribute ignored
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wattributes"
ReadBuffer & in [[maybe_unused]];
#pragma GCC diagnostic pop
public:
IInputFormat(Block header, ReadBuffer & in)
: ISource(std::move(header)), in(in)
{
}
};
}
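A hedged minimal subclass sketch (it assumes ISource exposes a generate() hook returning a Chunk, as IRowInputFormat's override below suggests; an empty Chunk signals end of data):
#include <Processors/Formats/IInputFormat.h>
/// Sketch only: the smallest possible input format, producing no rows.
class EmptyInputFormat : public DB::IInputFormat
{
public:
    EmptyInputFormat(DB::Block header, DB::ReadBuffer & in_)
        : IInputFormat(std::move(header), in_) {}
    DB::String getName() const override { return "EmptyInputFormat"; }
protected:
    DB::Chunk generate() override { return {}; } /// empty chunk => no more data
};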

View File

@ -0,0 +1,78 @@
#include <Processors/Formats/IOutputFormat.h>
#include <IO/WriteBuffer.h>
namespace DB
{
IOutputFormat::IOutputFormat(const Block & header, WriteBuffer & out)
: IProcessor({header, header, header}, {}), out(out)
{
}
IOutputFormat::Status IOutputFormat::prepare()
{
if (has_input)
return Status::Ready;
for (auto kind : {Main, Totals, Extremes})
{
auto & input = getPort(kind);
if (kind != Main && !input.isConnected())
continue;
if (input.isFinished())
continue;
input.setNeeded();
if (!input.hasData())
return Status::NeedData;
current_chunk = input.pull();
current_block_kind = kind;
has_input = true;
return Status::Ready;
}
finished = true;
if (!finalized)
return Status::Ready;
return Status::Finished;
}
void IOutputFormat::work()
{
if (finished && !finalized)
{
finalize();
finalized = true;
return;
}
switch (current_block_kind)
{
case Main:
consume(std::move(current_chunk));
break;
case Totals:
consumeTotals(std::move(current_chunk));
break;
case Extremes:
consumeExtremes(std::move(current_chunk));
break;
}
has_input = false;
}
void IOutputFormat::flush()
{
out.next();
}
}

View File

@ -0,0 +1,63 @@
#pragma once
#include <string>
#include <Processors/IProcessor.h>
#include <IO/Progress.h>
namespace DB
{
class WriteBuffer;
/** Output format has three inputs and no outputs. It writes data to a WriteBuffer.
*
* The first input is for the main resultset, the second is for "totals", and the third is for "extremes".
* It's not necessary to connect the "totals" or "extremes" ports (they may remain dangling).
*
* Data from the input ports is pulled in order: first from the main input, then totals, then extremes.
*
* By default, data for "totals" and "extremes" is ignored.
*/
class IOutputFormat : public IProcessor
{
public:
enum PortKind { Main = 0, Totals = 1, Extremes = 2 };
protected:
WriteBuffer & out;
Chunk current_chunk;
PortKind current_block_kind = PortKind::Main;
bool has_input = false;
bool finished = false;
bool finalized = false;
virtual void consume(Chunk) = 0;
virtual void consumeTotals(Chunk) {}
virtual void consumeExtremes(Chunk) {}
virtual void finalize() {}
public:
IOutputFormat(const Block & header, WriteBuffer & out);
Status prepare() override;
void work() override;
/// Flush output buffers if any.
virtual void flush();
/// Value for rows_before_limit_at_least field.
virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {}
/// Notify about progress. Method could be called from different threads.
/// Passed values are deltas that must be summed up.
virtual void onProgress(const Progress & /*progress*/) {}
/// Content-Type to set when sending HTTP response.
virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; }
InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); }
};
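A hedged minimal subclass sketch for the interface above (only consume() is required; totals and extremes are ignored by default):
#include <Processors/Formats/IOutputFormat.h>
/// Sketch only: an output format that just counts main-resultset rows.
class CountingOutputFormat : public DB::IOutputFormat
{
public:
    CountingOutputFormat(const DB::Block & header, DB::WriteBuffer & out_)
        : IOutputFormat(header, out_) {}
    DB::String getName() const override { return "CountingOutputFormat"; }
    size_t getRowCount() const { return rows; }
protected:
    void consume(DB::Chunk chunk) override { rows += chunk.getNumRows(); }
private:
    size_t rows = 0;
};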
}

View File

@ -0,0 +1,151 @@
#include <Processors/Formats/IRowInputFormat.h>
#include <IO/WriteHelpers.h> // toString
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int CANNOT_PARSE_NUMBER;
extern const int CANNOT_PARSE_UUID;
extern const int TOO_LARGE_STRING_SIZE;
extern const int INCORRECT_NUMBER_OF_COLUMNS;
}
static bool isParseError(int code)
{
return code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
|| code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
|| code == ErrorCodes::CANNOT_PARSE_DATE
|| code == ErrorCodes::CANNOT_PARSE_DATETIME
|| code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT
|| code == ErrorCodes::CANNOT_PARSE_NUMBER
|| code == ErrorCodes::CANNOT_PARSE_UUID
|| code == ErrorCodes::TOO_LARGE_STRING_SIZE;
}
Chunk IRowInputFormat::generate()
{
if (total_rows == 0)
readPrefix();
const Block & header = getPort().getHeader();
size_t num_columns = header.columns();
MutableColumns columns = header.cloneEmptyColumns();
size_t prev_rows = total_rows;
auto chunk_missing_values = std::make_unique<ChunkMissingValues>();
try
{
for (size_t rows = 0; rows < params.max_block_size; ++rows)
{
try
{
++total_rows;
RowReadExtension info;
if (!readRow(columns, info))
break;
for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx)
{
if (!info.read_columns[column_idx])
{
size_t column_size = columns[column_idx]->size();
if (column_size == 0)
throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
chunk_missing_values->setBit(column_idx, column_size - 1);
}
}
}
catch (Exception & e)
{
/// Logic for possible skipping of errors.
if (!isParseError(e.code()))
throw;
if (params.allow_errors_num == 0 && params.allow_errors_ratio == 0)
throw;
++num_errors;
Float64 current_error_ratio = static_cast<Float64>(num_errors) / total_rows;
if (num_errors > params.allow_errors_num
&& current_error_ratio > params.allow_errors_ratio)
{
e.addMessage("(Already have " + toString(num_errors) + " errors"
" out of " + toString(total_rows) + " rows"
", which is " + toString(current_error_ratio) + " of all rows)");
throw;
}
if (!allowSyncAfterError())
{
e.addMessage("(Input format doesn't allow to skip errors)");
throw;
}
syncAfterError();
/// Truncate all columns in the block to the minimal size (remove values that were appended to only some of the columns).
size_t min_size = std::numeric_limits<size_t>::max();
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
min_size = std::min(min_size, columns[column_idx]->size());
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
{
auto & column = columns[column_idx];
if (column->size() > min_size)
column->popBack(column->size() - min_size);
}
}
}
}
catch (Exception & e)
{
if (!isParseError(e.code()))
throw;
String verbose_diagnostic;
try
{
verbose_diagnostic = getDiagnosticInfo();
}
catch (...)
{
/// Error while trying to obtain verbose diagnostic. Ok to ignore.
}
e.addMessage("(at row " + toString(total_rows) + ")\n" + verbose_diagnostic);
throw;
}
if (columns.empty() || columns[0]->empty())
{
readSuffix();
return {};
}
Chunk chunk(std::move(columns), total_rows - prev_rows);
chunk.setChunkInfo(std::move(chunk_missing_values));
return chunk;
}
void IRowInputFormat::syncAfterError()
{
throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED);
}
}

View File

@ -0,0 +1,72 @@
#pragma once
#include <string>
#include <Columns/IColumn.h>
#include <Processors/Formats/IInputFormat.h>
namespace DB
{
/// Contains extra information about read data.
struct RowReadExtension
{
/// IRowInputStream.read() output. It contains non-zero for columns that were actually read from the source and zero otherwise.
/// It's used to attach defaults for partially filled rows.
std::vector<UInt8> read_columns;
};
/// Common parameters for generating blocks.
struct RowInputFormatParams
{
size_t max_block_size;
UInt64 allow_errors_num;
Float64 allow_errors_ratio;
};
/// Row-oriented input format: reads data row by row.
class IRowInputFormat : public IInputFormat
{
public:
using Params = RowInputFormatParams;
IRowInputFormat(
Block header,
ReadBuffer & in_,
Params params)
: IInputFormat(std::move(header), in_), params(params)
{
}
Chunk generate() override;
protected:
/** Read next row and append it to the columns.
* If there are no more rows, return false.
*/
virtual bool readRow(MutableColumns & columns, RowReadExtension & extra) = 0;
virtual void readPrefix() {} /// delimiter before begin of result
virtual void readSuffix() {} /// delimiter after end of result
/// Skip data until next row.
/// This is intended for text streams that allow skipping of errors.
/// By default, throws a "not implemented" exception.
virtual bool allowSyncAfterError() const { return false; }
virtual void syncAfterError();
/// In case of a parse error, try to roll back and parse the last one or two rows very carefully
/// and collect as much diagnostic information about the error as possible.
/// If not implemented, returns empty string.
virtual std::string getDiagnosticInfo() { return {}; }
private:
Params params;
size_t total_rows = 0;
size_t num_errors = 0;
};
}
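A hedged sketch of a row-based format against the interface above (the format name and behaviour are illustrative; readRow() appends one row per call and returns false when exhausted):
#include <Processors/Formats/IRowInputFormat.h>
/// Sketch only: emits 'count' rows of default values, then stops.
class DefaultRowsInputFormat : public DB::IRowInputFormat
{
public:
    DefaultRowsInputFormat(DB::Block header, DB::ReadBuffer & in_, Params params, size_t count)
        : IRowInputFormat(std::move(header), in_, params), rows_left(count) {}
    DB::String getName() const override { return "DefaultRowsInputFormat"; }
protected:
    bool readRow(DB::MutableColumns & columns, DB::RowReadExtension &) override
    {
        if (rows_left == 0)
            return false; /// no more rows
        --rows_left;
        for (auto & column : columns)
            column->insertDefault(); /// read_columns stays empty, so the defaults path is unaffected
        return true;
    }
private:
    size_t rows_left;
};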

View File

@ -0,0 +1,101 @@
#include <string>
#include <Processors/Formats/IRowOutputFormat.h>
#include <IO/WriteHelpers.h>
namespace DB
{
void IRowOutputFormat::consume(DB::Chunk chunk)
{
writePrefixIfNot();
auto num_rows = chunk.getNumRows();
auto & columns = chunk.getColumns();
for (UInt64 row = 0; row < num_rows; ++row)
{
if (!first_row)
writeRowBetweenDelimiter();
first_row = false;
write(columns, row);
}
}
void IRowOutputFormat::consumeTotals(DB::Chunk chunk)
{
writePrefixIfNot();
writeSuffixIfNot();
auto num_rows = chunk.getNumRows();
if (num_rows != 1)
throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR);
auto & columns = chunk.getColumns();
writeBeforeTotals();
writeTotals(columns, 0);
writeAfterTotals();
}
void IRowOutputFormat::consumeExtremes(DB::Chunk chunk)
{
writePrefixIfNot();
writeSuffixIfNot();
auto num_rows = chunk.getNumRows();
auto & columns = chunk.getColumns();
if (num_rows != 2)
throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR);
writeBeforeExtremes();
writeMinExtreme(columns, 0);
writeRowBetweenDelimiter();
writeMaxExtreme(columns, 1);
writeAfterExtremes();
}
void IRowOutputFormat::finalize()
{
writePrefixIfNot();
writeSuffixIfNot();
writeLastSuffix();
}
void IRowOutputFormat::write(const Columns & columns, size_t row_num)
{
size_t num_columns = columns.size();
writeRowStartDelimiter();
for (size_t i = 0; i < num_columns; ++i)
{
if (i != 0)
writeFieldDelimiter();
writeField(*columns[i], *types[i], row_num);
}
writeRowEndDelimiter();
}
void IRowOutputFormat::writeMinExtreme(const DB::Columns & columns, size_t row_num)
{
write(columns, row_num);
}
void IRowOutputFormat::writeMaxExtreme(const DB::Columns & columns, size_t row_num)
{
write(columns, row_num);
}
void IRowOutputFormat::writeTotals(const DB::Columns & columns, size_t row_num)
{
write(columns, row_num);
}
}
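
Taken together, the consume*/finalize methods above produce a fixed call order. For a typical query with totals and extremes, the sequence of hooks looks like this (an illustrative trace, not code from this commit):

/// writePrefix()                                        -- forced by the first consume()
/// write(row_0) ... write(row_n)                        -- with writeRowBetweenDelimiter() between rows
/// writeSuffix()                                        -- forced by the first consumeTotals()/consumeExtremes()
/// writeBeforeTotals(); writeTotals(); writeAfterTotals()
/// writeBeforeExtremes(); writeMinExtreme(); writeRowBetweenDelimiter(); writeMaxExtreme(); writeAfterExtremes()
/// writeLastSuffix()                                    -- from finalize()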


@ -0,0 +1,80 @@
#pragma once
#include <string>
#include <Processors/Formats/IOutputFormat.h>
namespace DB
{
class WriteBuffer;
/** Output format that writes data row by row.
*/
class IRowOutputFormat : public IOutputFormat
{
protected:
DataTypes types;
void consume(Chunk chunk) override;
void consumeTotals(Chunk chunk) override;
void consumeExtremes(Chunk chunk) override;
void finalize() override;
public:
IRowOutputFormat(const Block & header, WriteBuffer & out_)
: IOutputFormat(header, out_), types(header.getDataTypes())
{
}
/** Write a row.
* The default implementation calls the methods that write single values and delimiters
* (except the delimiter between rows, writeRowBetweenDelimiter()).
*/
virtual void write(const Columns & columns, size_t row_num);
virtual void writeMinExtreme(const Columns & columns, size_t row_num);
virtual void writeMaxExtreme(const Columns & columns, size_t row_num);
virtual void writeTotals(const Columns & columns, size_t row_num);
/** Write a single value. */
virtual void writeField(const IColumn & column, const IDataType & type, size_t row_num) = 0;
/** Write delimiter. */
virtual void writeFieldDelimiter() {} /// delimiter between values
virtual void writeRowStartDelimiter() {} /// delimiter before each row
virtual void writeRowEndDelimiter() {} /// delimiter after each row
virtual void writeRowBetweenDelimiter() {} /// delimiter between rows
virtual void writePrefix() {} /// delimiter before the result set
virtual void writeSuffix() {} /// delimiter after the result set
virtual void writeBeforeTotals() {}
virtual void writeAfterTotals() {}
virtual void writeBeforeExtremes() {}
virtual void writeAfterExtremes() {}
virtual void writeLastSuffix() {} /// Write something after the result set, totals, and extremes.
private:
bool first_row = true;
bool prefix_written = false;
bool suffix_written = false;
void writePrefixIfNot()
{
if (!prefix_written)
writePrefix();
prefix_written = true;
}
void writeSuffixIfNot()
{
if (!suffix_written)
writeSuffix();
suffix_written = true;
}
};
}
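
Symmetrically, a minimal hypothetical output format (illustrative, not part of this commit): values separated by a pipe, one row per line. It assumes IDataType::serializeAsText, writeChar from IO/WriteHelpers.h, and Formats/FormatSettings.h.

class ToyRowOutputFormat : public IRowOutputFormat
{
public:
    ToyRowOutputFormat(const Block & header, WriteBuffer & out_, const FormatSettings & settings_)
        : IRowOutputFormat(header, out_), settings(settings_) {}

    String getName() const override { return "ToyRowOutputFormat"; }

    void writeField(const IColumn & column, const IDataType & type, size_t row_num) override
    {
        type.serializeAsText(column, row_num, out, settings); /// Assumed IDataType helper.
    }

    void writeFieldDelimiter() override { writeChar('|', out); }   /// between values
    void writeRowEndDelimiter() override { writeChar('\n', out); } /// after each row

private:
    const FormatSettings settings;
};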


@ -0,0 +1,82 @@
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <Processors/Formats/Impl/BinaryRowInputFormat.h>
#include <Formats/FormatFactory.h>
namespace DB
{
BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params, bool with_names_, bool with_types_)
: IRowInputFormat(std::move(header), in_, params), with_names(with_names_), with_types(with_types_)
{
}
bool BinaryRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (in.eof())
return false;
size_t num_columns = columns.size();
for (size_t i = 0; i < num_columns; ++i)
getPort().getHeader().getByPosition(i).type->deserializeBinary(*columns[i], in);
return true;
}
void BinaryRowInputFormat::readPrefix()
{
/// NOTE The header is completely ignored. This can be easily improved.
UInt64 columns = 0;
String tmp;
if (with_names || with_types)
{
readVarUInt(columns, in);
}
if (with_names)
{
for (size_t i = 0; i < columns; ++i)
{
readStringBinary(tmp, in);
}
}
if (with_types)
{
for (size_t i = 0; i < columns; ++i)
{
readStringBinary(tmp, in);
}
}
}
void registerInputFormatProcessorRowBinary(FormatFactory & factory)
{
factory.registerInputFormatProcessor("RowBinary", [](
ReadBuffer & buf,
const Block & sample,
const Context &,
const IRowInputFormat::Params & params,
const FormatSettings &)
{
return std::make_shared<BinaryRowInputFormat>(buf, sample, params, false, false);
});
factory.registerInputFormatProcessor("RowBinaryWithNamesAndTypes", [](
ReadBuffer & buf,
const Block & sample,
const Context &,
const IRowInputFormat::Params & params,
const FormatSettings &)
{
return std::make_shared<BinaryRowInputFormat>(buf, sample, params, true, true);
});
}
}
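
For reference, the header that readPrefix() skips above (and that BinaryRowOutputFormat::writePrefix() further below emits) has this layout, where stringBinary means a varUInt length followed by that many raw bytes:

varUInt          column_count        -- present if with_names or with_types
column_count x   stringBinary name   -- present if with_names
column_count x   stringBinary type   -- present if with_types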


@ -0,0 +1,30 @@
#pragma once
#include <Core/Block.h>
#include <Processors/Formats/IRowInputFormat.h>
namespace DB
{
class ReadBuffer;
/** A stream for inputting data in a binary row-by-row format.
*/
class BinaryRowInputFormat : public IRowInputFormat
{
public:
BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params, bool with_names_, bool with_types_);
bool readRow(MutableColumns & columns, RowReadExtension &) override;
void readPrefix() override;
String getName() const override { return "BinaryRowInputFormat"; }
private:
bool with_names;
bool with_types;
};
}


@ -0,0 +1,71 @@
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Columns/IColumn.h>
#include <DataTypes/IDataType.h>
#include <Processors/Formats/Impl/BinaryRowOutputFormat.h>
#include <Formats/FormatFactory.h>
namespace DB
{
BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_)
: IRowOutputFormat(header, out_), with_names(with_names_), with_types(with_types_)
{
}
void BinaryRowOutputFormat::writePrefix()
{
auto & header = getPort(PortKind::Main).getHeader();
size_t columns = header.columns();
if (with_names || with_types)
{
writeVarUInt(columns, out);
}
if (with_names)
{
for (size_t i = 0; i < columns; ++i)
{
writeStringBinary(header.safeGetByPosition(i).name, out);
}
}
if (with_types)
{
for (size_t i = 0; i < columns; ++i)
{
writeStringBinary(header.safeGetByPosition(i).type->getName(), out);
}
}
}
void BinaryRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
type.serializeBinary(column, row_num, out);
}
void registerOutputFormatProcessorRowBinary(FormatFactory & factory)
{
factory.registerOutputFormatProcessor("RowBinary", [](
WriteBuffer & buf,
const Block & sample,
const Context &,
const FormatSettings &)
{
return std::make_shared<BinaryRowOutputFormat>(buf, sample, false, false);
});
factory.registerOutputFormatProcessor("RowBinaryWithNamesAndTypes", [](
WriteBuffer & buf,
const Block & sample,
const Context &,
const FormatSettings &)
{
return std::make_shared<BinaryRowOutputFormat>(buf, sample, true, true);
});
}
}


@ -0,0 +1,35 @@
#pragma once
#include <Processors/Formats/IRowOutputFormat.h>
#include <Core/Block.h>
namespace DB
{
class IColumn;
class IDataType;
class WriteBuffer;
/** A stream for outputting data in a binary row-by-row format.
*/
class BinaryRowOutputFormat: public IRowOutputFormat
{
public:
BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_);
String getName() const override { return "BinaryRowOutputFormat"; }
void writeField(const IColumn & column, const IDataType & type, size_t row_num) override;
void writePrefix() override;
String getContentType() const override { return "application/octet-stream"; }
protected:
bool with_names;
bool with_types;
};
}


@ -0,0 +1,3 @@
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()


@ -0,0 +1,367 @@
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <Formats/verbosePrintString.h>
#include <Processors/Formats/Impl/CSVRowInputFormat.h>
#include <Formats/FormatFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
CSVRowInputFormat::CSVRowInputFormat(
ReadBuffer & in_, Block header, Params params, bool with_names_, const FormatSettings & format_settings)
: IRowInputFormat(std::move(header), in_, params), with_names(with_names_), format_settings(format_settings)
{
auto & sample = getPort().getHeader();
size_t num_columns = sample.columns();
data_types.resize(num_columns);
for (size_t i = 0; i < num_columns; ++i)
data_types[i] = sample.safeGetByPosition(i).type;
}
static void skipEndOfLine(ReadBuffer & istr)
{
/// \n (Unix) or \r\n (DOS/Windows) or \n\r; a bare \r (as in classic Mac OS) is rejected below.
if (*istr.position() == '\n')
{
++istr.position();
if (!istr.eof() && *istr.position() == '\r')
++istr.position();
}
else if (*istr.position() == '\r')
{
++istr.position();
if (!istr.eof() && *istr.position() == '\n')
++istr.position();
else
throw Exception("Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
" Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", ErrorCodes::INCORRECT_DATA);
}
else if (!istr.eof())
throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA);
}
static void skipDelimiter(ReadBuffer & istr, const char delimiter, bool is_last_column)
{
if (is_last_column)
{
if (istr.eof())
return;
/// We support an extra delimiter at the end of the line.
if (*istr.position() == delimiter)
{
++istr.position();
if (istr.eof())
return;
}
skipEndOfLine(istr);
}
else
assertChar(delimiter, istr);
}
/// Skip whitespace characters allowed in CSV (space and tab).
static inline void skipWhitespacesAndTabs(ReadBuffer & buf)
{
while (!buf.eof()
&& (*buf.position() == ' '
|| *buf.position() == '\t'))
++buf.position();
}
static void skipRow(ReadBuffer & istr, const FormatSettings::CSV & settings, size_t num_columns)
{
String tmp;
for (size_t i = 0; i < num_columns; ++i)
{
skipWhitespacesAndTabs(istr);
readCSVString(tmp, istr, settings);
skipWhitespacesAndTabs(istr);
skipDelimiter(istr, settings.delimiter, i + 1 == num_columns);
}
}
void CSVRowInputFormat::readPrefix()
{
/// In this format, we assume that if the first string field contains a BOM as its value, it will be written in quotes,
/// so a BOM at the beginning of the stream cannot be confused with a BOM in the first string value, and it is safe to skip it.
skipBOMIfExists(in);
size_t num_columns = data_types.size();
if (with_names)
skipRow(in, format_settings.csv, num_columns);
}
bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (in.eof())
return false;
updateDiagnosticInfo();
size_t size = data_types.size();
for (size_t i = 0; i < size; ++i)
{
skipWhitespacesAndTabs(in);
data_types[i]->deserializeAsTextCSV(*columns[i], in, format_settings);
skipWhitespacesAndTabs(in);
skipDelimiter(in, format_settings.csv.delimiter, i + 1 == size);
}
return true;
}
String CSVRowInputFormat::getDiagnosticInfo()
{
if (in.eof()) /// Buffer has gone, cannot extract information about what has been parsed.
return {};
WriteBufferFromOwnString out;
auto & header = getPort().getHeader();
MutableColumns columns = header.cloneEmptyColumns();
/// It is possible to display detailed diagnostics only if the last and next-to-last rows are still in the read buffer.
size_t bytes_read_at_start_of_buffer = in.count() - in.offset();
if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
{
out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
return out.str();
}
size_t max_length_of_column_name = 0;
for (size_t i = 0; i < header.columns(); ++i)
if (header.safeGetByPosition(i).name.size() > max_length_of_column_name)
max_length_of_column_name = header.safeGetByPosition(i).name.size();
size_t max_length_of_data_type_name = 0;
for (size_t i = 0; i < header.columns(); ++i)
if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name)
max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size();
/// Roll back the cursor to the beginning of the previous or current row and parse all over again. But now we derive detailed information.
if (pos_of_prev_row)
{
in.position() = pos_of_prev_row;
out << "\nRow " << (row_num - 1) << ":\n";
if (!parseRowAndPrintDiagnosticInfo(columns, out, max_length_of_column_name, max_length_of_data_type_name))
return out.str();
}
else
{
if (!pos_of_current_row)
{
out << "Could not print diagnostic info because parsing of data hasn't started.\n";
return out.str();
}
in.position() = pos_of_current_row;
}
out << "\nRow " << row_num << ":\n";
parseRowAndPrintDiagnosticInfo(columns, out, max_length_of_column_name, max_length_of_data_type_name);
out << "\n";
return out.str();
}
bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
{
const char delimiter = format_settings.csv.delimiter;
auto & header = getPort().getHeader();
size_t size = data_types.size();
for (size_t i = 0; i < size; ++i)
{
if (i == 0 && in.eof())
{
out << "<End of stream>\n";
return false;
}
out << "Column " << i << ", " << std::string((i < 10 ? 2 : i < 100 ? 1 : 0), ' ')
<< "name: " << header.safeGetByPosition(i).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(i).name.size(), ' ')
<< "type: " << data_types[i]->getName() << ", " << std::string(max_length_of_data_type_name - data_types[i]->getName().size(), ' ');
BufferBase::Position prev_position = in.position();
BufferBase::Position curr_position = in.position();
std::exception_ptr exception;
try
{
skipWhitespacesAndTabs(in);
prev_position = in.position();
data_types[i]->deserializeAsTextCSV(*columns[i], in, format_settings);
curr_position = in.position();
skipWhitespacesAndTabs(in);
}
catch (...)
{
exception = std::current_exception();
}
if (curr_position < prev_position)
throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
if (isNumber(data_types[i]) || isDateOrDateTime(data_types[i]))
{
/// An empty string instead of a value.
if (curr_position == prev_position)
{
out << "ERROR: text ";
verbosePrintString(prev_position, std::min(prev_position + 10, in.buffer().end()), out);
out << " is not like " << data_types[i]->getName() << "\n";
return false;
}
}
out << "parsed text: ";
verbosePrintString(prev_position, curr_position, out);
if (exception)
{
if (data_types[i]->getName() == "DateTime")
out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
else if (data_types[i]->getName() == "Date")
out << "ERROR: Date must be in YYYY-MM-DD format.\n";
else
out << "ERROR\n";
return false;
}
out << "\n";
if (data_types[i]->haveMaximumSizeOfValue())
{
if (*curr_position != '\n' && *curr_position != '\r' && *curr_position != delimiter)
{
out << "ERROR: garbage after " << data_types[i]->getName() << ": ";
verbosePrintString(curr_position, std::min(curr_position + 10, in.buffer().end()), out);
out << "\n";
if (data_types[i]->getName() == "DateTime")
out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
else if (data_types[i]->getName() == "Date")
out << "ERROR: Date must be in YYYY-MM-DD format.\n";
return false;
}
}
/// Delimiters
if (i + 1 == size)
{
if (in.eof())
return false;
/// We support an extra delimiter at the end of the line.
if (*in.position() == delimiter)
{
++in.position();
if (in.eof())
break;
}
if (!in.eof() && *in.position() != '\n' && *in.position() != '\r')
{
out << "ERROR: There is no line feed. ";
verbosePrintString(in.position(), in.position() + 1, out);
out << " found instead.\n"
" It's like your file has more columns than expected.\n"
"And if your file have right number of columns, maybe it have unquoted string value with comma.\n";
return false;
}
skipEndOfLine(in);
}
else
{
try
{
assertChar(delimiter, in);
}
catch (const DB::Exception &)
{
if (*in.position() == '\n' || *in.position() == '\r')
{
out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected."
" It's like your file has less columns than expected.\n"
"And if your file have right number of columns, maybe it have unescaped quotes in values.\n";
}
else
{
out << "ERROR: There is no delimiter (" << delimiter << "). ";
verbosePrintString(in.position(), in.position() + 1, out);
out << " found instead.\n";
}
return false;
}
}
}
return true;
}
void CSVRowInputFormat::syncAfterError()
{
skipToNextLineOrEOF(in);
}
void CSVRowInputFormat::updateDiagnosticInfo()
{
++row_num;
bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
bytes_read_at_start_of_buffer_on_current_row = in.count() - in.offset();
pos_of_prev_row = pos_of_current_row;
pos_of_current_row = in.position();
}
void registerInputFormatProcessorCSV(FormatFactory & factory)
{
for (bool with_names : {false, true})
{
factory.registerInputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=](
ReadBuffer & buf,
const Block & sample,
const Context &,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<CSVRowInputFormat>(buf, sample, params, with_names, settings);
});
}
}
}
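
The contract of the file-local skipEndOfLine() above, shown as a hypothetical snippet (ReadBufferFromString from IO/ReadBufferFromString.h is assumed; since the function is static, this is conceptual rather than compilable outside this translation unit):

String lf = "\n", crlf = "\r\n", lfcr = "\n\r", bare_cr = "\r;";
ReadBufferFromString b1(lf);      skipEndOfLine(b1);  /// ok: LF (Unix)
ReadBufferFromString b2(crlf);    skipEndOfLine(b2);  /// ok: CR LF (DOS/Windows)
ReadBufferFromString b3(lfcr);    skipEndOfLine(b3);  /// ok: LF CR
ReadBufferFromString b4(bare_cr); skipEndOfLine(b4);  /// throws INCORRECT_DATA: bare CR not followed by LF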


@ -0,0 +1,56 @@
#pragma once
#include <Core/Block.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <Formats/FormatSettings.h>
namespace DB
{
class ReadBuffer;
/** A stream for inputting data in CSV format.
* Does not conform to https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
*/
class CSVRowInputFormat : public IRowInputFormat
{
public:
/** with_names - the first line contains a header with column names
*/
CSVRowInputFormat(ReadBuffer & in_, Block header, Params params, bool with_names, const FormatSettings & format_settings);
String getName() const override { return "CSVRowInputFormat"; }
bool readRow(MutableColumns & columns, RowReadExtension &) override;
void readPrefix() override;
bool allowSyncAfterError() const override { return true; }
void syncAfterError() override;
std::string getDiagnosticInfo() override;
private:
bool with_names;
DataTypes data_types;
const FormatSettings format_settings;
/// For convenient diagnostics in case of an error.
size_t row_num = 0;
/// How many bytes were read, not counting those that are still in the buffer.
size_t bytes_read_at_start_of_buffer_on_current_row = 0;
size_t bytes_read_at_start_of_buffer_on_prev_row = 0;
char * pos_of_current_row = nullptr;
char * pos_of_prev_row = nullptr;
void updateDiagnosticInfo();
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name);
};
}
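
A hypothetical end-to-end usage sketch (not part of this commit; the column and variable names are illustrative, and the usual Columns/ColumnsNumber.h, Columns/ColumnString.h and DataTypes headers are assumed): parse two CSV rows into a (UInt64, String) block, tolerating one bad row.

String csv = "1,\"hello\"\n2,\"world\"\n";
ReadBufferFromString buf(csv);

Block sample{
    {ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "id"},
    {ColumnString::create(), std::make_shared<DataTypeString>(), "s"}};

RowInputFormatParams params{/* max_block_size = */ 8192,
    /* allow_errors_num = */ 1, /* allow_errors_ratio = */ 0.0};

auto format = std::make_shared<CSVRowInputFormat>(buf, sample, params, /* with_names = */ false, FormatSettings{});
Chunk chunk = format->generate(); /// Two rows: {1, "hello"}, {2, "world"}.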


@ -0,0 +1,87 @@
#include <Processors/Formats/Impl/CSVRowOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteHelpers.h>
namespace DB
{
CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, const FormatSettings & format_settings)
: IRowOutputFormat(header, out_), with_names(with_names_), format_settings(format_settings)
{
auto & sample = getPort(PortKind::Main).getHeader();
size_t columns = sample.columns();
data_types.resize(columns);
for (size_t i = 0; i < columns; ++i)
data_types[i] = sample.safeGetByPosition(i).type;
}
void CSVRowOutputFormat::writePrefix()
{
auto & sample = getPort(PortKind::Main).getHeader();
size_t columns = sample.columns();
if (with_names)
{
for (size_t i = 0; i < columns; ++i)
{
writeCSVString(sample.safeGetByPosition(i).name, out);
char delimiter = format_settings.csv.delimiter;
if (i + 1 == columns)
delimiter = '\n';
writeChar(delimiter, out);
}
}
}
void CSVRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
type.serializeAsTextCSV(column, row_num, out, format_settings);
}
void CSVRowOutputFormat::writeFieldDelimiter()
{
writeChar(format_settings.csv.delimiter, out);
}
void CSVRowOutputFormat::writeRowEndDelimiter()
{
writeChar('\n', out);
}
void CSVRowOutputFormat::writeBeforeTotals()
{
writeChar('\n', out);
}
void CSVRowOutputFormat::writeBeforeExtremes()
{
writeChar('\n', out);
}
void registerOutputFormatProcessorCSV(FormatFactory & factory)
{
for (bool with_names : {false, true})
{
factory.registerOutputFormatProcessor(with_names ? "CSVWithNames" : "CSV", [=](
WriteBuffer & buf,
const Block & sample,
const Context &,
const FormatSettings & format_settings)
{
return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, format_settings);
});
}
}
}
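
With with_names set, writePrefix() emits a header line using writeCSVString, which double-quotes names just like string values in data rows. For a hypothetical table with columns id (UInt64) and s (String) and the default comma delimiter, the CSVWithNames output would look roughly like:

"id","s"
1,"hello"
2,"world"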


@ -0,0 +1,48 @@
#pragma once
#include <Core/Block.h>
#include <Processors/Formats/IRowOutputFormat.h>
#include <Formats/FormatSettings.h>
namespace DB
{
class WriteBuffer;
/** The stream for outputting data in CSV format.
* Does not conform to https://tools.ietf.org/html/rfc4180 because it uses LF, not CR LF.
*/
class CSVRowOutputFormat : public IRowOutputFormat
{
public:
/** with_names - output a header with column names in the first line
*/
CSVRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, const FormatSettings & format_settings);
String getName() const override { return "CSVRowOutputFormat"; }
void writeField(const IColumn & column, const IDataType & type, size_t row_num) override;
void writeFieldDelimiter() override;
void writeRowEndDelimiter() override;
void writePrefix() override;
void writeBeforeTotals() override;
void writeBeforeExtremes() override;
/// https://www.iana.org/assignments/media-types/text/csv
String getContentType() const override
{
return String("text/csv; charset=UTF-8; header=") + (with_names ? "present" : "absent");
}
protected:
bool with_names;
const FormatSettings format_settings;
DataTypes data_types;
};
}


@ -0,0 +1,314 @@
#include "config_formats.h"
#if USE_CAPNP
#include <IO/ReadBuffer.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/Impl/CapnProtoRowInputFormat.h> // Y_IGNORE
#include <Formats/FormatFactory.h>
#include <Formats/FormatSchemaInfo.h>
#include <capnp/serialize.h> // Y_IGNORE
#include <capnp/dynamic.h> // Y_IGNORE
#include <capnp/common.h> // Y_IGNORE
#include <boost/algorithm/string.hpp>
#include <boost/range/join.hpp>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
extern const int BAD_ARGUMENTS;
extern const int THERE_IS_NO_COLUMN;
extern const int LOGICAL_ERROR;
}
static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i)
{
CapnProtoRowInputFormat::NestedField field = {{}, i};
// Remove leading dot in field definition, e.g. ".msg" -> "msg"
String name(header.safeGetByPosition(i).name);
if (!name.empty() && name[0] == '.')
name.erase(0, 1);
boost::split(field.tokens, name, boost::is_any_of("._"));
return field;
}
static Field convertNodeToField(const capnp::DynamicValue::Reader & value)
{
switch (value.getType())
{
case capnp::DynamicValue::UNKNOWN:
throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD);
case capnp::DynamicValue::VOID:
return Field();
case capnp::DynamicValue::BOOL:
return value.as<bool>() ? 1u : 0u;
case capnp::DynamicValue::INT:
return value.as<int64_t>();
case capnp::DynamicValue::UINT:
return value.as<uint64_t>();
case capnp::DynamicValue::FLOAT:
return value.as<double>();
case capnp::DynamicValue::TEXT:
{
auto arr = value.as<capnp::Text>();
return String(arr.begin(), arr.size());
}
case capnp::DynamicValue::DATA:
{
auto arr = value.as<capnp::Data>().asChars();
return String(arr.begin(), arr.size());
}
case capnp::DynamicValue::LIST:
{
auto listValue = value.as<capnp::DynamicList>();
Array res(listValue.size());
for (auto i : kj::indices(listValue))
res[i] = convertNodeToField(listValue[i]);
return res;
}
case capnp::DynamicValue::ENUM:
return value.as<capnp::DynamicEnum>().getRaw();
case capnp::DynamicValue::STRUCT:
{
auto structValue = value.as<capnp::DynamicStruct>();
const auto & fields = structValue.getSchema().getFields();
Field field = Tuple(TupleBackend(fields.size()));
TupleBackend & tuple = get<Tuple &>(field).toUnderType();
for (auto i : kj::indices(fields))
tuple[i] = convertNodeToField(structValue.get(fields[i]));
return field;
}
case capnp::DynamicValue::CAPABILITY:
throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD);
case capnp::DynamicValue::ANY_POINTER:
throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD);
}
return Field();
}
static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field)
{
KJ_IF_MAYBE(child, node.findFieldByName(field))
return *child;
else
throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN);
}
void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader)
{
/// Columns in a table can map to fields in Cap'n Proto or to structs.
/// Store common parents and their tokens in order to backtrack.
std::vector<capnp::StructSchema::Field> parents;
std::vector<std::string> parent_tokens;
capnp::StructSchema cur_reader = reader;
for (const auto & field : sorted_fields)
{
if (field.tokens.empty())
throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR);
// Backtrack to common parent
while (field.tokens.size() < parent_tokens.size() + 1
|| !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin()))
{
actions.push_back({Action::POP});
parents.pop_back();
parent_tokens.pop_back();
if (parents.empty())
{
cur_reader = reader;
break;
}
else
cur_reader = parents.back().getType().asStruct();
}
// Go forward
while (parent_tokens.size() + 1 < field.tokens.size())
{
const auto & token = field.tokens[parents.size()];
auto node = getFieldOrThrow(cur_reader, token);
if (node.getType().isStruct())
{
// Descend to field structure
parents.emplace_back(node);
parent_tokens.emplace_back(token);
cur_reader = node.getType().asStruct();
actions.push_back({Action::PUSH, node});
}
else if (node.getType().isList())
{
break; // Collect list
}
else
throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
}
// Read field from the structure
auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]);
if (node.getType().isList() && !actions.empty() && actions.back().field == node)
{
// The field list here flattens Nested elements into multiple arrays.
// In order to map Nested types in Cap'n Proto back, they need to be collected.
// Since the field names are sorted, the order of field positions must be preserved.
// For example, if the fields are { b @0 :Text, a @1 :Text }, `a` would come first
// even though its position is second.
auto & columns = actions.back().columns;
auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos);
columns.insert(it, field.pos);
}
else
{
actions.push_back({Action::READ, node, {field.pos}});
}
}
}
CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params, const FormatSchemaInfo & info)
: IRowInputFormat(std::move(header), in_, params), parser(std::make_shared<SchemaParser>())
{
// Parse the schema and fetch the root object
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {});
#pragma GCC diagnostic pop
root = schema.getNested(info.messageName()).asStruct();
/**
* The schema typically consists of fields in various nested structures.
* Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent,
* and the nesting level doesn't decrease to make traversal easier.
*/
auto & sample = getPort().getHeader();
NestedFieldList list;
size_t num_columns = sample.columns();
for (size_t i = 0; i < num_columns; ++i)
list.push_back(split(sample, i));
// Order the list lexicographically by the token strings (shorter prefixes come first).
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; });
createActions(list, root);
}
bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (in.eof())
return false;
// Read from underlying buffer directly
auto buf = in.buffer();
auto base = reinterpret_cast<const capnp::word *>(in.position());
// Check if there are enough bytes in the buffer to read the full message
kj::Array<capnp::word> heap_array;
auto array = kj::arrayPtr(base, buf.size() - in.offset());
auto expected_words = capnp::expectedSizeInWordsFromPrefix(array);
if (expected_words * sizeof(capnp::word) > array.size())
{
// We'll need to reassemble the message in a contiguous buffer
heap_array = kj::heapArray<capnp::word>(expected_words);
in.readStrict(heap_array.asChars().begin(), heap_array.asChars().size());
array = heap_array.asPtr();
}
#if CAPNP_VERSION >= 8000
capnp::UnalignedFlatArrayMessageReader msg(array);
#else
capnp::FlatArrayMessageReader msg(array);
#endif
std::vector<capnp::DynamicStruct::Reader> stack;
stack.push_back(msg.getRoot<capnp::DynamicStruct>(root));
for (auto action : actions)
{
switch (action.type)
{
case Action::READ:
{
Field value = convertNodeToField(stack.back().get(action.field));
if (action.columns.size() > 1)
{
// Nested columns must be flattened into several arrays
// e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...)
const auto & collected = DB::get<const Array &>(value);
size_t size = collected.size();
// Each flattened array holds one component of the nested tuples
Array flattened(size);
for (size_t column_index = 0; column_index < action.columns.size(); ++column_index)
{
// Populate the array with the column_index-th element of each tuple
for (size_t off = 0; off < size; ++off)
{
const TupleBackend & tuple = DB::get<const Tuple &>(collected[off]).toUnderType();
flattened[off] = tuple[column_index];
}
auto & col = columns[action.columns[column_index]];
col->insert(flattened);
}
}
else
{
auto & col = columns[action.columns[0]];
col->insert(value);
}
break;
}
case Action::POP:
stack.pop_back();
break;
case Action::PUSH:
stack.push_back(stack.back().get(action.field).as<capnp::DynamicStruct>());
break;
}
}
// Advance buffer position if used directly
if (heap_array.size() == 0)
{
auto parsed = (msg.getEnd() - base) * sizeof(capnp::word);
in.position() += parsed;
}
return true;
}
void registerInputFormatProcessorCapnProto(FormatFactory & factory)
{
factory.registerInputFormatProcessor(
"CapnProto",
[](ReadBuffer & buf, const Block & sample, const Context & context, IRowInputFormat::Params params, const FormatSettings &)
{
return std::make_shared<CapnProtoRowInputFormat>(buf, sample, params, FormatSchemaInfo(context, "capnp"));
});
}
}
#else
namespace DB
{
class FormatFactory;
void registerInputFormatProcessorCapnProto(FormatFactory &) {}
}
#endif // USE_CAPNP
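
How column names find their Cap'n Proto fields: split() tokenizes on '.' and also on '_', so flattened Nested columns can address nested structs. A hypothetical illustration (the schema and names are examples, not part of this commit):

/// Given a schema:
///   struct Message { msg @0 :Msg; }
///   struct Msg     { text @0 :Text; id @1 :UInt64; }
/// column names are resolved as:
///   ".msg"     -> tokens {"msg"}          (leading dot stripped)
///   "msg.text" -> tokens {"msg", "text"}  -> Message.msg.text
///   "msg_id"   -> tokens {"msg", "id"}    -> Message.msg.id ('_' also splits)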


@ -0,0 +1,75 @@
#pragma once
#include <Common/config.h>
#if USE_CAPNP
#include <Core/Block.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <capnp/schema-parser.h>
namespace DB
{
class FormatSchemaInfo;
class ReadBuffer;
/** A stream for reading messages in Cap'n Proto format using a given schema.
* Like Protocol Buffers and Thrift (but unlike JSON or MessagePack),
* Cap'n Proto messages are strongly-typed and not self-describing.
* The schema in this case cannot be compiled in, so it uses a runtime schema parser.
* See https://capnproto.org/cxx.html
*/
class CapnProtoRowInputFormat : public IRowInputFormat
{
public:
struct NestedField
{
std::vector<std::string> tokens;
size_t pos;
};
using NestedFieldList = std::vector<NestedField>;
/** schema_dir - base path for schema files
* schema_file - location of the capnproto schema, e.g. "schema.capnp"
* root_object - name of the root object, e.g. "Message"
*/
CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params, const FormatSchemaInfo & info);
String getName() const override { return "CapnProtoRowInputFormat"; }
bool readRow(MutableColumns & columns, RowReadExtension &) override;
private:
// Build a traversal plan from a sorted list of fields
void createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader);
/* Action for the state machine that traverses nested structures. */
using BlockPositionList = std::vector<size_t>;
struct Action
{
enum Type { POP, PUSH, READ };
Type type;
capnp::StructSchema::Field field = {};
BlockPositionList columns = {};
};
// Wrapper for classes that could throw in destructor
// https://github.com/capnproto/capnproto/issues/553
template <typename T>
struct DestructorCatcher
{
T impl;
template <typename ... Arg>
DestructorCatcher(Arg && ... args) : impl(kj::fwd<Arg>(args)...) {}
~DestructorCatcher() noexcept try { } catch (...) { return; }
};
using SchemaParser = DestructorCatcher<capnp::SchemaParser>;
std::shared_ptr<SchemaParser> parser;
capnp::StructSchema root;
std::vector<Action> actions;
};
}
#endif // USE_CAPNP
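
A hypothetical standalone illustration of the DestructorCatcher wrapper above (not part of this commit): the function-try-block on ~DestructorCatcher catches exceptions thrown by the wrapped member's destructor, and the `return;` in the handler suppresses the implicit rethrow, so nothing escapes the noexcept destructor.

#include <stdexcept>

struct Throws
{
    ~Throws() noexcept(false) { throw std::runtime_error("boom"); }
};

void example()
{
    DestructorCatcher<Throws> guarded;  /// Constructor arguments would be forwarded to Throws.
}   /// ~DestructorCatcher swallows the exception thrown by ~Throws.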

Some files were not shown because too many files have changed in this diff.