Merge branch 'master' into joins

This commit is contained in:
alexey-milovidov 2020-01-26 00:03:56 +03:00 committed by GitHub
commit 196ed889b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
203 changed files with 4534 additions and 662 deletions

4
.gitmodules vendored
View File

@ -140,3 +140,7 @@
[submodule "contrib/ryu"]
path = contrib/ryu
url = https://github.com/ClickHouse-Extras/ryu.git
[submodule "contrib/avro"]
path = contrib/avro
url = https://github.com/ClickHouse-Extras/avro.git
ignore = untracked

45
AUTHORS
View File

@ -1,43 +1,2 @@
The following authors have created the source code of "ClickHouse"
published and distributed by YANDEX LLC as the owner:
Alexander Makarov <asealback@yandex-team.ru>
Alexander Prudaev <aprudaev@yandex-team.ru>
Alexey Arno <af-arno@yandex-team.ru>
Alexey Milovidov <milovidov@yandex-team.ru>
Alexey Tronov <vkusny@yandex-team.ru>
Alexey Vasiliev <loudhorr@yandex-team.ru>
Alexey Zatelepin <ztlpn@yandex-team.ru>
Amy Krishnevsky <krishnevsky@yandex-team.ru>
Andrey M <hertz@yandex-team.ru>
Andrey Mironov <hertz@yandex-team.ru>
Andrey Urusov <drobus@yandex-team.ru>
Anton Tikhonov <rokerjoker@yandex-team.ru>
Dmitry Bilunov <kmeaw@yandex-team.ru>
Dmitry Galuza <galuza@yandex-team.ru>
Eugene Konkov <konkov@yandex-team.ru>
Evgeniy Gatov <egatov@yandex-team.ru>
Ilya Khomutov <robert@yandex-team.ru>
Ilya Korolev <breeze@yandex-team.ru>
Ivan Blinkov <blinkov@yandex-team.ru>
Maxim Nikulin <mnikulin@yandex-team.ru>
Michael Kolupaev <mkolupaev@yandex-team.ru>
Michael Razuvaev <razuvaev@yandex-team.ru>
Nikolai Kochetov <nik-kochetov@yandex-team.ru>
Nikolay Vasiliev <lonlylocly@yandex-team.ru>
Nikolay Volosatov <bamx23@yandex-team.ru>
Pavel Artemkin <stanly@yandex-team.ru>
Pavel Kartaviy <kartavyy@yandex-team.ru>
Roman Nozdrin <drrtuy@yandex-team.ru>
Roman Peshkurov <peshkurov@yandex-team.ru>
Sergey Fedorov <fets@yandex-team.ru>
Sergey Lazarev <hamilkar@yandex-team.ru>
Sergey Magidovich <mgsergio@yandex-team.ru>
Sergey Serebryanik <serebrserg@yandex-team.ru>
Sergey Veletskiy <velom@yandex-team.ru>
Vasily Okunev <okunev@yandex-team.ru>
Vitaliy Lyudvichenko <vludv@yandex-team.ru>
Vladimir Chebotarev <chebotarev@yandex-team.ru>
Vsevolod Orlov <vorloff@yandex-team.ru>
Vyacheslav Alipov <alipov@yandex-team.ru>
Yuriy Galitskiy <orantius@yandex-team.ru>
To see the list of authors who created the source code of ClickHouse, published and distributed by YANDEX LLC as the owner,
run "SELECT * FROM system.contributors;" query on any ClickHouse server.

View File

@ -352,7 +352,7 @@ include (cmake/find/simdjson.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/orc.cmake)
include (cmake/find/replxx.cmake)
include (cmake/find/avro.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)

View File

@ -19,12 +19,12 @@ In order for us (YANDEX LLC) to accept patches and other contributions from you,
By adopting the CLA, you state the following:
* You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA,
* You has read the terms and conditions of the CLA and agree with them in full,
* You have read the terms and conditions of the CLA and agree with them in full,
* You are legally able to provide and license your contributions as stated,
* We may use your contributions for our open source projects and for any other our project too,
* We rely on your assurances concerning the rights of third parties in relation to your contributes.
* We rely on your assurances concerning the rights of third parties in relation to your contributions.
If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you has already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA.
If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you have already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA.
If you have already adopted terms and conditions of the CLA, you are able to provide your contributes. When you submit your pull request, please add the following information into it:

View File

@ -1,4 +1,4 @@
Copyright 2016-2019 Yandex LLC
Copyright 2016-2020 Yandex LLC
Apache License
Version 2.0, January 2004
@ -188,7 +188,7 @@ Copyright 2016-2019 Yandex LLC
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2019 Yandex LLC
Copyright 2016-2020 Yandex LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

28
cmake/find/avro.cmake Normal file
View File

@ -0,0 +1,28 @@
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
if (ENABLE_AVRO)
option (USE_INTERNAL_AVRO_LIBRARY "Set to FALSE to use system avro library instead of bundled" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/CMakeLists.txt")
if(USE_INTERNAL_AVRO_LIBRARY)
message(WARNING "submodule contrib/avro is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(MISSING_INTERNAL_AVRO_LIBRARY 1)
set(USE_INTERNAL_AVRO_LIBRARY 0)
endif()
if (NOT USE_INTERNAL_AVRO_LIBRARY)
elseif(NOT MISSING_INTERNAL_AVRO_LIBRARY)
include(cmake/find/snappy.cmake)
set(AVROCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/include")
set(AVROCPP_LIBRARY avrocpp)
endif ()
if (AVROCPP_LIBRARY AND AVROCPP_INCLUDE_DIR)
set(USE_AVRO 1)
endif()
endif()
message (STATUS "Using avro=${USE_AVRO}: ${AVROCPP_INCLUDE_DIR} : ${AVROCPP_LIBRARY}")

View File

@ -31,6 +31,7 @@ if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
set (Boost_SYSTEM_LIBRARY boost_system_internal)
set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY})
set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal)
set (Boost_REGEX_LIBRARY boost_regex_internal)
set (Boost_INCLUDE_DIRS)
@ -48,4 +49,4 @@ if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost")
endif ()
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_REGEX_LIBRARY}")
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}")

View File

@ -14,6 +14,7 @@ if (NOT ENABLE_LIBRARIES)
set (ENABLE_POCO_REDIS ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_ODBC ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_SQL ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_JSON ${ENABLE_LIBRARIES} CACHE BOOL "")
endif ()
set (POCO_COMPONENTS Net XML SQL Data)
@ -34,6 +35,9 @@ if (NOT DEFINED ENABLE_POCO_ODBC OR ENABLE_POCO_ODBC)
list (APPEND POCO_COMPONENTS DataODBC)
list (APPEND POCO_COMPONENTS SQLODBC)
endif ()
if (NOT DEFINED ENABLE_POCO_JSON OR ENABLE_POCO_JSON)
list (APPEND POCO_COMPONENTS JSON)
endif ()
if (NOT USE_INTERNAL_POCO_LIBRARY)
find_package (Poco COMPONENTS ${POCO_COMPONENTS})
@ -112,6 +116,11 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY)
endif ()
endif ()
if (NOT DEFINED ENABLE_POCO_JSON OR ENABLE_POCO_JSON)
set (Poco_JSON_LIBRARY PocoJSON)
set (Poco_JSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/poco/JSON/include/")
endif ()
if (OPENSSL_FOUND AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL))
set (Poco_NetSSL_LIBRARY PocoNetSSL ${OPENSSL_LIBRARIES})
set (Poco_Crypto_LIBRARY PocoCrypto ${OPENSSL_LIBRARIES})
@ -145,8 +154,11 @@ endif ()
if (Poco_SQLODBC_LIBRARY AND ODBC_FOUND)
set (USE_POCO_SQLODBC 1)
endif ()
if (Poco_JSON_LIBRARY)
set (USE_POCO_JSON 1)
endif ()
message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}")
message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_LIBRARY},${Poco_JSON_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}, JSON=${USE_POCO_JSON}")
# How to make sutable poco:
# use branch:

View File

@ -1,40 +0,0 @@
option (ENABLE_REPLXX "Enable replxx support" ${NOT_UNBUNDLED})
if (ENABLE_REPLXX)
option (USE_INTERNAL_REPLXX "Use internal replxx library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_REPLXX AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/replxx/README.md")
message (WARNING "submodule contrib/replxx is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_REPLXX 0)
endif ()
if (NOT USE_INTERNAL_REPLXX)
find_library(LIBRARY_REPLXX NAMES replxx replxx-static)
find_path(INCLUDE_REPLXX replxx.hxx)
add_library(replxx UNKNOWN IMPORTED)
set_property(TARGET replxx PROPERTY IMPORTED_LOCATION ${LIBRARY_REPLXX})
target_include_directories(replxx PUBLIC ${INCLUDE_REPLXX})
set(CMAKE_REQUIRED_LIBRARIES replxx)
check_cxx_source_compiles(
"
#include <replxx.hxx>
int main() {
replxx::Replxx rx;
}
"
EXTERNAL_REPLXX_WORKS
)
if (NOT EXTERNAL_REPLXX_WORKS)
message (FATAL_ERROR "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}")
endif ()
endif ()
set(USE_REPLXX 1)
message (STATUS "Using replxx")
else ()
set(USE_REPLXX 0)
endif ()

View File

@ -53,6 +53,7 @@ if (SANITIZE)
set (USE_CAPNP 0 CACHE BOOL "")
set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "")
set (USE_ORC 0 CACHE BOOL "")
set (USE_AVRO 0 CACHE BOOL "")
set (ENABLE_SSL 0 CACHE BOOL "")
elseif (SANITIZE STREQUAL "thread")

View File

@ -146,6 +146,20 @@ if (ENABLE_ICU AND USE_INTERNAL_ICU_LIBRARY)
add_subdirectory (icu-cmake)
endif ()
if(USE_INTERNAL_SNAPPY_LIBRARY)
set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "")
if (NOT MAKE_STATIC_LIBRARIES)
set(BUILD_SHARED_LIBS 1) # TODO: set at root dir
endif()
add_subdirectory(snappy)
set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
if(SANITIZE STREQUAL "undefined")
target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif()
endif()
if (USE_INTERNAL_PARQUET_LIBRARY)
if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
# We dont use arrow's cmakefiles because they uses too many depends and download some libs in compile time
@ -189,20 +203,6 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
endif()
else()
if(USE_INTERNAL_SNAPPY_LIBRARY)
set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "")
if (NOT MAKE_STATIC_LIBRARIES)
set(BUILD_SHARED_LIBS 1) # TODO: set at root dir
endif()
add_subdirectory(snappy)
set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
if(SANITIZE STREQUAL "undefined")
target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif()
endif()
add_subdirectory(arrow-cmake)
# The library is large - avoid bloat.
@ -212,6 +212,10 @@ else()
endif()
endif()
if (USE_INTERNAL_AVRO_LIBRARY)
add_subdirectory(avro-cmake)
endif()
if (USE_INTERNAL_POCO_LIBRARY)
set (POCO_VERBOSE_MESSAGES 0 CACHE INTERNAL "")
set (save_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@ -332,6 +336,4 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
if (USE_INTERNAL_REPLXX)
add_subdirectory (replxx-cmake)
endif()
add_subdirectory(replxx-cmake)

1
contrib/avro vendored Submodule

@ -0,0 +1 @@
Subproject commit 5b2752041c8d2f75eb5c1dbec8b4c25fc0e24d12

View File

@ -0,0 +1,70 @@
set(AVROCPP_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++)
set(AVROCPP_INCLUDE_DIR ${AVROCPP_ROOT_DIR}/api)
set(AVROCPP_SOURCE_DIR ${AVROCPP_ROOT_DIR}/impl)
set (CMAKE_CXX_STANDARD 17)
if (EXISTS ${AVROCPP_ROOT_DIR}/../../share/VERSION.txt)
file(READ "${AVROCPP_ROOT_DIR}/../../share/VERSION.txt"
AVRO_VERSION)
endif()
string(REPLACE "\n" "" AVRO_VERSION ${AVRO_VERSION})
set (AVRO_VERSION_MAJOR ${AVRO_VERSION})
set (AVRO_VERSION_MINOR "0")
set (AVROCPP_SOURCE_FILES
${AVROCPP_SOURCE_DIR}/Compiler.cc
${AVROCPP_SOURCE_DIR}/Node.cc
${AVROCPP_SOURCE_DIR}/LogicalType.cc
${AVROCPP_SOURCE_DIR}/NodeImpl.cc
${AVROCPP_SOURCE_DIR}/ResolverSchema.cc
${AVROCPP_SOURCE_DIR}/Schema.cc
${AVROCPP_SOURCE_DIR}/Types.cc
${AVROCPP_SOURCE_DIR}/ValidSchema.cc
${AVROCPP_SOURCE_DIR}/Zigzag.cc
${AVROCPP_SOURCE_DIR}/BinaryEncoder.cc
${AVROCPP_SOURCE_DIR}/BinaryDecoder.cc
${AVROCPP_SOURCE_DIR}/Stream.cc
${AVROCPP_SOURCE_DIR}/FileStream.cc
${AVROCPP_SOURCE_DIR}/Generic.cc
${AVROCPP_SOURCE_DIR}/GenericDatum.cc
${AVROCPP_SOURCE_DIR}/DataFile.cc
${AVROCPP_SOURCE_DIR}/parsing/Symbol.cc
${AVROCPP_SOURCE_DIR}/parsing/ValidatingCodec.cc
${AVROCPP_SOURCE_DIR}/parsing/JsonCodec.cc
${AVROCPP_SOURCE_DIR}/parsing/ResolvingDecoder.cc
${AVROCPP_SOURCE_DIR}/json/JsonIO.cc
${AVROCPP_SOURCE_DIR}/json/JsonDom.cc
${AVROCPP_SOURCE_DIR}/Resolver.cc
${AVROCPP_SOURCE_DIR}/Validator.cc
)
add_library (avrocpp ${AVROCPP_SOURCE_FILES})
set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR})
target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR})
target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS})
target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY})
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (avrocpp ${SNAPPY_LIBRARY})
endif ()
if (COMPILER_GCC)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor)
elseif (COMPILER_CLANG)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor)
endif ()
target_compile_options(avrocpp PRIVATE ${SUPPRESS_WARNINGS})
# create a symlink to include headers with <avro/...>
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include
COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro
)
add_dependencies(avrocpp avro_symlink_headers)

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 830e51edb59c4f37a8638138581e1e56c29ac44f
Subproject commit 86be2aef20bee2356b744e5569eed6eaded85dbe

View File

@ -37,3 +37,8 @@ target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal)
if (USE_INTERNAL_PARQUET_LIBRARY)
add_boost_lib(regex)
endif()
if (USE_INTERNAL_AVRO_LIBRARY)
add_boost_lib(iostreams)
target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES})
endif()

View File

@ -23,6 +23,10 @@ typedef unsigned __int64 uint64_t;
#endif // !defined(_MSC_VER)
#ifdef __cplusplus
extern "C" {
#endif
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
@ -32,3 +36,7 @@ void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
#ifdef __cplusplus
}
#endif

View File

@ -1,18 +1,57 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx")
option (ENABLE_REPLXX "Enable replxx support" ${ENABLE_LIBRARIES})
set(SRCS
${LIBRARY_DIR}/src/conversion.cxx
${LIBRARY_DIR}/src/escape.cxx
${LIBRARY_DIR}/src/history.cxx
${LIBRARY_DIR}/src/io.cxx
${LIBRARY_DIR}/src/prompt.cxx
${LIBRARY_DIR}/src/replxx.cxx
${LIBRARY_DIR}/src/replxx_impl.cxx
${LIBRARY_DIR}/src/util.cxx
${LIBRARY_DIR}/src/wcwidth.cpp
${LIBRARY_DIR}/src/ConvertUTF.cpp
)
if (ENABLE_REPLXX)
option (USE_INTERNAL_REPLXX "Use internal replxx library" ${NOT_UNBUNDLED})
add_library(replxx ${SRCS})
target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include)
target_compile_options(replxx PUBLIC -Wno-documentation)
if (USE_INTERNAL_REPLXX)
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx")
set(SRCS
${LIBRARY_DIR}/src/conversion.cxx
${LIBRARY_DIR}/src/ConvertUTF.cpp
${LIBRARY_DIR}/src/escape.cxx
${LIBRARY_DIR}/src/history.cxx
${LIBRARY_DIR}/src/io.cxx
${LIBRARY_DIR}/src/prompt.cxx
${LIBRARY_DIR}/src/replxx_impl.cxx
${LIBRARY_DIR}/src/replxx.cxx
${LIBRARY_DIR}/src/util.cxx
${LIBRARY_DIR}/src/wcwidth.cpp
)
add_library (replxx ${SRCS})
target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include)
else ()
find_library(LIBRARY_REPLXX NAMES replxx replxx-static)
find_path(INCLUDE_REPLXX replxx.hxx)
add_library(replxx UNKNOWN IMPORTED)
set_property(TARGET replxx PROPERTY IMPORTED_LOCATION ${LIBRARY_REPLXX})
target_include_directories(replxx PUBLIC ${INCLUDE_REPLXX})
set(CMAKE_REQUIRED_LIBRARIES replxx)
check_cxx_source_compiles(
"
#include <replxx.hxx>
int main() {
replxx::Replxx rx;
}
"
EXTERNAL_REPLXX_WORKS
)
if (NOT EXTERNAL_REPLXX_WORKS)
message (FATAL_ERROR "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}")
endif ()
endif ()
target_compile_options(replxx PUBLIC -Wno-documentation)
target_compile_definitions(replxx PUBLIC USE_REPLXX=1)
message (STATUS "Using replxx")
else ()
add_library(replxx INTERFACE)
target_compile_definitions(replxx INTERFACE USE_REPLXX=0)
message (STATUS "Not using replxx (Beware! Runtime fallback to readline is possible!)")
endif ()

View File

@ -504,6 +504,10 @@ if (USE_POCO_NETSSL)
dbms_target_link_libraries (PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
endif()
if (USE_POCO_JSON)
dbms_target_link_libraries (PRIVATE ${Poco_JSON_LIBRARY})
endif()
dbms_target_link_libraries (PRIVATE ${Poco_Foundation_LIBRARY})
if (USE_ICU)
@ -522,6 +526,11 @@ if (USE_PARQUET)
endif ()
endif ()
if (USE_AVRO)
dbms_target_link_libraries(PRIVATE ${AVROCPP_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${AVROCPP_INCLUDE_DIR})
endif ()
if (OPENSSL_CRYPTO_LIBRARY)
dbms_target_link_libraries (PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
target_link_libraries (clickhouse_common_io PRIVATE ${OPENSSL_CRYPTO_LIBRARY})

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
while true; do
RES=$(command time -f %e -o /dev/stdout curl -sS --location-trusted -H "Authorization: OAuth $YT_TOKEN" "$YT_PROXY.yt.yandex.net/query?default_format=Null&database=*$YT_CLIQUE_ID" --data-binary @- <<< "$query" 2>/dev/null) && break;
done
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
while true; do
RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time) && break;
done
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -4,7 +4,7 @@ set(CLICKHOUSE_CLIENT_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/Suggest.cpp
)
set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY})
include(CheckSymbolExists)
check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE)

View File

@ -2,6 +2,12 @@
#include "ConnectionParameters.h"
#include "Suggest.h"
#if USE_REPLXX
# include <common/ReplxxLineReader.h>
#else
# include <common/LineReader.h>
#endif
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
@ -19,7 +25,6 @@
#include <Poco/File.h>
#include <Poco/Util/Application.h>
#include <common/find_symbols.h>
#include <common/config_common.h>
#include <common/LineReader.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Stopwatch.h>
@ -496,7 +501,11 @@ private:
if (!history_file.empty() && !Poco::File(history_file).exists())
Poco::File(history_file).createFile();
LineReader lr(&Suggest::instance(), history_file, '\\', config().has("multiline") ? ';' : 0);
#if USE_REPLXX
ReplxxLineReader lr(Suggest::instance(), history_file, '\\', config().has("multiline") ? ';' : 0);
#else
LineReader lr(history_file, '\\', config().has("multiline") ? ';' : 0);
#endif
do
{
@ -504,6 +513,12 @@ private:
if (input.empty())
break;
if (input.ends_with("\\G"))
{
input.resize(input.size() - 2);
has_vertical_output_suffix = true;
}
try
{
if (!process(input))

View File

@ -111,7 +111,7 @@ void LocalServer::tryInitPath()
/// In case of empty path set paths to helpful directories
std::string cd = Poco::Path::current();
context->setTemporaryPath(cd + "tmp");
context->setTemporaryStorage(cd + "tmp");
context->setFlagsPath(cd + "flags");
context->setUserFilesPath(""); // user's files are everywhere
}

View File

@ -17,6 +17,7 @@
#include <Common/setThreadName.h>
#include <Common/config.h>
#include <Common/SettingsChanges.h>
#include <Disks/DiskSpaceMonitor.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/ReadBufferFromIStream.h>
@ -351,7 +352,8 @@ void HTTPHandler::processQuery(
if (buffer_until_eof)
{
std::string tmp_path_template = context.getTemporaryPath() + "http_buffers/";
const std::string tmp_path(context.getTemporaryVolume()->getNextDisk()->getPath());
const std::string tmp_path_template(tmp_path + "http_buffers/");
auto create_tmp_disk_buffer = [tmp_path_template] (const WriteBufferPtr &)
{
@ -590,7 +592,11 @@ void HTTPHandler::processQuery(
customizeContext(context);
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
[&response] (const String & content_type) { response.setContentType(content_type); },
[&response] (const String & content_type, const String & format)
{
response.setContentType(content_type);
response.add("X-ClickHouse-Format", format);
},
[&response] (const String & current_query_id) { response.add("X-ClickHouse-Query-Id", current_query_id); });
if (used_output.hasDelayed())

View File

@ -282,7 +282,8 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
else
{
bool with_output = false;
std::function<void(const String &)> set_content_type = [&with_output](const String &) -> void {
std::function<void(const String &, const String &)> set_content_type_and_format = [&with_output](const String &, const String &) -> void
{
with_output = true;
};
@ -305,7 +306,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
ReadBufferFromString replacement(replacement_query);
Context query_context = connection_context;
executeQuery(should_replace ? replacement : payload, *out, true, query_context, set_content_type, nullptr);
executeQuery(should_replace ? replacement : payload, *out, true, query_context, set_content_type_and_format, {});
if (!with_output)
packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true);

View File

@ -77,6 +77,31 @@ namespace CurrentMetrics
extern const Metric VersionInteger;
}
namespace
{
void setupTmpPath(Logger * log, const std::string & path)
{
LOG_DEBUG(log, "Setting up " << path << " to store temporary data in it");
Poco::File(path).createDirectories();
/// Clearing old temporary files.
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator it(path); it != dir_end; ++it)
{
if (it->isFile() && startsWith(it.name(), "tmp"))
{
LOG_DEBUG(log, "Removing old temporary file " << it->path());
it->remove();
}
else
LOG_DEBUG(log, "Skipped file in temporary path " << it->path());
}
}
}
namespace DB
{
@ -331,22 +356,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
DateLUT::instance();
LOG_TRACE(log, "Initialized DateLUT with time zone '" << DateLUT::instance().getTimeZone() << "'.");
/// Directory with temporary data for processing of heavy queries.
/// Storage with temporary data for processing of heavy queries.
{
std::string tmp_path = config().getString("tmp_path", path + "tmp/");
global_context->setTemporaryPath(tmp_path);
Poco::File(tmp_path).createDirectories();
/// Clearing old temporary files.
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator it(tmp_path); it != dir_end; ++it)
{
if (it->isFile() && startsWith(it.name(), "tmp"))
{
LOG_DEBUG(log, "Removing old temporary file " << it->path());
it->remove();
}
}
std::string tmp_policy = config().getString("tmp_policy", "");
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
for (const DiskPtr & disk : volume->disks)
setupTmpPath(log, disk->getPath());
}
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
@ -864,7 +881,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
for (auto & server : servers)
server->start();
setTextLog(global_context->getTextLog());
{
String level_str = config().getString("text_log.level", "");
int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
setTextLog(global_context->getTextLog(), level);
}
buildLoggers(config(), logger());
main_config_reloader->start();

View File

@ -591,11 +591,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
}
});
/// Wait in case of exception. Delete pipeline to release memory.
/// Wait in case of exception happened outside of pool.
SCOPE_EXIT(
/// Clear queue in case if somebody is waiting lazy_format to push.
lazy_format->finish();
lazy_format->clearQueue();
try
{
@ -604,72 +602,58 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
catch (...)
{
/// If exception was thrown during pipeline execution, skip it while processing other exception.
tryLogCurrentException(log);
}
/// pipeline = QueryPipeline()
);
while (true)
while (!lazy_format->isFinished() && !exception)
{
Block block;
while (true)
if (isQueryCancelled())
{
if (isQueryCancelled())
{
/// A packet was received requesting to stop execution of the request.
executor->cancel();
break;
}
else
{
if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay)
{
/// Some time passed and there is a progress.
after_send_progress.restart();
sendProgress();
}
sendLogs();
if ((block = lazy_format->getBlock(query_context->getSettingsRef().interactive_delay / 1000)))
break;
if (lazy_format->isFinished())
break;
if (exception)
{
pool.wait();
break;
}
}
}
/** If data has run out, we will send the profiling data and total values to
* the last zero block to be able to use
* this information in the suffix output of stream.
* If the request was interrupted, then `sendTotals` and other methods could not be called,
* because we have not read all the data yet,
* and there could be ongoing calculations in other threads at the same time.
*/
if (!block && !isQueryCancelled())
{
pool.wait();
pipeline.finalize();
sendTotals(lazy_format->getTotals());
sendExtremes(lazy_format->getExtremes());
sendProfileInfo(lazy_format->getProfileInfo());
sendProgress();
sendLogs();
}
sendData(block);
if (!block)
/// A packet was received requesting to stop execution of the request.
executor->cancel();
break;
}
if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay)
{
/// Some time passed and there is a progress.
after_send_progress.restart();
sendProgress();
}
sendLogs();
if (auto block = lazy_format->getBlock(query_context->getSettingsRef().interactive_delay / 1000))
{
if (!state.io.null_format)
sendData(block);
}
}
/// Finish lazy_format before waiting. Otherwise some thread may write into it, and waiting will lock.
lazy_format->finish();
pool.wait();
/** If data has run out, we will send the profiling data and total values to
* the last zero block to be able to use
* this information in the suffix output of stream.
* If the request was interrupted, then `sendTotals` and other methods could not be called,
* because we have not read all the data yet,
* and there could be ongoing calculations in other threads at the same time.
*/
if (!isQueryCancelled())
{
pipeline.finalize();
sendTotals(lazy_format->getTotals());
sendExtremes(lazy_format->getExtremes());
sendProfileInfo(lazy_format->getProfileInfo());
sendProgress();
sendLogs();
}
sendData({});
}
state.io.onFinish();

View File

@ -3,27 +3,27 @@
NOTE: User and query level settings are set up in "users.xml" file.
-->
<yandex>
<!-- The list of hosts allowed to use in URL-related storage engines and table functions.
If this section is not present in configuration, all hosts are allowed.
-->
<remote_url_allow_hosts>
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
If port is explicitly specified in URL, the host:port is checked as a whole.
If host specified here without port, any port with this host allowed.
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
-->
<!-- The list of hosts allowed to use in URL-related storage engines and table functions.
If this section is not present in configuration, all hosts are allowed.
-->
<remote_url_allow_hosts>
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
If port is explicitly specified in URL, the host:port is checked as a whole.
If host specified here without port, any port with this host allowed.
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
-->
<!-- Regular expression can be specified. RE2 engine is used for regexps.
Regexps are not aligned: don't forget to add ^ and $. Also don't forget to escape dot (.) metacharacter
(forgetting to do so is a common source of error).
-->
</remote_url_allow_hosts>
<!-- Regular expression can be specified. RE2 engine is used for regexps.
Regexps are not aligned: don't forget to add ^ and $. Also don't forget to escape dot (.) metacharacter
(forgetting to do so is a common source of error).
-->
</remote_url_allow_hosts>
<logger>
<!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 -->
<!-- Possible levels: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105 -->
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
@ -34,6 +34,7 @@
<!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<!-- For HTTPS and SSL over native protocol. -->
<!--
<https_port>8443</https_port>
@ -132,6 +133,17 @@
<!-- Path to temporary data for processing hard queries. -->
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<!-- Policy from the <storage_configuration> for the temporary files.
If not set <tmp_path> is used, otherwise <tmp_path> is ignored.
Notes:
- move_factor is ignored
- keep_free_space_bytes is ignored
- max_data_part_size_bytes is ignored
- you must have exactly one volume in that policy
-->
<!-- <tmp_policy>tmp</tmp_policy> -->
<!-- Directory with user provided files that are accessible by 'file' table function. -->
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
@ -343,6 +355,11 @@
toStartOfHour(event_time)
-->
<partition_by>toYYYYMM(event_date)</partition_by>
<!-- Instead of partition_by, you can provide full engine expression (starting with ENGINE = ) with parameters,
Example: <engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<!-- Interval of flushing data. -->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
@ -377,10 +394,12 @@
<!-- Uncomment to write text log into table.
Text log contains all information from usual server log but stores it in structured and efficient way.
The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table.
<text_log>
<database>system</database>
<table>text_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<level></level>
</text_log>
-->

View File

@ -49,7 +49,7 @@
In first line will be password and in second - corresponding SHA256.
How to generate double SHA1:
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | openssl dgst -sha1 -binary | openssl dgst -sha1
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
In first line will be password and in second - corresponding double SHA1.
-->
<password></password>

View File

@ -309,7 +309,7 @@ protected:
/// Uses a DFA based approach in order to better handle patterns without
/// time assertions.
///
/// NOTE: This implementation relies on the assumption that the pattern are *small*.
/// NOTE: This implementation relies on the assumption that the pattern is *small*.
///
/// This algorithm performs in O(mn) (with m the number of DFA states and N the number
/// of events) with a memory consumption and memory allocations in O(m). It means that

View File

@ -50,16 +50,21 @@
*
* P.S. This is also required, because tcmalloc can not allocate a chunk of
* memory greater than 16 GB.
*
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
* to override it during linkage when using ClickHouse as a library in
* third-party applications which may already use own allocator doing mmaps
* in the implementation of alloc/realloc.
*/
#ifdef NDEBUG
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
/**
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*/
static constexpr size_t MMAP_THRESHOLD = 4096;
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 4096;
#endif
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;

View File

@ -478,6 +478,7 @@ namespace ErrorCodes
extern const int FILE_ALREADY_EXISTS = 504;
extern const int CANNOT_DELETE_DIRECTORY = 505;
extern const int UNEXPECTED_ERROR_CODE = 506;
extern const int UNABLE_TO_SKIP_UNUSED_SHARDS = 507;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -195,7 +195,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
<< ", e.displayText() = " << e.displayText()
<< (with_stacktrace ? getExceptionStackTraceString(e) : "")
<< (with_extra_info ? getExtraExceptionInfo(e) : "")
<< " (version " << VERSION_STRING << VERSION_OFFICIAL;
<< " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
}
catch (...) {}
}

View File

@ -1,12 +1,13 @@
#include <re2/re2.h>
#include <Common/RemoteHostFilter.h>
#include <Poco/URI.h>
#include <Formats/FormatFactory.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Formats/FormatFactory.h>
#include <Common/RemoteHostFilter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes

View File

@ -1,17 +1,19 @@
#pragma once
#include <string>
#include <vector>
#include <unordered_set>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace Poco { class URI; }
namespace Poco { namespace Util { class AbstractConfiguration; } }
namespace DB
{
class RemoteHostFilter
{
/**
* This class checks if url is allowed.
* This class checks if URL is allowed.
* If primary_hosts and regexp_hosts are empty all urls are allowed.
*/
public:
@ -25,6 +27,7 @@ private:
std::unordered_set<std::string> primary_hosts; /// Allowed primary (<host>) URL from config.xml
std::vector<std::string> regexp_hosts; /// Allowed regexp (<hots_regexp>) URL from config.xml
bool checkForDirectEntry(const std::string & str) const; /// Checks if the primary_hosts and regexp_hosts contain str. If primary_hosts and regexp_hosts are empty return true.
/// Checks if the primary_hosts and regexp_hosts contain str. If primary_hosts and regexp_hosts are empty return true.
bool checkForDirectEntry(const std::string & str) const;
};
}

View File

@ -23,7 +23,14 @@ namespace DB
static thread_local void * stack_address = nullptr;
static thread_local size_t max_stack_size = 0;
void checkStackSize()
/** It works fine when interpreters are instantiated by ClickHouse code in properly prepared threads,
* but there are cases when ClickHouse runs as a library inside another application.
* If application is using user-space lightweight threads with manually allocated stacks,
* current implementation is not reasonable, as it has no way to properly check the remaining
* stack size without knowing the details of how stacks are allocated.
* We mark this function as weak symbol to be able to replace it in another ClickHouse-based products.
*/
__attribute__((__weak__)) void checkStackSize()
{
using namespace DB;

View File

@ -53,6 +53,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algo support it). 0 means unlimited.", 0) \
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. By default, it is determined automatically.", 0) \
M(SettingMaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(SettingMaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. By default, it is determined automatically.", 0) \
M(SettingUInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
@ -111,6 +112,7 @@ struct Settings : public SettingsCollection<Settings>
\
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
\
M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \
M(SettingUInt64, min_chunk_bytes_for_parallel_parsing, (1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \
@ -187,6 +189,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(SettingString, input_format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \
\
M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \
\
@ -198,6 +201,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \
M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \
M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
\
M(SettingBool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \
\
@ -360,7 +365,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \
M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql' and 'odbc' table functions.", 0) \
\
M(SettingBool, experimental_use_processors, false, "Use processors pipeline.", 0) \
M(SettingBool, experimental_use_processors, true, "Use processors pipeline.", 0) \
\
M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \
M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \

View File

@ -62,7 +62,7 @@ void SettingNumber<Type>::set(const Field & x)
template <typename Type>
void SettingNumber<Type>::set(const String & x)
{
set(completeParse<Type>(x));
set(parseWithSizeSuffix<Type>(x));
}
template <>

View File

@ -31,7 +31,6 @@ enum class TypeIndex
Float64,
Date,
DateTime,
DateTime32 = DateTime,
DateTime64,
String,
FixedString,
@ -158,8 +157,6 @@ using Decimal32 = Decimal<Int32>;
using Decimal64 = Decimal<Int64>;
using Decimal128 = Decimal<Int128>;
// TODO (nemkov): consider making a strong typedef
//using DateTime32 = time_t;
using DateTime64 = Decimal64;
template <> struct TypeName<Decimal32> { static const char * get() { return "Decimal32"; } };

View File

@ -10,5 +10,6 @@
#cmakedefine01 USE_POCO_DATAODBC
#cmakedefine01 USE_POCO_MONGODB
#cmakedefine01 USE_POCO_REDIS
#cmakedefine01 USE_POCO_JSON
#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY
#cmakedefine01 USE_SSL

View File

@ -66,6 +66,8 @@ struct BlockIO
finish_callback = rhs.finish_callback;
exception_callback = rhs.exception_callback;
null_format = rhs.null_format;
return *this;
}
};

View File

@ -12,5 +12,6 @@ class IBlockOutputStream;
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
using BlockInputStreams = std::vector<BlockInputStreamPtr>;
using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>;
using BlockOutputStreams = std::vector<BlockOutputStreamPtr>;
}

View File

@ -7,6 +7,7 @@
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Interpreters/sortBlock.h>
#include <Disks/DiskSpaceMonitor.h>
namespace ProfileEvents
@ -21,10 +22,10 @@ namespace DB
MergeSortingBlockInputStream::MergeSortingBlockInputStream(
const BlockInputStreamPtr & input, SortDescription & description_,
size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_,
size_t max_bytes_before_external_sort_, const std::string & tmp_path_, size_t min_free_disk_space_)
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_)
: description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_),
max_bytes_before_remerge(max_bytes_before_remerge_),
max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_),
max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_),
min_free_disk_space(min_free_disk_space_)
{
children.push_back(input);
@ -78,10 +79,14 @@ Block MergeSortingBlockInputStream::readImpl()
*/
if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort)
{
if (!enoughSpaceInDirectory(tmp_path, sum_bytes_in_blocks + min_free_disk_space))
throw Exception("Not enough space for external sort in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE);
size_t size = sum_bytes_in_blocks + min_free_disk_space;
auto reservation = tmp_volume->reserve(size);
if (!reservation)
throw Exception("Not enough space for external sort in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE);
const std::string tmp_path(reservation->getDisk()->getPath());
temporary_files.emplace_back(createTemporaryFile(tmp_path));
const std::string & path = temporary_files.back()->path();
MergeSortingBlocksBlockInputStream block_in(blocks, description, max_merged_block_size, limit);

View File

@ -18,6 +18,9 @@ namespace DB
struct TemporaryFileStream;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
namespace ErrorCodes
{
extern const int NOT_ENOUGH_SPACE;
@ -77,7 +80,7 @@ public:
MergeSortingBlockInputStream(const BlockInputStreamPtr & input, SortDescription & description_,
size_t max_merged_block_size_, UInt64 limit_,
size_t max_bytes_before_remerge_,
size_t max_bytes_before_external_sort_, const std::string & tmp_path_,
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_,
size_t min_free_disk_space_);
String getName() const override { return "MergeSorting"; }
@ -97,7 +100,7 @@ private:
size_t max_bytes_before_remerge;
size_t max_bytes_before_external_sort;
const std::string tmp_path;
VolumePtr tmp_volume;
size_t min_free_disk_space;
Logger * log = &Logger::get("MergeSortingBlockInputStream");

View File

@ -21,9 +21,19 @@ class NullAndDoCopyBlockInputStream : public IBlockInputStream
{
public:
NullAndDoCopyBlockInputStream(const BlockInputStreamPtr & input_, BlockOutputStreamPtr output_)
: input(input_), output(output_)
{
children.push_back(input_);
input_streams.push_back(input_);
output_streams.push_back(output_);
for (auto & input_stream : input_streams)
children.push_back(input_stream);
}
NullAndDoCopyBlockInputStream(const BlockInputStreams & input_, BlockOutputStreams & output_)
: input_streams(input_), output_streams(output_)
{
for (auto & input_stream : input_)
children.push_back(input_stream);
}
/// Suppress readPrefix and readSuffix, because they are called by copyData.
@ -39,13 +49,20 @@ public:
protected:
Block readImpl() override
{
copyData(*input, *output);
/// We do not use cancel flag here.
/// If query was cancelled, it will be processed by child streams.
/// Part of the data will be processed.
if (input_streams.size() == 1 && output_streams.size() == 1)
copyData(*input_streams.at(0), *output_streams.at(0));
else
copyData(input_streams, output_streams);
return Block();
}
private:
BlockInputStreamPtr input;
BlockOutputStreamPtr output;
BlockInputStreams input_streams;
BlockOutputStreams output_streams;
};
}

View File

@ -56,9 +56,24 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
StoragePtr inner_table = materialized_view->getTargetTable();
auto inner_table_id = inner_table->getStorageID();
query = materialized_view->getInnerQuery();
std::unique_ptr<ASTInsertQuery> insert = std::make_unique<ASTInsertQuery>();
insert->database = inner_table_id.database_name;
insert->table = inner_table_id.table_name;
/// Get list of columns we get from select query.
auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze())
.getSampleBlock();
/// Insert only columns returned by select.
auto list = std::make_shared<ASTExpressionList>();
for (auto & column : header)
/// But skip columns which storage doesn't have.
if (inner_table->hasColumn(column.name))
list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
insert->columns = std::move(list);
ASTPtr insert_query_ptr(insert.release());
InterpreterInsertQuery interpreter(insert_query_ptr, *views_context);
BlockIO io = interpreter.execute();

View File

@ -70,7 +70,7 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl)
Block TTLBlockInputStream::readImpl()
{
/// Skip all data if table ttl is expired for part
if (storage.hasTableTTL() && isTTLExpired(old_ttl_infos.table_ttl.max))
if (storage.hasRowsTTL() && isTTLExpired(old_ttl_infos.table_ttl.max))
{
rows_removed = data_part->rows_count;
return {};
@ -80,7 +80,7 @@ Block TTLBlockInputStream::readImpl()
if (!block)
return block;
if (storage.hasTableTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
if (storage.hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min)))
removeRowsWithExpiredTableTTL(block);
removeValuesWithExpiredColumnTTL(block);
@ -106,10 +106,10 @@ void TTLBlockInputStream::readSuffixImpl()
void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
{
storage.ttl_table_entry.expression->execute(block);
storage.rows_ttl_entry.expression->execute(block);
const IColumn * ttl_column =
block.getByName(storage.ttl_table_entry.result_column).column.get();
block.getByName(storage.rows_ttl_entry.result_column).column.get();
const auto & column_names = header.getNames();
MutableColumns result_columns;

View File

@ -1,6 +1,10 @@
#include <thread>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <DataStreams/ParallelInputsProcessor.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Common/ThreadPool.h>
namespace DB
@ -51,6 +55,79 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCall
inline void doNothing(const Block &) {}
namespace
{
struct ParallelInsertsHandler
{
using CencellationHook = std::function<void()>;
explicit ParallelInsertsHandler(BlockOutputStreams & output_streams, CencellationHook cancellation_hook_, size_t num_threads)
: outputs(output_streams.size()), cancellation_hook(std::move(cancellation_hook_))
{
exceptions.resize(num_threads);
for (auto & output : output_streams)
outputs.push(output.get());
}
void onBlock(Block & block, size_t /*thread_num*/)
{
IBlockOutputStream * out = nullptr;
outputs.pop(out);
out->write(block);
outputs.push(out);
}
void onFinishThread(size_t /*thread_num*/) {}
void onFinish() {}
void onException(std::exception_ptr & exception, size_t thread_num)
{
exceptions[thread_num] = exception;
cancellation_hook();
}
void rethrowFirstException()
{
for (auto & exception : exceptions)
if (exception)
std::rethrow_exception(exception);
}
ConcurrentBoundedQueue<IBlockOutputStream *> outputs;
std::vector<std::exception_ptr> exceptions;
CencellationHook cancellation_hook;
};
}
static void copyDataImpl(BlockInputStreams & inputs, BlockOutputStreams & outputs)
{
for (auto & output : outputs)
output->writePrefix();
using Processor = ParallelInputsProcessor<ParallelInsertsHandler>;
Processor * processor_ptr = nullptr;
ParallelInsertsHandler handler(outputs, [&processor_ptr]() { processor_ptr->cancel(false); }, inputs.size());
ParallelInputsProcessor<ParallelInsertsHandler> processor(inputs, nullptr, inputs.size(), handler);
processor_ptr = &processor;
processor.process();
processor.wait();
handler.rethrowFirstException();
/// readPrefix is called in ParallelInputsProcessor.
for (auto & input : inputs)
input->readSuffix();
for (auto & output : outputs)
output->writeSuffix();
}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled)
{
auto is_cancelled_pred = [is_cancelled] ()
@ -61,6 +138,10 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<boo
copyDataImpl(from, to, is_cancelled_pred, doNothing);
}
void copyData(BlockInputStreams & inputs, BlockOutputStreams & outputs)
{
copyDataImpl(inputs, outputs);
}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled)
{

View File

@ -16,6 +16,8 @@ class Block;
*/
void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled = nullptr);
void copyData(BlockInputStreams & inputs, BlockOutputStreams & outputs);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,

View File

@ -111,6 +111,12 @@ Volume::Volume(
<< " < " << formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")");
}
DiskPtr Volume::getNextDisk()
{
size_t start_from = last_used.fetch_add(1u, std::memory_order_relaxed);
size_t index = start_from % disks.size();
return disks[index];
}
ReservationPtr Volume::reserve(UInt64 expected_size)
{

View File

@ -67,6 +67,13 @@ public:
const String & config_prefix,
const DiskSelector & disk_selector);
/// Next disk (round-robin)
///
/// - Used with policy for temporary data
/// - Ignores all limitations
/// - Shares last access with reserve()
DiskPtr getNextDisk();
/// Uses Round-robin to choose disk for reservation.
/// Returns valid reservation or nullptr if there is no space left on any disk.
ReservationPtr reserve(UInt64 bytes) override;

View File

@ -68,6 +68,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings, const Con
format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter;
format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter;
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.avro.schema_registry_url = settings.input_format_avro_schema_registry_url;
return format_settings;
}
@ -99,6 +100,8 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co
format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter;
format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter;
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.avro.output_codec = settings.output_format_avro_codec;
format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval;
return format_settings;
}
@ -325,6 +328,8 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorORC(*this);
registerInputFormatProcessorParquet(*this);
registerOutputFormatProcessorParquet(*this);
registerInputFormatProcessorAvro(*this);
registerOutputFormatProcessorAvro(*this);
registerInputFormatProcessorTemplate(*this);
registerOutputFormatProcessorTemplate(*this);

View File

@ -166,6 +166,8 @@ void registerInputFormatProcessorORC(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorTemplate(FormatFactory & factory);
void registerOutputFormatProcessorTemplate(FormatFactory &factory);

View File

@ -110,6 +110,16 @@ struct FormatSettings
};
Custom custom;
struct Avro
{
String schema_registry_url;
String output_codec;
UInt64 output_sync_interval = 16 * 1024;
};
Avro avro;
};
}

View File

@ -16,11 +16,11 @@ namespace ErrorCodes
ParsedTemplateFormatString::ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name)
{
ReadBufferFromFile schema_file(schema.absoluteSchemaPath(), 4096);
String format_string;
readStringUntilEOF(format_string, schema_file);
try
{
ReadBufferFromFile schema_file(schema.absoluteSchemaPath(), 4096);
String format_string;
readStringUntilEOF(format_string, schema_file);
parse(format_string, idx_by_name);
}
catch (DB::Exception & e)
@ -193,7 +193,7 @@ const char * ParsedTemplateFormatString::readMayBeQuotedColumnNameInto(const cha
String ParsedTemplateFormatString::dump() const
{
WriteBufferFromOwnString res;
res << "Delimiter " << 0 << ": ";
res << "\nDelimiter " << 0 << ": ";
verbosePrintString(delimiters.front().c_str(), delimiters.front().c_str() + delimiters.front().size(), res);
size_t num_columns = std::max(formats.size(), format_idx_to_column_idx.size());

View File

@ -2,6 +2,7 @@
// .h autogenerated by cmake!
#cmakedefine01 USE_AVRO
#cmakedefine01 USE_CAPNP
#cmakedefine01 USE_SNAPPY
#cmakedefine01 USE_PARQUET

View File

@ -746,6 +746,23 @@ inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf)
inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
template <typename T>
inline std::enable_if_t<is_arithmetic_v<T> && (sizeof(T) <= 8), void>
readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian architecture.
{
readPODBinary(x, buf);
if constexpr (sizeof(x) == 1)
return;
else if constexpr (sizeof(x) == 2)
x = __builtin_bswap16(x);
else if constexpr (sizeof(x) == 4)
x = __builtin_bswap32(x);
else if constexpr (sizeof(x) == 8)
x = __builtin_bswap64(x);
}
/// Generic methods to read value in text tab-separated format.
template <typename T>
inline std::enable_if_t<is_integral_v<T>, void>
@ -955,28 +972,78 @@ inline T parse(const char * data, size_t size)
return res;
}
/// Read something from text format, but expect complete parse of given text
/// For example: 723145 -- ok, 213MB -- not ok
template <typename T>
inline T completeParse(const char * data, size_t size)
inline std::enable_if_t<!is_integral_v<T>, void>
readTextWithSizeSuffix(T & x, ReadBuffer & buf) { readText(x, buf); }
template <typename T>
inline std::enable_if_t<is_integral_v<T>, void>
readTextWithSizeSuffix(T & x, ReadBuffer & buf)
{
readIntText(x, buf);
if (buf.eof())
return;
/// Updates x depending on the suffix
auto finish = [&buf, &x] (UInt64 base, int power_of_two) mutable
{
++buf.position();
if (buf.eof())
{
x *= base; /// For decimal suffixes, such as k, M, G etc.
}
else if (*buf.position() == 'i')
{
x = (x << power_of_two); /// For binary suffixes, such as ki, Mi, Gi, etc.
++buf.position();
}
return;
};
switch (*buf.position())
{
case 'k': [[fallthrough]];
case 'K':
finish(1000, 10);
break;
case 'M':
finish(1000000, 20);
break;
case 'G':
finish(1000000000, 30);
break;
case 'T':
finish(1000000000000ULL, 40);
break;
default:
return;
}
return;
}
/// Read something from text format and trying to parse the suffix.
/// If the suffix is not valid gives an error
/// For example: 723145 -- ok, 213MB -- not ok, but 213Mi -- ok
template <typename T>
inline T parseWithSizeSuffix(const char * data, size_t size)
{
T res;
ReadBufferFromMemory buf(data, size);
readText(res, buf);
readTextWithSizeSuffix(res, buf);
assertEOF(buf);
return res;
}
template <typename T>
inline T completeParse(const String & s)
inline T parseWithSizeSuffix(const String & s)
{
return completeParse<T>(s.data(), s.size());
return parseWithSizeSuffix<T>(s.data(), s.size());
}
template <typename T>
inline T completeParse(const char * data)
inline T parseWithSizeSuffix(const char * data)
{
return completeParse<T>(data, strlen(data));
return parseWithSizeSuffix<T>(data, strlen(data));
}
template <typename T>

View File

@ -101,7 +101,13 @@ inline bool readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exp
{
++buf.position();
Int32 addition_exp = 0;
readIntText(addition_exp, buf);
if (!tryReadIntText(addition_exp, buf))
{
if constexpr (_throw_on_error)
throw Exception("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);
else
return false;
}
exponent += addition_exp;
stop = true;
continue;

View File

@ -28,6 +28,7 @@
#include <common/config_common.h>
#include <AggregateFunctions/AggregateFunctionArray.h>
#include <AggregateFunctions/AggregateFunctionState.h>
#include <Disks/DiskSpaceMonitor.h>
namespace ProfileEvents
@ -681,22 +682,25 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
&& current_memory_usage > static_cast<Int64>(params.max_bytes_before_external_group_by)
&& worth_convert_to_two_level)
{
if (!enoughSpaceInDirectory(params.tmp_path, current_memory_usage + params.min_free_disk_space))
throw Exception("Not enough space for external aggregation in " + params.tmp_path, ErrorCodes::NOT_ENOUGH_SPACE);
size_t size = current_memory_usage + params.min_free_disk_space;
auto reservation = params.tmp_volume->reserve(size);
if (!reservation)
throw Exception("Not enough space for external aggregation in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE);
writeToTemporaryFile(result);
const std::string tmp_path(reservation->getDisk()->getPath());
writeToTemporaryFile(result, tmp_path);
}
return true;
}
void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants)
void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path)
{
Stopwatch watch;
size_t rows = data_variants.size();
auto file = createTemporaryFile(params.tmp_path);
auto file = createTemporaryFile(tmp_path);
const std::string & path = file->path();
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf);
@ -753,6 +757,10 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants)
<< (uncompressed_bytes / elapsed_seconds / 1048576.0) << " MiB/sec. uncompressed, "
<< (compressed_bytes / elapsed_seconds / 1048576.0) << " MiB/sec. compressed)");
}
void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants)
{
return writeToTemporaryFile(data_variants, params.tmp_volume->getNextDisk()->getPath());
}
template <typename Method>

View File

@ -46,6 +46,8 @@ namespace ErrorCodes
class IBlockOutputStream;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
/** Different data structures that can be used for aggregation
* For efficiency, the aggregation data itself is put into the pool.
@ -860,7 +862,7 @@ public:
/// Return empty result when aggregating without keys on empty set.
bool empty_result_for_aggregation_by_empty_set;
const std::string tmp_path;
VolumePtr tmp_volume;
/// Settings is used to determine cache size. No threads are created.
size_t max_threads;
@ -873,7 +875,7 @@ public:
size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
size_t max_bytes_before_external_group_by_,
bool empty_result_for_aggregation_by_empty_set_,
const std::string & tmp_path_, size_t max_threads_,
VolumePtr tmp_volume_, size_t max_threads_,
size_t min_free_disk_space_)
: src_header(src_header_),
keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
@ -881,7 +883,7 @@ public:
group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
tmp_path(tmp_path_), max_threads(max_threads_),
tmp_volume(tmp_volume_), max_threads(max_threads_),
min_free_disk_space(min_free_disk_space_)
{
}
@ -889,7 +891,7 @@ public:
/// Only parameters that matter during merge.
Params(const Block & intermediate_header_,
const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
: Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, "", max_threads_, 0)
: Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0)
{
intermediate_header = intermediate_header_;
}
@ -955,6 +957,7 @@ public:
void setCancellationHook(const CancellationHook cancellation_hook);
/// For external aggregation.
void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path);
void writeToTemporaryFile(AggregatedDataVariants & data_variants);
bool hasTemporaryFiles() const { return !temporary_files.empty(); }

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND;
}
AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_)
AnalyzedJoin::AnalyzedJoin(const Settings & settings, VolumePtr tmp_volume_)
: size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
, default_max_bytes(settings.default_max_bytes_in_join)
, join_use_nulls(settings.join_use_nulls)
@ -27,7 +27,7 @@ AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_)
, partial_merge_join(settings.partial_merge_join)
, partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
, partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
, tmp_path(tmp_path_)
, tmp_volume(tmp_volume_)
{}
void AnalyzedJoin::addUsingKey(const ASTPtr & ast)

View File

@ -21,6 +21,9 @@ class Block;
struct Settings;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
class AnalyzedJoin
{
/** Query of the form `SELECT expr(x) AS k FROM t1 ANY LEFT JOIN (SELECT expr(x) AS k FROM t2) USING k`
@ -62,10 +65,10 @@ class AnalyzedJoin
/// Original name -> name. Only ranamed columns.
std::unordered_map<String, String> renames;
String tmp_path;
VolumePtr tmp_volume;
public:
AnalyzedJoin(const Settings &, const String & tmp_path);
AnalyzedJoin(const Settings &, VolumePtr tmp_volume);
/// for StorageJoin
AnalyzedJoin(SizeLimits limits, bool use_nulls, ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness,
@ -82,7 +85,7 @@ public:
ASTTableJoin::Kind kind() const { return table_join.kind; }
ASTTableJoin::Strictness strictness() const { return table_join.strictness; }
const SizeLimits & sizeLimits() const { return size_limits; }
const String & getTemporaryPath() const { return tmp_path; }
VolumePtr getTemporaryVolume() { return tmp_volume; }
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(table_join.kind); }

View File

@ -22,6 +22,7 @@
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/CompressionCodecSelector.h>
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
#include <Core/Settings.h>
@ -95,6 +96,7 @@ namespace ErrorCodes
extern const int SCALAR_ALREADY_EXISTS;
extern const int UNKNOWN_SCALAR;
extern const int NOT_ENOUGH_PRIVILEGES;
extern const int UNKNOWN_POLICY;
}
@ -123,12 +125,14 @@ struct ContextShared
String interserver_scheme; /// http or https
String path; /// Path to the data directory, with a slash at the end.
String tmp_path; /// The path to the temporary files that occur when processing the request.
String flags_path; /// Path to the directory with some control flags for server maintenance.
String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function.
String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries.
ConfigurationPtr config; /// Global configuration settings.
String tmp_path; /// Path to the temporary files that occur when processing the request.
mutable VolumePtr tmp_volume; /// Volume for the the temporary files that occur when processing the request.
Databases databases; /// List of databases and tables in them.
mutable std::optional<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::optional<ExternalDictionariesLoader> external_dictionaries_loader;
@ -151,9 +155,9 @@ struct ContextShared
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
/// Rules for selecting the compression settings, depending on the size of the part.
mutable std::unique_ptr<CompressionCodecSelector> compression_codec_selector;
/// Storage disk chooser
/// Storage disk chooser for MergeTree engines
mutable std::unique_ptr<DiskSelector> merge_tree_disk_selector;
/// Storage policy chooser
/// Storage policy chooser for MergeTree engines
mutable std::unique_ptr<StoragePolicySelector> merge_tree_storage_policy_selector;
std::optional<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
@ -527,12 +531,6 @@ String Context::getPath() const
return shared->path;
}
String Context::getTemporaryPath() const
{
auto lock = getLock();
return shared->tmp_path;
}
String Context::getFlagsPath() const
{
auto lock = getLock();
@ -551,13 +549,19 @@ String Context::getDictionariesLibPath() const
return shared->dictionaries_lib_path;
}
VolumePtr Context::getTemporaryVolume() const
{
auto lock = getLock();
return shared->tmp_volume;
}
void Context::setPath(const String & path)
{
auto lock = getLock();
shared->path = path;
if (shared->tmp_path.empty())
if (shared->tmp_path.empty() && !shared->tmp_volume)
shared->tmp_path = shared->path + "tmp/";
if (shared->flags_path.empty())
@ -570,10 +574,31 @@ void Context::setPath(const String & path)
shared->dictionaries_lib_path = shared->path + "dictionaries_lib/";
}
void Context::setTemporaryPath(const String & path)
VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name)
{
auto lock = getLock();
shared->tmp_path = path;
if (policy_name.empty())
{
shared->tmp_path = path;
if (!shared->tmp_path.ends_with('/'))
shared->tmp_path += '/';
auto disk = std::make_shared<DiskLocal>("_tmp_default", shared->tmp_path, 0);
shared->tmp_volume = std::make_shared<Volume>("_tmp_default", std::vector<DiskPtr>{disk}, 0);
}
else
{
StoragePolicyPtr tmp_policy = getStoragePolicySelector()[policy_name];
if (tmp_policy->getVolumes().size() != 1)
throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
shared->tmp_volume = tmp_policy->getVolume(0);
}
if (!shared->tmp_volume->disks.size())
throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
return shared->tmp_volume;
}
void Context::setFlagsPath(const String & path)

View File

@ -91,6 +91,9 @@ class StoragePolicySelector;
class IOutputFormat;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
#if USE_EMBEDDED_COMPILER
class CompiledExpressionCache;
@ -195,17 +198,19 @@ public:
~Context();
String getPath() const;
String getTemporaryPath() const;
String getFlagsPath() const;
String getUserFilesPath() const;
String getDictionariesLibPath() const;
VolumePtr getTemporaryVolume() const;
void setPath(const String & path);
void setTemporaryPath(const String & path);
void setFlagsPath(const String & path);
void setUserFilesPath(const String & path);
void setDictionariesLibPath(const String & path);
VolumePtr setTemporaryStorage(const String & path, const String & policy_name = "");
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
/// Global application configuration settings.

View File

@ -540,6 +540,7 @@ public:
Strings getAllTriedToLoadNames() const
{
std::lock_guard lock{mutex};
Strings names;
for (auto & [name, info] : infos)
if (info.triedToLoad())

View File

@ -96,63 +96,95 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const
BlockIO InterpreterInsertQuery::execute()
{
const Settings & settings = context.getSettingsRef();
const auto & query = query_ptr->as<ASTInsertQuery &>();
checkAccess(query);
BlockIO res;
StoragePtr table = getTable(query);
auto table_lock = table->lockStructureForShare(true, context.getInitialQueryId());
/// We create a pipeline of several streams, into which we will write data.
BlockOutputStreamPtr out;
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
if (table->noPushingToViews() && !no_destination)
out = table->write(query_ptr, context);
else
out = std::make_shared<PushingToViewsBlockOutputStream>(table, context, query_ptr, no_destination);
/// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash)
{
out = std::make_shared<SquashingBlockOutputStream>(
out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
}
auto query_sample_block = getSampleBlock(query, table);
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out = std::make_shared<AddingDefaultBlockOutputStream>(
out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context);
if (const auto & constraints = table->getConstraints(); !constraints.empty())
out = std::make_shared<CheckConstraintsBlockOutputStream>(query.table,
out, query_sample_block, table->getConstraints(), context);
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
out_wrapper->setProcessListElement(context.getProcessListElement());
out = std::move(out_wrapper);
BlockIO res;
/// What type of query: INSERT or INSERT SELECT?
BlockInputStreams in_streams;
size_t out_streams_size = 1;
if (query.select)
{
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
/// BlockIO may hold StoragePtrs to temporary tables
res = interpreter_select.execute();
res.out = nullptr;
if (table->supportsParallelInsert() && settings.max_insert_threads > 0)
{
in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline);
out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size());
}
else
{
res = interpreter_select.execute();
in_streams.emplace_back(res.in);
res.in = nullptr;
res.out = nullptr;
}
}
res.in = std::make_shared<ConvertingBlockInputStream>(context, res.in, out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position);
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out);
BlockOutputStreams out_streams;
auto query_sample_block = getSampleBlock(query, table);
for (size_t i = 0; i < out_streams_size; i++)
{
/// We create a pipeline of several streams, into which we will write data.
BlockOutputStreamPtr out;
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
if (table->noPushingToViews() && !no_destination)
out = table->write(query_ptr, context);
else
out = std::make_shared<PushingToViewsBlockOutputStream>(table, context, query_ptr, no_destination);
/// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash)
{
out = std::make_shared<SquashingBlockOutputStream>(
out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
}
/// Actually we don't know structure of input blocks from query/table,
/// because some clients break insertion protocol (columns != header)
out = std::make_shared<AddingDefaultBlockOutputStream>(
out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context);
if (const auto & constraints = table->getConstraints(); !constraints.empty())
out = std::make_shared<CheckConstraintsBlockOutputStream>(query.table,
out, query_sample_block, table->getConstraints(), context);
auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
out_wrapper->setProcessListElement(context.getProcessListElement());
out = std::move(out_wrapper);
out_streams.emplace_back(std::move(out));
}
/// What type of query: INSERT or INSERT SELECT?
if (query.select)
{
for (auto & in_stream : in_streams)
{
in_stream = std::make_shared<ConvertingBlockInputStream>(
context, in_stream, out_streams.at(0)->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position);
}
Block in_header = in_streams.at(0)->getHeader();
if (in_streams.size() > 1)
{
for (size_t i = 1; i < in_streams.size(); ++i)
assertBlocksHaveEqualStructure(in_streams[i]->getHeader(), in_header, "INSERT SELECT");
}
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(in_streams, out_streams);
if (!allow_materialized)
{
Block in_header = res.in->getHeader();
for (const auto & column : table->getColumns())
if (column.default_desc.kind == ColumnDefaultKind::Materialized && in_header.has(column.name))
throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
@ -160,12 +192,12 @@ BlockIO InterpreterInsertQuery::execute()
}
else if (query.data && !query.has_tail) /// can execute without additional data
{
// res.out = std::move(out_streams.at(0));
res.in = std::make_shared<InputStreamFromASTInsertQuery>(query_ptr, nullptr, query_sample_block, context, nullptr);
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out);
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out_streams.at(0));
}
else
res.out = std::move(out);
res.out = std::move(out_streams.at(0));
res.pipeline.addStorageHolder(table);
return res;

View File

@ -1871,7 +1871,7 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0),
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
/// If there are several sources, then we perform parallel aggregation
if (pipeline.streams.size() > 1)
@ -1937,7 +1937,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0),
allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);
@ -2163,7 +2163,7 @@ void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificato
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
SettingUInt64(0), SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
if (modificator == Modificator::ROLLUP)
pipeline.firstStream() = std::make_shared<RollupBlockInputStream>(pipeline.firstStream(), params);
@ -2192,7 +2192,7 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modif
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
SettingUInt64(0), SettingUInt64(0),
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, true);
@ -2276,7 +2276,7 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, InputSortingInfoP
sorting_stream, output_order_descr, settings.max_block_size, limit,
settings.max_bytes_before_remerge_sort,
settings.max_bytes_before_external_sort / pipeline.streams.size(),
context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data);
stream = merging_stream;
});
@ -2358,7 +2358,8 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
return std::make_shared<MergeSortingTransform>(
header, output_order_descr, settings.max_block_size, limit,
settings.max_bytes_before_remerge_sort / pipeline.getNumStreams(),
settings.max_bytes_before_external_sort, context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
settings.max_bytes_before_external_sort, context->getTemporaryVolume(),
settings.min_free_disk_space_for_temporary_data);
});
/// If there are several streams, we merge them into one

View File

@ -13,6 +13,7 @@
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/TemporaryFileStream.h>
#include <DataStreams/ConcatBlockInputStream.h>
#include <Disks/DiskSpaceMonitor.h>
namespace DB
{
@ -406,6 +407,8 @@ void MiniLSM::insert(const BlocksList & blocks)
if (blocks.empty())
return;
const std::string path(volume->getNextDisk()->getPath());
SortedFiles sorted_blocks;
if (blocks.size() > 1)
{
@ -434,6 +437,7 @@ void MiniLSM::merge(std::function<void(const Block &)> callback)
BlockInputStreams inputs = makeSortedInputStreams(sorted_files, sample_block);
MergingSortedBlockInputStream sorted_stream(inputs, sort_description, rows_in_block);
const std::string path(volume->getNextDisk()->getPath());
SortedFiles out;
flushStreamToFiles(path, sample_block, sorted_stream, out, callback);
@ -484,7 +488,7 @@ MergeJoin::MergeJoin(std::shared_ptr<AnalyzedJoin> table_join_, const Block & ri
makeSortAndMerge(table_join->keyNamesLeft(), left_sort_description, left_merge_description);
makeSortAndMerge(table_join->keyNamesRight(), right_sort_description, right_merge_description);
lsm = std::make_unique<MiniLSM>(table_join->getTemporaryPath(), right_sample_block, right_sort_description, max_rows_in_right_block);
lsm = std::make_unique<MiniLSM>(table_join->getTemporaryVolume(), right_sample_block, right_sort_description, max_rows_in_right_block);
}
void MergeJoin::setTotals(const Block & totals_block)

View File

@ -17,20 +17,23 @@ class AnalyzedJoin;
class MergeJoinCursor;
struct MergeJoinEqualRange;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
struct MiniLSM
{
using SortedFiles = std::vector<std::unique_ptr<TemporaryFile>>;
const String & path;
VolumePtr volume;
const Block & sample_block;
const SortDescription & sort_description;
const size_t rows_in_block;
const size_t max_size;
std::vector<SortedFiles> sorted_files;
MiniLSM(const String & path_, const Block & sample_block_, const SortDescription & description,
MiniLSM(VolumePtr volume_, const Block & sample_block_, const SortDescription & description,
size_t rows_in_block_, size_t max_size_ = 16)
: path(path_)
: volume(volume_)
, sample_block(sample_block_)
, sort_description(description)
, rows_in_block(rows_in_block_)

View File

@ -49,6 +49,7 @@ Block QueryLogElement::createBlock()
{std::make_shared<DataTypeUInt64>(), "memory_usage"},
{std::make_shared<DataTypeString>(), "query"},
{std::make_shared<DataTypeInt32>(), "exception_code"},
{std::make_shared<DataTypeString>(), "exception"},
{std::make_shared<DataTypeString>(), "stack_trace"},
@ -107,6 +108,7 @@ void QueryLogElement::appendToBlock(Block & block) const
columns[i++]->insert(memory_usage);
columns[i++]->insertData(query.data(), query.size());
columns[i++]->insert(exception_code);
columns[i++]->insertData(exception.data(), exception.size());
columns[i++]->insertData(stack_trace.data(), stack_trace.size());

View File

@ -54,6 +54,7 @@ struct QueryLogElement
String query;
Int32 exception_code{}; // because ErrorCodes are int
String exception;
String stack_trace;

View File

@ -816,7 +816,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns_;
result.analyzed_join = std::make_shared<AnalyzedJoin>(settings, context.getTemporaryPath()); /// TODO: move to select_query logic
result.analyzed_join = std::make_shared<AnalyzedJoin>(settings, context.getTemporaryVolume()); /// TODO: move to select_query logic
if (storage)
collectSourceColumns(storage->getColumns(), result.source_columns, (select_query != nullptr));

View File

@ -12,6 +12,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
@ -31,8 +36,19 @@ std::shared_ptr<TSystemLog> createSystemLog(
String database = config.getString(config_prefix + ".database", default_database_name);
String table = config.getString(config_prefix + ".table", default_table_name);
String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)");
String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time)";
String engine;
if (config.has(config_prefix + ".engine"))
{
if (config.has(config_prefix + ".partition_by"))
throw Exception("If 'engine' is specified for system table, PARTITION BY parameters should be specified directly inside 'engine' and 'partition_by' setting doesn't make sense", ErrorCodes::BAD_ARGUMENTS);
engine = config.getString(config_prefix + ".engine");
}
else
{
String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)");
engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
}
size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);

View File

@ -163,6 +163,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c
elem.query_start_time = current_time;
elem.query = query_for_logging;
elem.exception_code = getCurrentExceptionCode();
elem.exception = getCurrentExceptionMessage(false);
elem.client_info = context.getClientInfo();
@ -496,6 +497,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
elem.event_time = time(nullptr);
elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time);
elem.exception_code = getCurrentExceptionCode();
elem.exception = getCurrentExceptionMessage(false);
QueryStatus * process_list_elem = context.getProcessListElement();
@ -573,14 +575,17 @@ BlockIO executeQuery(
BlockIO streams;
std::tie(ast, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context,
internal, stage, !may_have_embedded_data, nullptr, allow_processors);
if (streams.in)
if (const auto * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
{
const auto * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
? getIdentifierName(ast_query_with_output->format) : context.getDefaultFormat();
String format_name = ast_query_with_output->format
? getIdentifierName(ast_query_with_output->format)
: context.getDefaultFormat();
if (format_name == "Null")
streams.null_format = true;
}
return streams;
}
@ -590,7 +595,7 @@ void executeQuery(
WriteBuffer & ostr,
bool allow_into_outfile,
Context & context,
std::function<void(const String &)> set_content_type,
std::function<void(const String &, const String &)> set_content_type_and_format,
std::function<void(const String &)> set_query_id)
{
PODArray<char> parse_buf;
@ -680,8 +685,8 @@ void executeQuery(
out->onProgress(progress);
});
if (set_content_type)
set_content_type(out->getContentType());
if (set_content_type_and_format)
set_content_type_and_format(out->getContentType(), format_name);
if (set_query_id)
set_query_id(context.getClientInfo().current_query_id);
@ -742,8 +747,8 @@ void executeQuery(
out->onProgress(progress);
});
if (set_content_type)
set_content_type(out->getContentType());
if (set_content_type_and_format)
set_content_type_and_format(out->getContentType(), format_name);
if (set_query_id)
set_query_id(context.getClientInfo().current_query_id);

View File

@ -19,7 +19,7 @@ void executeQuery(
WriteBuffer & ostr, /// Where to write query output to.
bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file.
Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions...
std::function<void(const String &)> set_content_type, /// If non-empty callback is passed, it will be called with the Content-Type of the result.
std::function<void(const String &, const String &)> set_content_type_and_format, /// If non-empty callback is passed, it will be called with the Content-Type and the Format of the result.
std::function<void(const String &)> set_query_id /// If non-empty callback is passed, it will be called with the query id.
);

View File

@ -79,7 +79,7 @@ int main(int argc, char ** argv)
Aggregator::Params params(
stream->getHeader(), {0, 1}, aggregate_descriptions,
false, 0, OverflowMode::THROW, 0, 0, 0, false, "", 1, 0);
false, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, 1, 0);
Aggregator aggregator(params);

View File

@ -0,0 +1,671 @@
#include "AvroRowInputFormat.h"
#if USE_AVRO
#include <numeric>
#include <Core/Defines.h>
#include <Core/Field.h>
#include <IO/Operators.h>
#include <IO/ReadHelpers.h>
#include <IO/HTTPCommon.h>
#include <Formats/verbosePrintString.h>
#include <Formats/FormatFactory.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <avro/Compiler.hh>
#include <avro/DataFile.hh>
#include <avro/Decoder.hh>
#include <avro/Encoder.hh>
#include <avro/Generic.hh>
#include <avro/GenericDatum.hh>
#include <avro/Node.hh>
#include <avro/NodeConcepts.hh>
#include <avro/NodeImpl.hh>
#include <avro/Reader.hh>
#include <avro/Schema.hh>
#include <avro/Specific.hh>
#include <avro/ValidSchema.hh>
#include <avro/Writer.hh>
#include <Poco/BinaryReader.h>
#include <Poco/Buffer.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Poco/MemoryStream.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/Poco.h>
#include <Poco/URI.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int THERE_IS_NO_COLUMN;
extern const int INCORRECT_DATA;
extern const int ILLEGAL_COLUMN;
extern const int TYPE_MISMATCH;
}
class InputStreamReadBufferAdapter : public avro::InputStream
{
public:
InputStreamReadBufferAdapter(ReadBuffer & in_) : in(in_) {}
bool next(const uint8_t ** data, size_t * len) override
{
if (in.eof())
{
*len = 0;
return false;
}
*data = reinterpret_cast<const uint8_t *>(in.position());
*len = in.available();
in.position() += in.available();
return true;
}
void backup(size_t len) override { in.position() -= len; }
void skip(size_t len) override { in.tryIgnore(len); }
size_t byteCount() const override { return in.count(); }
private:
ReadBuffer & in;
};
static void deserializeNoop(IColumn &, avro::Decoder &)
{
}
/// Insert value with conversion to the column of target type.
template <typename T>
static void insertNumber(IColumn & column, WhichDataType type, T value)
{
switch (type.idx)
{
case TypeIndex::UInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(value);
break;
case TypeIndex::Date: [[fallthrough]];
case TypeIndex::UInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(value);
break;
case TypeIndex::DateTime: [[fallthrough]];
case TypeIndex::UInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(value);
break;
case TypeIndex::DateTime64: [[fallthrough]];
case TypeIndex::UInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(value);
break;
case TypeIndex::Int8:
assert_cast<ColumnInt8 &>(column).insertValue(value);
break;
case TypeIndex::Int16:
assert_cast<ColumnInt16 &>(column).insertValue(value);
break;
case TypeIndex::Int32:
assert_cast<ColumnInt32 &>(column).insertValue(value);
break;
case TypeIndex::Int64:
assert_cast<ColumnInt64 &>(column).insertValue(value);
break;
case TypeIndex::Float32:
assert_cast<ColumnFloat32 &>(column).insertValue(value);
break;
case TypeIndex::Float64:
assert_cast<ColumnFloat64 &>(column).insertValue(value);
break;
default:
throw Exception("Type is not compatible with Avro", ErrorCodes::ILLEGAL_COLUMN);
}
}
AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type)
{
WhichDataType target(target_type);
switch (root_node->type())
{
case avro::AVRO_STRING: [[fallthrough]];
case avro::AVRO_BYTES:
if (target.isString() || target.isFixedString())
{
return [tmp = std::string()](IColumn & column, avro::Decoder & decoder) mutable
{
decoder.decodeString(tmp);
column.insertData(tmp.c_str(), tmp.length());
};
}
break;
case avro::AVRO_INT:
return [target](IColumn & column, avro::Decoder & decoder)
{
insertNumber(column, target, decoder.decodeInt());
};
case avro::AVRO_LONG:
if (target.isDateTime64())
{
auto date_time_scale = assert_cast<const DataTypeDateTime64 &>(*target_type).getScale();
auto logical_type = root_node->logicalType().type();
if ((logical_type == avro::LogicalType::TIMESTAMP_MILLIS && date_time_scale == 3)
|| (logical_type == avro::LogicalType::TIMESTAMP_MICROS && date_time_scale == 6))
{
return [](IColumn & column, avro::Decoder & decoder)
{
assert_cast<DataTypeDateTime64::ColumnType &>(column).insertValue(decoder.decodeLong());
};
}
}
else
{
return [target](IColumn & column, avro::Decoder & decoder)
{
insertNumber(column, target, decoder.decodeLong());
};
}
break;
case avro::AVRO_FLOAT:
return [target](IColumn & column, avro::Decoder & decoder)
{
insertNumber(column, target, decoder.decodeFloat());
};
case avro::AVRO_DOUBLE:
return [target](IColumn & column, avro::Decoder & decoder)
{
insertNumber(column, target, decoder.decodeDouble());
};
case avro::AVRO_BOOL:
return [target](IColumn & column, avro::Decoder & decoder)
{
insertNumber(column, target, decoder.decodeBool());
};
case avro::AVRO_ARRAY:
if (target.isArray())
{
auto nested_source_type = root_node->leafAt(0);
auto nested_target_type = assert_cast<const DataTypeArray &>(*target_type).getNestedType();
auto nested_deserialize = createDeserializeFn(nested_source_type, nested_target_type);
return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
IColumn & nested_column = column_array.getData();
size_t total = 0;
for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext())
{
total += n;
for (size_t i = 0; i < n; i++)
{
nested_deserialize(nested_column, decoder);
}
}
offsets.push_back(offsets.back() + total);
};
}
break;
case avro::AVRO_UNION:
{
auto nullable_deserializer = [root_node, target_type](size_t non_null_union_index)
{
auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
size_t union_index = decoder.decodeUnionIndex();
if (union_index == non_null_union_index)
{
nested_deserialize(col.getNestedColumn(), decoder);
col.getNullMapData().push_back(0);
}
else
{
col.insertDefault();
}
};
};
if (root_node->leaves() == 2 && target.isNullable())
{
if (root_node->leafAt(0)->type() == avro::AVRO_NULL)
return nullable_deserializer(1);
if (root_node->leafAt(1)->type() == avro::AVRO_NULL)
return nullable_deserializer(0);
}
break;
}
case avro::AVRO_NULL:
if (target.isNullable())
{
auto nested_type = removeNullable(target_type);
if (nested_type->getTypeId() == TypeIndex::Nothing)
{
return [](IColumn &, avro::Decoder & decoder)
{
decoder.decodeNull();
};
}
else
{
return [](IColumn & column, avro::Decoder & decoder)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
decoder.decodeNull();
col.insertDefault();
};
}
}
break;
case avro::AVRO_ENUM:
if (target.isString())
{
std::vector<std::string> symbols;
for (size_t i = 0; i < root_node->names(); i++)
{
symbols.push_back(root_node->nameAt(i));
}
return [symbols](IColumn & column, avro::Decoder & decoder)
{
size_t enum_index = decoder.decodeEnum();
const auto & enum_symbol = symbols[enum_index];
column.insertData(enum_symbol.c_str(), enum_symbol.length());
};
}
if (target.isEnum())
{
const auto & enum_type = dynamic_cast<const IDataTypeEnum &>(*target_type);
std::vector<Field> symbol_mapping;
for (size_t i = 0; i < root_node->names(); i++)
{
symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i)));
}
return [symbol_mapping](IColumn & column, avro::Decoder & decoder)
{
size_t enum_index = decoder.decodeEnum();
column.insert(symbol_mapping[enum_index]);
};
}
break;
case avro::AVRO_FIXED:
{
size_t fixed_size = root_node->fixedSize();
if (target.isFixedString() && target_type->getSizeOfValueInMemory() == fixed_size)
{
return [tmp_fixed = std::vector<uint8_t>(fixed_size)](IColumn & column, avro::Decoder & decoder) mutable
{
decoder.decodeFixed(tmp_fixed.size(), tmp_fixed);
column.insertData(reinterpret_cast<const char *>(tmp_fixed.data()), tmp_fixed.size());
};
}
break;
}
case avro::AVRO_MAP: [[fallthrough]];
case avro::AVRO_RECORD: [[fallthrough]];
default:
break;
}
throw Exception(
"Type " + target_type->getName() + " is not compatible with Avro " + avro::ValidSchema(root_node).toJson(false),
ErrorCodes::ILLEGAL_COLUMN);
}
AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node)
{
switch (root_node->type())
{
case avro::AVRO_STRING:
return [](avro::Decoder & decoder) { decoder.skipString(); };
case avro::AVRO_BYTES:
return [](avro::Decoder & decoder) { decoder.skipBytes(); };
case avro::AVRO_INT:
return [](avro::Decoder & decoder) { decoder.decodeInt(); };
case avro::AVRO_LONG:
return [](avro::Decoder & decoder) { decoder.decodeLong(); };
case avro::AVRO_FLOAT:
return [](avro::Decoder & decoder) { decoder.decodeFloat(); };
case avro::AVRO_DOUBLE:
return [](avro::Decoder & decoder) { decoder.decodeDouble(); };
case avro::AVRO_BOOL:
return [](avro::Decoder & decoder) { decoder.decodeBool(); };
case avro::AVRO_ARRAY:
{
auto nested_skip_fn = createSkipFn(root_node->leafAt(0));
return [nested_skip_fn](avro::Decoder & decoder)
{
for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext())
{
for (size_t i = 0; i < n; ++i)
{
nested_skip_fn(decoder);
}
}
};
}
case avro::AVRO_UNION:
{
std::vector<SkipFn> union_skip_fns;
for (size_t i = 0; i < root_node->leaves(); i++)
{
union_skip_fns.push_back(createSkipFn(root_node->leafAt(i)));
}
return [union_skip_fns](avro::Decoder & decoder) { union_skip_fns[decoder.decodeUnionIndex()](decoder); };
}
case avro::AVRO_NULL:
return [](avro::Decoder & decoder) { decoder.decodeNull(); };
case avro::AVRO_ENUM:
return [](avro::Decoder & decoder) { decoder.decodeEnum(); };
case avro::AVRO_FIXED:
{
auto fixed_size = root_node->fixedSize();
return [fixed_size](avro::Decoder & decoder) { decoder.skipFixed(fixed_size); };
}
case avro::AVRO_MAP:
{
auto value_skip_fn = createSkipFn(root_node->leafAt(1));
return [value_skip_fn](avro::Decoder & decoder)
{
for (size_t n = decoder.mapStart(); n != 0; n = decoder.mapNext())
{
for (size_t i = 0; i < n; ++i)
{
decoder.skipString();
value_skip_fn(decoder);
}
}
};
}
case avro::AVRO_RECORD:
{
std::vector<SkipFn> field_skip_fns;
for (size_t i = 0; i < root_node->leaves(); i++)
{
field_skip_fns.push_back(createSkipFn(root_node->leafAt(i)));
}
return [field_skip_fns](avro::Decoder & decoder)
{
for (auto & skip_fn : field_skip_fns)
skip_fn(decoder);
};
}
default:
throw Exception("Unsupported Avro type " + root_node->name().fullname() + " (" + toString(int(root_node->type())) + ")", ErrorCodes::ILLEGAL_COLUMN);
}
}
AvroDeserializer::AvroDeserializer(const ColumnsWithTypeAndName & columns, avro::ValidSchema schema)
{
auto schema_root = schema.root();
if (schema_root->type() != avro::AVRO_RECORD)
{
throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH);
}
field_mapping.resize(schema_root->leaves(), -1);
for (size_t i = 0; i < schema_root->leaves(); ++i)
{
skip_fns.push_back(createSkipFn(schema_root->leafAt(i)));
deserialize_fns.push_back(&deserializeNoop);
}
for (size_t i = 0; i < columns.size(); ++i)
{
const auto & column = columns[i];
size_t field_index = 0;
if (!schema_root->nameIndex(column.name, field_index))
{
throw Exception("Field " + column.name + " not found in Avro schema", ErrorCodes::THERE_IS_NO_COLUMN);
}
auto field_schema = schema_root->leafAt(field_index);
try
{
deserialize_fns[field_index] = createDeserializeFn(field_schema, column.type);
}
catch (Exception & e)
{
e.addMessage("column " + column.name);
throw;
}
field_mapping[field_index] = i;
}
}
void AvroDeserializer::deserializeRow(MutableColumns & columns, avro::Decoder & decoder)
{
for (size_t i = 0; i < field_mapping.size(); i++)
{
if (field_mapping[i] >= 0)
{
deserialize_fns[i](*columns[field_mapping[i]], decoder);
}
else
{
skip_fns[i](decoder);
}
}
}
AvroRowInputFormat::AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
: IRowInputFormat(header_, in_, params_)
, file_reader(std::make_unique<InputStreamReadBufferAdapter>(in_))
, deserializer(header_.getColumnsWithTypeAndName(), file_reader.dataSchema())
{
file_reader.init();
}
bool AvroRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (file_reader.hasMore())
{
file_reader.decr();
deserializer.deserializeRow(columns, file_reader.decoder());
return true;
}
return false;
}
#if USE_POCO_JSON
class AvroConfluentRowInputFormat::SchemaRegistry
{
public:
SchemaRegistry(const std::string & base_url_)
{
if (base_url_.empty())
{
throw Exception("Empty Schema Registry URL", ErrorCodes::BAD_ARGUMENTS);
}
try
{
base_url = base_url_;
}
catch (const Poco::SyntaxException & e)
{
throw Exception("Invalid Schema Registry URL: " + e.displayText(), ErrorCodes::BAD_ARGUMENTS);
}
}
avro::ValidSchema getSchema(uint32_t id) const
{
try
{
try
{
/// TODO Host checking to prevent SSRF
Poco::URI url(base_url, "/schemas/ids/" + std::to_string(id));
/// One second for connect/send/receive. Just in case.
ConnectionTimeouts timeouts({1, 0}, {1, 0}, {1, 0});
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery());
auto session = makePooledHTTPSession(url, timeouts, 1);
session->sendRequest(request);
Poco::Net::HTTPResponse response;
auto & response_body = session->receiveResponse(response);
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
{
throw Exception("HTTP code " + std::to_string(response.getStatus()), ErrorCodes::INCORRECT_DATA);
}
Poco::JSON::Parser parser;
auto json_body = parser.parse(response_body).extract<Poco::JSON::Object::Ptr>();
auto schema = json_body->getValue<std::string>("schema");
return avro::compileJsonSchemaFromString(schema);
}
catch (const Exception &)
{
throw;
}
catch (const Poco::Exception & e)
{
throw Exception(Exception::CreateFromPoco, e);
}
catch (const avro::Exception & e)
{
throw Exception(e.what(), ErrorCodes::INCORRECT_DATA);
}
}
catch (Exception & e)
{
e.addMessage("while fetching schema id = " + std::to_string(id));
throw;
}
}
private:
Poco::URI base_url;
};
static uint32_t readConfluentSchemaId(ReadBuffer & in)
{
uint8_t magic;
uint32_t schema_id;
readBinaryBigEndian(magic, in);
readBinaryBigEndian(schema_id, in);
if (magic != 0x00)
{
throw Exception("Invalid magic byte before AvroConfluent schema identifier."
" Must be zero byte, found " + std::to_string(int(magic)) + " instead", ErrorCodes::INCORRECT_DATA);
}
return schema_id;
}
AvroConfluentRowInputFormat::AvroConfluentRowInputFormat(
const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_)
: IRowInputFormat(header_.cloneEmpty(), in_, params_)
, header_columns(header_.getColumnsWithTypeAndName())
, schema_registry(std::make_unique<SchemaRegistry>(format_settings_.avro.schema_registry_url))
, input_stream(std::make_unique<InputStreamReadBufferAdapter>(in))
, decoder(avro::binaryDecoder())
{
decoder->init(*input_stream);
}
bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (in.eof())
{
return false;
}
SchemaId schema_id = readConfluentSchemaId(in);
auto & deserializer = getOrCreateDeserializer(schema_id);
deserializer.deserializeRow(columns, *decoder);
decoder->drain();
return true;
}
AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id)
{
auto it = deserializer_cache.find(schema_id);
if (it == deserializer_cache.end())
{
auto schema = schema_registry->getSchema(schema_id);
AvroDeserializer deserializer(header_columns, schema);
it = deserializer_cache.emplace(schema_id, deserializer).first;
}
return it->second;
}
#endif
void registerInputFormatProcessorAvro(FormatFactory & factory)
{
factory.registerInputFormatProcessor("Avro", [](
ReadBuffer & buf,
const Block & sample,
const RowInputFormatParams & params,
const FormatSettings &)
{
return std::make_shared<AvroRowInputFormat>(sample, buf, params);
});
#if USE_POCO_JSON
/// AvroConfluent format is disabled for the following reasons:
/// 1. There is no test for it.
/// 2. RemoteHostFilter is not used to prevent CSRF attacks.
#if 0
factory.registerInputFormatProcessor("AvroConfluent",[](
ReadBuffer & buf,
const Block & sample,
const RowInputFormatParams & params,
const FormatSettings & settings)
{
return std::make_shared<AvroConfluentRowInputFormat>(sample, buf, params, settings);
});
#endif
#endif
}
}
#else
namespace DB
{
class FormatFactory;
void registerInputFormatProcessorAvro(FormatFactory &)
{
}
}
#endif

View File

@ -0,0 +1,79 @@
#pragma once
#include "config_formats.h"
#include "config_core.h"
#if USE_AVRO
#include <unordered_map>
#include <vector>
#include <Core/Block.h>
#include <Formats/FormatSchemaInfo.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <avro/DataFile.hh>
#include <avro/Decoder.hh>
#include <avro/Schema.hh>
#include <avro/ValidSchema.hh>
namespace DB
{
class AvroDeserializer
{
public:
AvroDeserializer(const ColumnsWithTypeAndName & columns, avro::ValidSchema schema);
void deserializeRow(MutableColumns & columns, avro::Decoder & decoder);
private:
using DeserializeFn = std::function<void(IColumn & column, avro::Decoder & decoder)>;
using SkipFn = std::function<void(avro::Decoder & decoder)>;
static DeserializeFn createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type);
static SkipFn createSkipFn(avro::NodePtr root_node);
/// Map from field index in Avro schema to column number in block header. Or -1 if there is no corresponding column.
std::vector<int> field_mapping;
/// How to skip the corresponding field in Avro schema.
std::vector<SkipFn> skip_fns;
/// How to deserialize the corresponding field in Avro schema.
std::vector<DeserializeFn> deserialize_fns;
};
class AvroRowInputFormat : public IRowInputFormat
{
public:
AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
virtual bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
String getName() const override { return "AvroRowInputFormat"; }
private:
avro::DataFileReaderBase file_reader;
AvroDeserializer deserializer;
};
#if USE_POCO_JSON
class AvroConfluentRowInputFormat : public IRowInputFormat
{
public:
AvroConfluentRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_);
virtual bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
String getName() const override { return "AvroConfluentRowInputFormat"; }
private:
const ColumnsWithTypeAndName header_columns;
class SchemaRegistry;
std::unique_ptr<SchemaRegistry> schema_registry;
using SchemaId = uint32_t;
std::unordered_map<SchemaId, AvroDeserializer> deserializer_cache;
AvroDeserializer & getOrCreateDeserializer(SchemaId schema_id);
avro::InputStreamPtr input_stream;
avro::DecoderPtr decoder;
};
#endif
}
#endif

View File

@ -0,0 +1,396 @@
#include "AvroRowOutputFormat.h"
#if USE_AVRO
#include <Core/Defines.h>
#include <Core/Field.h>
#include <IO/Operators.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Formats/verbosePrintString.h>
#include <Formats/FormatFactory.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <avro/Compiler.hh>
#include <avro/DataFile.hh>
#include <avro/Decoder.hh>
#include <avro/Encoder.hh>
#include <avro/Generic.hh>
#include <avro/GenericDatum.hh>
#include <avro/Node.hh>
#include <avro/NodeConcepts.hh>
#include <avro/NodeImpl.hh>
#include <avro/Reader.hh>
#include <avro/Schema.hh>
#include <avro/Specific.hh>
#include <avro/ValidSchema.hh>
#include <avro/Writer.hh>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
extern const int BAD_ARGUMENTS;
extern const int THERE_IS_NO_COLUMN;
extern const int LOGICAL_ERROR;
extern const int INCORRECT_DATA;
extern const int CANNOT_READ_ALL_DATA;
}
class OutputStreamWriteBufferAdapter : public avro::OutputStream
{
public:
OutputStreamWriteBufferAdapter(WriteBuffer & out_) : out(out_) {}
virtual bool next(uint8_t ** data, size_t * len) override
{
out.nextIfAtEnd();
*data = reinterpret_cast<uint8_t *>(out.position());
*len = out.available();
out.position() += out.available();
return true;
}
virtual void backup(size_t len) override { out.position() -= len; }
virtual uint64_t byteCount() const override { return out.count(); }
virtual void flush() override { out.next(); }
private:
WriteBuffer & out;
};
AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment)
{
++type_name_increment;
switch (data_type->getTypeId())
{
case TypeIndex::UInt8:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnUInt8 &>(column).getElement(row_num));
}};
case TypeIndex::Int8:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnInt8 &>(column).getElement(row_num));
}};
case TypeIndex::UInt16:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnUInt16 &>(column).getElement(row_num));
}};
case TypeIndex::Int16:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnInt16 &>(column).getElement(row_num));
}};
case TypeIndex::UInt32: [[fallthrough]];
case TypeIndex::DateTime:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnUInt32 &>(column).getElement(row_num));
}};
case TypeIndex::Int32:
return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeInt(assert_cast<const ColumnInt32 &>(column).getElement(row_num));
}};
case TypeIndex::UInt64:
return {avro::LongSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeLong(assert_cast<const ColumnUInt64 &>(column).getElement(row_num));
}};
case TypeIndex::Int64:
return {avro::LongSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeLong(assert_cast<const ColumnInt64 &>(column).getElement(row_num));
}};
case TypeIndex::Float32:
return {avro::FloatSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeFloat(assert_cast<const ColumnFloat32 &>(column).getElement(row_num));
}};
case TypeIndex::Float64:
return {avro::DoubleSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
encoder.encodeDouble(assert_cast<const ColumnFloat64 &>(column).getElement(row_num));
}};
case TypeIndex::Date:
{
auto schema = avro::IntSchema();
schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::DATE));
return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
UInt16 date = assert_cast<const DataTypeDate::ColumnType &>(column).getElement(row_num);
encoder.encodeInt(date);
}};
}
case TypeIndex::DateTime64:
{
auto schema = avro::LongSchema();
const auto & provided_type = assert_cast<const DataTypeDateTime64 &>(*data_type);
if (provided_type.getScale() == 3)
schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::TIMESTAMP_MILLIS));
else if (provided_type.getScale() == 6)
schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::TIMESTAMP_MICROS));
else
break;
return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const auto & col = assert_cast<const DataTypeDateTime64::ColumnType &>(column);
encoder.encodeLong(col.getElement(row_num));
}};
}
case TypeIndex::String:
return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const StringRef & s = assert_cast<const ColumnString &>(column).getDataAt(row_num);
encoder.encodeBytes(reinterpret_cast<const uint8_t *>(s.data), s.size);
}};
case TypeIndex::FixedString:
{
auto size = data_type->getSizeOfValueInMemory();
auto schema = avro::FixedSchema(size, "fixed_" + toString(type_name_increment));
return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const StringRef & s = assert_cast<const ColumnFixedString &>(column).getDataAt(row_num);
encoder.encodeFixed(reinterpret_cast<const uint8_t *>(s.data), s.size);
}};
}
case TypeIndex::Enum8:
{
auto schema = avro::EnumSchema("enum8_" + toString(type_name_increment)); /// type names must be different for different types.
std::unordered_map<DataTypeEnum8::FieldType, size_t> enum_mapping;
const auto & enum_values = assert_cast<const DataTypeEnum8 &>(*data_type).getValues();
for (size_t i = 0; i < enum_values.size(); ++i)
{
schema.addSymbol(enum_values[i].first);
enum_mapping.emplace(enum_values[i].second, i);
}
return {schema, [enum_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
auto enum_value = assert_cast<const DataTypeEnum8::ColumnType &>(column).getElement(row_num);
encoder.encodeEnum(enum_mapping.at(enum_value));
}};
}
case TypeIndex::Enum16:
{
auto schema = avro::EnumSchema("enum16" + toString(type_name_increment));
std::unordered_map<DataTypeEnum16::FieldType, size_t> enum_mapping;
const auto & enum_values = assert_cast<const DataTypeEnum16 &>(*data_type).getValues();
for (size_t i = 0; i < enum_values.size(); ++i)
{
schema.addSymbol(enum_values[i].first);
enum_mapping.emplace(enum_values[i].second, i);
}
return {schema, [enum_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
auto enum_value = assert_cast<const DataTypeEnum16::ColumnType &>(column).getElement(row_num);
encoder.encodeEnum(enum_mapping.at(enum_value));
}};
}
case TypeIndex::Array:
{
const auto & array_type = assert_cast<const DataTypeArray &>(*data_type);
auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment);
auto schema = avro::ArraySchema(nested_mapping.schema);
return {schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
size_t row_count = next_offset - offset;
const IColumn & nested_column = column_array.getData();
encoder.arrayStart();
if (row_count > 0)
{
encoder.setItemCount(row_count);
}
for (size_t i = offset; i < next_offset; ++i)
{
nested_mapping.serialize(nested_column, i, encoder);
}
encoder.arrayEnd();
}};
}
case TypeIndex::Nullable:
{
auto nested_type = removeNullable(data_type);
auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment);
if (nested_type->getTypeId() == TypeIndex::Nothing)
{
return nested_mapping;
}
else
{
avro::UnionSchema union_schema;
union_schema.addType(avro::NullSchema());
union_schema.addType(nested_mapping.schema);
return {union_schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
if (!col.isNullAt(row_num))
{
encoder.encodeUnionIndex(1);
nested_mapping.serialize(col.getNestedColumn(), row_num, encoder);
}
else
{
encoder.encodeUnionIndex(0);
encoder.encodeNull();
}
}};
}
}
case TypeIndex::LowCardinality:
{
const auto & nested_type = removeLowCardinality(data_type);
auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment);
return {nested_mapping.schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const auto & col = assert_cast<const ColumnLowCardinality &>(column);
nested_mapping.serialize(*col.getDictionary().getNestedColumn(), col.getIndexAt(row_num), encoder);
}};
}
case TypeIndex::Nothing:
return {avro::NullSchema(), [](const IColumn &, size_t, avro::Encoder & encoder) { encoder.encodeNull(); }};
default:
break;
}
throw Exception("Type " + data_type->getName() + " is not supported for Avro output", ErrorCodes::ILLEGAL_COLUMN);
}
AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns)
{
avro::RecordSchema record_schema("row");
size_t type_name_increment = 0;
for (auto & column : columns)
{
try
{
auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment);
serialize_fns.push_back(field_mapping.serialize);
//TODO: verify name starts with A-Za-z_
record_schema.addField(column.name, field_mapping.schema);
}
catch (Exception & e)
{
e.addMessage("column " + column.name);
throw;
}
}
schema.setSchema(record_schema);
}
void AvroSerializer::serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder)
{
size_t num_columns = columns.size();
for (size_t i = 0; i < num_columns; ++i)
{
serialize_fns[i](*columns[i], row_num, encoder);
}
}
static avro::Codec getCodec(const std::string & codec_name)
{
if (codec_name == "")
{
#ifdef SNAPPY_CODEC_AVAILABLE
return avro::Codec::SNAPPY_CODEC;
#else
return avro::Codec::DEFLATE_CODEC;
#endif
}
if (codec_name == "null") return avro::Codec::NULL_CODEC;
if (codec_name == "deflate") return avro::Codec::DEFLATE_CODEC;
#ifdef SNAPPY_CODEC_AVAILABLE
if (codec_name == "snappy") return avro::Codec::SNAPPY_CODEC;
#endif
throw Exception("Avro codec " + codec_name + " is not available", ErrorCodes::BAD_ARGUMENTS);
}
AvroRowOutputFormat::AvroRowOutputFormat(
WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_)
: IRowOutputFormat(header_, out_, callback)
, settings(settings_)
, serializer(header_.getColumnsWithTypeAndName())
, file_writer(
std::make_unique<OutputStreamWriteBufferAdapter>(out_),
serializer.getSchema(),
settings.avro.output_sync_interval,
getCodec(settings.avro.output_codec))
{
}
AvroRowOutputFormat::~AvroRowOutputFormat() = default;
void AvroRowOutputFormat::writePrefix()
{
file_writer.syncIfNeeded();
}
void AvroRowOutputFormat::write(const Columns & columns, size_t row_num)
{
file_writer.syncIfNeeded();
serializer.serializeRow(columns, row_num, file_writer.encoder());
file_writer.incr();
}
void AvroRowOutputFormat::writeSuffix()
{
file_writer.close();
}
void registerOutputFormatProcessorAvro(FormatFactory & factory)
{
factory.registerOutputFormatProcessor("Avro", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & settings)
{
return std::make_shared<AvroRowOutputFormat>(buf, sample, callback, settings);
});
}
}
#else
namespace DB
{
class FormatFactory;
void registerOutputFormatProcessorAvro(FormatFactory &)
{
}
}
#endif

View File

@ -0,0 +1,62 @@
#pragma once
#include "config_formats.h"
#if USE_AVRO
#include <unordered_map>
#include <Core/Block.h>
#include <Formats/FormatSchemaInfo.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteBuffer.h>
#include <Processors/Formats/IRowOutputFormat.h>
#include <avro/DataFile.hh>
#include <avro/Schema.hh>
#include <avro/ValidSchema.hh>
namespace DB
{
class WriteBuffer;
class AvroSerializer
{
public:
AvroSerializer(const ColumnsWithTypeAndName & columns);
const avro::ValidSchema & getSchema() const { return schema; }
void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder);
private:
using SerializeFn = std::function<void(const IColumn & column, size_t row_num, avro::Encoder & encoder)>;
struct SchemaWithSerializeFn
{
avro::Schema schema;
SerializeFn serialize;
};
/// Type names for different complex types (e.g. enums, fixed strings) must be unique. We use simple incremental number to give them different names.
static SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment);
std::vector<SerializeFn> serialize_fns;
avro::ValidSchema schema;
};
class AvroRowOutputFormat : public IRowOutputFormat
{
public:
AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_);
virtual ~AvroRowOutputFormat() override;
String getName() const override { return "AvroRowOutputFormat"; }
void write(const Columns & columns, size_t row_num) override;
void writeField(const IColumn &, const IDataType &, size_t) override {}
virtual void writePrefix() override;
virtual void writeSuffix() override;
private:
FormatSettings settings;
AvroSerializer serializer;
avro::DataFileWriterBase file_writer;
};
}
#endif

View File

@ -20,14 +20,18 @@ public:
Block getTotals();
Block getExtremes();
bool isFinished() { return finished_processing; }
bool isFinished() { return finished_processing && queue.size() == 0; }
BlockStreamProfileInfo & getProfileInfo() { return info; }
void setRowsBeforeLimit(size_t rows_before_limit) override;
void finish() { finished_processing = true; }
void clearQueue() { queue.clear(); }
void finish()
{
finished_processing = true;
/// Clear queue in case if somebody is waiting lazy_format to push.
queue.clear();
}
protected:
void consume(Chunk chunk) override

View File

@ -4,6 +4,20 @@
namespace DB
{
/// Transform which can generate several chunks on every consumed.
/// It can be assumed that class is used in following way:
///
/// for (chunk : input_chunks)
/// {
/// transform.consume(chunk);
///
/// while (transform.canGenerate())
/// {
/// transformed_chunk = transform.generate();
/// ... (process transformed chunk)
/// }
/// }
///
class IInflatingTransform : public IProcessor
{
protected:

View File

@ -29,11 +29,14 @@ public:
void init(Pipe pipe); /// Simple init for single pipe
bool initialized() { return !processors.empty(); }
/// Type of logical data stream for simple transform.
/// Sometimes it's important to know which part of pipeline we are working for.
/// Example: ExpressionTransform need special logic for totals.
enum class StreamType
{
Main = 0,
Totals,
Extremes,
Main = 0, /// Stream for query data. There may be several streams of this type.
Totals, /// Stream for totals. No more then one.
Extremes, /// Stream for extremes. No more then one.
};
using ProcessorGetter = std::function<ProcessorPtr(const Block & header)>;

View File

@ -37,6 +37,8 @@ void ExpressionTransform::transform(Chunk & chunk)
if (on_totals)
{
/// Drop totals if both out stream and joined stream doesn't have ones.
/// See comment in ExpressionTransform.h
if (default_totals && !expression->hasTotalsInJoin())
return;

View File

@ -10,7 +10,11 @@ using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class ExpressionTransform : public ISimpleTransform
{
public:
ExpressionTransform(const Block & header_, ExpressionActionsPtr expression_, bool on_totals_ = false, bool default_totals_ = false);
ExpressionTransform(
const Block & header_,
ExpressionActionsPtr expression_,
bool on_totals_ = false,
bool default_totals_ = false);
String getName() const override { return "ExpressionTransform"; }
@ -20,6 +24,9 @@ protected:
private:
ExpressionActionsPtr expression;
bool on_totals;
/// This flag means that we have manually added totals to our pipeline.
/// It may happen in case if joined subquery has totals, but out string doesn't.
/// We need to join default values with subquery totals if we have them, or return empty chunk is haven't.
bool default_totals;
bool initialized = false;
};

View File

@ -8,6 +8,7 @@
#include <Compression/CompressedWriteBuffer.h>
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <Disks/DiskSpaceMonitor.h>
namespace ProfileEvents
@ -95,11 +96,11 @@ MergeSortingTransform::MergeSortingTransform(
const SortDescription & description_,
size_t max_merged_block_size_, UInt64 limit_,
size_t max_bytes_before_remerge_,
size_t max_bytes_before_external_sort_, const std::string & tmp_path_,
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_,
size_t min_free_disk_space_)
: SortingTransform(header, description_, max_merged_block_size_, limit_)
, max_bytes_before_remerge(max_bytes_before_remerge_)
, max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_)
, max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_)
, min_free_disk_space(min_free_disk_space_) {}
Processors MergeSortingTransform::expandPipeline()
@ -172,10 +173,14 @@ void MergeSortingTransform::consume(Chunk chunk)
*/
if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort)
{
if (!enoughSpaceInDirectory(tmp_path, sum_bytes_in_blocks + min_free_disk_space))
throw Exception("Not enough space for external sort in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE);
size_t size = sum_bytes_in_blocks + min_free_disk_space;
auto reservation = tmp_volume->reserve(size);
if (!reservation)
throw Exception("Not enough space for external sort in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE);
const std::string tmp_path(reservation->getDisk()->getPath());
temporary_files.emplace_back(createTemporaryFile(tmp_path));
const std::string & path = temporary_files.back()->path();
merge_sorter = std::make_unique<MergeSorter>(std::move(chunks), description, max_merged_block_size, limit);
auto current_processor = std::make_shared<BufferingToFileTransform>(header_without_constants, log, path);

View File

@ -9,6 +9,9 @@
namespace DB
{
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
class MergeSortingTransform : public SortingTransform
{
public:
@ -17,7 +20,7 @@ public:
const SortDescription & description_,
size_t max_merged_block_size_, UInt64 limit_,
size_t max_bytes_before_remerge_,
size_t max_bytes_before_external_sort_, const std::string & tmp_path_,
size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_,
size_t min_free_disk_space_);
String getName() const override { return "MergeSortingTransform"; }
@ -32,7 +35,7 @@ protected:
private:
size_t max_bytes_before_remerge;
size_t max_bytes_before_external_sort;
const std::string tmp_path;
VolumePtr tmp_volume;
size_t min_free_disk_space;
Logger * log = &Logger::get("MergeSortingTransform");

View File

@ -27,6 +27,8 @@
#include <Processors/Transforms/MergingAggregatedTransform.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Disks/DiskSpaceMonitor.h>
#include <Disks/DiskLocal.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/AutoPtr.h>
#include <Common/CurrentThread.h>
@ -187,6 +189,8 @@ try
auto & factory = AggregateFunctionFactory::instance();
auto cur_path = Poco::Path().absolute().toString();
auto disk = std::make_shared<DiskLocal>("tmp", cur_path, 0);
auto tmp_volume = std::make_shared<Volume>("tmp", std::vector<DiskPtr>{disk}, 0);
auto execute_one_stream = [&](String msg, size_t num_threads, bool two_level, bool external)
{
@ -228,7 +232,7 @@ try
group_by_two_level_threshold_bytes,
max_bytes_before_external_group_by,
false, /// empty_result_for_aggregation_by_empty_set
cur_path, /// tmp_path
tmp_volume,
1, /// max_threads
0
);
@ -301,7 +305,7 @@ try
group_by_two_level_threshold_bytes,
max_bytes_before_external_group_by,
false, /// empty_result_for_aggregation_by_empty_set
cur_path, /// tmp_path
tmp_volume,
1, /// max_threads
0
);

View File

@ -1,6 +1,8 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <Disks/DiskSpaceMonitor.h>
#include <Disks/DiskLocal.h>
#include <Processors/IProcessor.h>
#include <Processors/ISource.h>
@ -116,7 +118,10 @@ try
Logger::root().setChannel(channel);
Logger::root().setLevel("trace");
auto execute_chain = [](
auto disk = std::make_shared<DiskLocal>("tmp", ".", 0);
auto tmp_volume = std::make_shared<Volume>("tmp", std::vector<DiskPtr>{disk}, 0);
auto execute_chain = [tmp_volume](
String msg,
UInt64 source_block_size,
UInt64 blocks_count,
@ -133,7 +138,9 @@ try
SortDescription description = {{0, 1, 1}};
auto transform = std::make_shared<MergeSortingTransform>(
source->getPort().getHeader(), description,
max_merged_block_size, limit, max_bytes_before_remerge, max_bytes_before_external_sort, ".", 0);
max_merged_block_size, limit,
max_bytes_before_remerge, max_bytes_before_external_sort,
tmp_volume, 0);
auto sink = std::make_shared<CheckSortedSink>();
connect(source->getPort(), transform->getInputs().front());

View File

@ -105,6 +105,9 @@ public:
/// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas.
virtual bool supportsReplication() const { return false; }
/// Returns true if the storage supports parallel insert.
virtual bool supportsParallelInsert() const { return false; }
/// Returns true if the storage supports deduplication of inserted data blocks.
virtual bool supportsDeduplication() const { return false; }

View File

@ -140,12 +140,12 @@ BlockInputStreamPtr StorageLiveView::completeQuery(BlockInputStreams from)
block_context->makeQueryContext();
auto blocks_storage_id = getBlocksStorageID();
auto blocks_storage = StorageBlocks::createStorage(blocks_storage_id, parent_storage->getColumns(),
auto blocks_storage = StorageBlocks::createStorage(blocks_storage_id, getParentStorage()->getColumns(),
std::move(from), QueryProcessingStage::WithMergeableState);
block_context->addExternalTable(blocks_storage_id.table_name, blocks_storage);
InterpreterSelectQuery select(inner_blocks_query->clone(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete));
InterpreterSelectQuery select(getInnerBlocksQuery(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete));
BlockInputStreamPtr data = std::make_shared<MaterializingBlockInputStream>(select.execute().in);
/// Squashing is needed here because the view query can generate a lot of blocks
@ -255,16 +255,12 @@ StorageLiveView::StorageLiveView(
throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW);
inner_query = query.select->list_of_selects->children.at(0);
inner_blocks_query = inner_query->clone();
InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze());
select_table_id = extractDependentTable(inner_blocks_query, global_context, table_id_.table_name, inner_subquery);
auto inner_query_tmp = inner_query->clone();
select_table_id = extractDependentTable(inner_query_tmp, global_context, table_id_.table_name, inner_subquery);
global_context.addDependency(select_table_id, table_id_);
parent_storage = local_context.getTable(select_table_id);
is_temporary = query.temporary;
temporary_live_view_timeout = local_context.getSettingsRef().temporary_live_view_timeout.totalSeconds();
@ -310,6 +306,22 @@ Block StorageLiveView::getHeader() const
return sample_block;
}
ASTPtr StorageLiveView::getInnerBlocksQuery()
{
std::lock_guard lock(sample_block_lock);
if (!inner_blocks_query)
{
inner_blocks_query = inner_query->clone();
/// Rewrite inner query with right aliases for JOIN.
/// It cannot be done in constructor or startup() because InterpreterSelectQuery may access table,
/// which is not loaded yet during server startup, so we do it lazily
InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze());
auto table_id = getStorageID();
extractDependentTable(inner_blocks_query, global_context, table_id.table_name, inner_subquery);
}
return inner_blocks_query->clone();
}
bool StorageLiveView::getNewBlocks()
{
SipHash hash;
@ -458,6 +470,7 @@ void StorageLiveView::startup()
void StorageLiveView::shutdown()
{
global_context.removeDependency(select_table_id, getStorageID());
bool expected = false;
if (!shutdown_called.compare_exchange_strong(expected, true))
return;

View File

@ -53,7 +53,7 @@ public:
{
return StorageID("", getStorageID().table_name + "_blocks");
}
StoragePtr getParentStorage() const { return parent_storage; }
StoragePtr getParentStorage() const { return global_context.getTable(select_table_id); }
NameAndTypePair getColumn(const String & column_name) const override;
bool hasColumn(const String & column_name) const override;
@ -65,12 +65,7 @@ public:
return inner_subquery->clone();
return nullptr;
}
ASTPtr getInnerBlocksQuery() const
{
if (inner_blocks_query)
return inner_blocks_query->clone();
return nullptr;
}
ASTPtr getInnerBlocksQuery();
/// It is passed inside the query and solved at its level.
bool supportsSampling() const override { return true; }
@ -177,10 +172,9 @@ private:
ASTPtr inner_blocks_query; /// query over the mergeable blocks to produce final result
Context & global_context;
std::unique_ptr<Context> live_view_context;
StoragePtr parent_storage;
bool is_temporary = false;
/// Mutex to protect access to sample block
/// Mutex to protect access to sample block and inner_blocks_query
mutable std::mutex sample_block_lock;
mutable Block sample_block;

View File

@ -101,6 +101,7 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
extern const int READONLY_SETTING;
extern const int ABORTED;
extern const int UNEXPECTED_AST_STRUCTURE;
}
@ -629,7 +630,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new
{
auto new_ttl_entry = create_ttl_entry(ast);
if (!only_check)
column_ttl_entries_by_name.emplace(name, new_ttl_entry);
column_ttl_entries_by_name[name] = new_ttl_entry;
}
}
}
@ -637,36 +638,35 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new
if (new_ttl_table_ast)
{
std::vector<TTLEntry> update_move_ttl_entries;
ASTPtr update_ttl_table_ast = nullptr;
TTLEntry update_ttl_table_entry;
TTLEntry update_rows_ttl_entry;
bool seen_delete_ttl = false;
for (auto ttl_element_ptr : new_ttl_table_ast->children)
{
ASTTTLElement & ttl_element = static_cast<ASTTTLElement &>(*ttl_element_ptr);
if (ttl_element.destination_type == PartDestinationType::DELETE)
const auto * ttl_element = ttl_element_ptr->as<ASTTTLElement>();
if (!ttl_element)
throw Exception("Unexpected AST element in TTL expression", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
if (ttl_element->destination_type == PartDestinationType::DELETE)
{
if (seen_delete_ttl)
{
throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
}
auto new_ttl_table_entry = create_ttl_entry(ttl_element.children[0]);
auto new_rows_ttl_entry = create_ttl_entry(ttl_element->children[0]);
if (!only_check)
{
update_ttl_table_ast = ttl_element.children[0];
update_ttl_table_entry = new_ttl_table_entry;
}
update_rows_ttl_entry = new_rows_ttl_entry;
seen_delete_ttl = true;
}
else
{
auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]);
auto new_ttl_entry = create_ttl_entry(ttl_element->children[0]);
new_ttl_entry.entry_ast = ttl_element_ptr;
new_ttl_entry.destination_type = ttl_element.destination_type;
new_ttl_entry.destination_name = ttl_element.destination_name;
new_ttl_entry.destination_type = ttl_element->destination_type;
new_ttl_entry.destination_name = ttl_element->destination_name;
if (!new_ttl_entry.getDestination(getStoragePolicy()))
{
String message;
@ -684,8 +684,8 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new
if (!only_check)
{
ttl_table_entry = update_ttl_table_entry;
ttl_table_ast = update_ttl_table_ast;
rows_ttl_entry = update_rows_ttl_entry;
ttl_table_ast = new_ttl_table_ast;
auto move_ttl_entries_lock = std::lock_guard<std::mutex>(move_ttl_entries_mutex);
move_ttl_entries = update_move_ttl_entries;

View File

@ -576,8 +576,10 @@ public:
bool hasSortingKey() const { return !sorting_key_columns.empty(); }
bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
bool hasSkipIndices() const { return !skip_indices.empty(); }
bool hasTableTTL() const { return ttl_table_ast != nullptr; }
bool hasAnyColumnTTL() const { return !column_ttl_entries_by_name.empty(); }
bool hasAnyMoveTTL() const { return !move_ttl_entries.empty(); }
bool hasRowsTTL() const { return !rows_ttl_entry.isEmpty(); }
/// Check that the part is not broken and calculate the checksums for it if they are not present.
MutableDataPartPtr loadPartAndFixMetadata(const DiskPtr & disk, const String & relative_path);
@ -735,6 +737,8 @@ public:
/// Checks if given part already belongs destination disk or volume for this rule.
bool isPartInDestination(const StoragePolicyPtr & policy, const MergeTreeDataPart & part) const;
bool isEmpty() const { return expression == nullptr; }
};
std::optional<TTLEntry> selectTTLEntryForTTLInfos(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const;
@ -742,7 +746,7 @@ public:
using TTLEntriesByName = std::unordered_map<String, TTLEntry>;
TTLEntriesByName column_ttl_entries_by_name;
TTLEntry ttl_table_entry;
TTLEntry rows_ttl_entry;
/// This mutex is required for background move operations which do not obtain global locks.
mutable std::mutex move_ttl_entries_mutex;

View File

@ -278,8 +278,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
}
if (data.hasTableTTL())
updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
if (data.hasRowsTTL())
updateTTL(data.rows_ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
for (const auto & [name, ttl_entry] : data.column_ttl_entries_by_name)
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);

View File

@ -217,7 +217,7 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
per_part_sum_marks.push_back(sum_marks);
per_part_columns_lock.emplace_back(part.data_part->columns_lock);
per_part_columns_lock.emplace_back(part.data_part, part.data_part->columns_lock);
auto [required_columns, required_pre_columns, should_reorder] =
getReadTaskColumns(data, part.data_part, column_names, prewhere_info, check_columns);

View File

@ -3,6 +3,7 @@
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/SelectQueryInfo.h>
#include <mutex>
@ -93,7 +94,7 @@ private:
const size_t threads, const size_t sum_marks, std::vector<size_t> per_part_sum_marks,
RangesInDataParts & parts, const size_t min_marks_for_concurrent_read);
std::vector<std::shared_lock<std::shared_mutex>> per_part_columns_lock;
std::vector<std::pair<MergeTreeData::DataPartPtr, std::shared_lock<std::shared_mutex>>> per_part_columns_lock;
const MergeTreeData & data;
Names column_names;
bool do_not_steal_tasks;

View File

@ -55,15 +55,6 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr
ttl_table = formattedAST(data.ttl_table_ast);
std::ostringstream ttl_move_stream;
for (const auto & ttl_entry : data.move_ttl_entries)
{
if (ttl_move_stream.tellp() > 0)
ttl_move_stream << ", ";
ttl_move_stream << formattedAST(ttl_entry.entry_ast);
}
ttl_move = ttl_move_stream.str();
skip_indices = data.getIndices().toString();
if (data.canUseAdaptiveGranularity())
index_granularity_bytes = data_settings->index_granularity_bytes;
@ -95,9 +86,6 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
if (!ttl_table.empty())
out << "ttl: " << ttl_table << "\n";
if (!ttl_move.empty())
out << "move ttl: " << ttl_move << "\n";
if (!skip_indices.empty())
out << "indices: " << skip_indices << "\n";
@ -139,9 +127,6 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in)
if (checkString("ttl: ", in))
in >> ttl_table >> "\n";
if (checkString("move ttl: ", in))
in >> ttl_move >> "\n";
if (checkString("indices: ", in))
in >> skip_indices >> "\n";
@ -252,21 +237,6 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
ErrorCodes::METADATA_MISMATCH);
}
if (ttl_move != from_zk.ttl_move)
{
if (allow_alter)
{
diff.ttl_move_changed = true;
diff.new_ttl_move = from_zk.ttl_move;
}
else
throw Exception(
"Existing table metadata in ZooKeeper differs in move TTL."
" Stored in ZooKeeper: " + from_zk.ttl_move +
", local: " + ttl_move,
ErrorCodes::METADATA_MISMATCH);
}
if (skip_indices != from_zk.skip_indices)
{
if (allow_alter)

View File

@ -28,7 +28,6 @@ struct ReplicatedMergeTreeTableMetadata
String skip_indices;
String constraints;
String ttl_table;
String ttl_move;
UInt64 index_granularity_bytes;
ReplicatedMergeTreeTableMetadata() = default;
@ -54,12 +53,9 @@ struct ReplicatedMergeTreeTableMetadata
bool ttl_table_changed = false;
String new_ttl_table;
bool ttl_move_changed = false;
String new_ttl_move;
bool empty() const
{
return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed && !ttl_move_changed;
return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed;
}
};

View File

@ -49,6 +49,12 @@
#include <filesystem>
namespace
{
static const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY = 1;
static const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS = 2;
}
namespace DB
{
@ -63,6 +69,7 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int TOO_MANY_ROWS;
extern const int UNABLE_TO_SKIP_UNUSED_SHARDS;
}
namespace ActionLocks
@ -340,12 +347,15 @@ BlockInputStreams StorageDistributed::read(
: ClusterProxy::SelectStreamFactory(
header, processed_stage, QualifiedTableName{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables());
UInt64 force = settings.force_optimize_skip_unused_shards;
if (settings.optimize_skip_unused_shards)
{
ClusterPtr smaller_cluster;
auto table_id = getStorageID();
if (has_sharding_key)
{
auto smaller_cluster = skipUnusedShards(cluster, query_info);
auto table_id = getStorageID();
smaller_cluster = skipUnusedShards(cluster, query_info);
if (smaller_cluster)
{
@ -354,10 +364,27 @@ BlockInputStreams StorageDistributed::read(
"Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): "
" " << makeFormattedListOfShards(cluster));
}
else
}
if (!smaller_cluster)
{
LOG_DEBUG(log, "Reading from " << table_id.getNameForLogs() <<
(has_sharding_key ? "" : "(no sharding key)") << ": "
"Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - "
"the query will be sent to all shards of the cluster");
if (force)
{
LOG_DEBUG(log, "Reading from " << table_id.getNameForLogs() << ": "
"Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster");
std::stringstream exception_message;
if (has_sharding_key)
exception_message << "No sharding key";
else
exception_message << "Sharding key " << sharding_key_column_name << " is not used";
if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS)
throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS);
if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key)
throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS);
}
}
}
@ -548,11 +575,6 @@ void StorageDistributed::ClusterNodeData::shutdownAndDropAllData()
/// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const SelectQueryInfo & query_info)
{
if (!has_sharding_key)
{
throw Exception("Internal error: cannot determine shards of a distributed table if no sharding expression is supplied", ErrorCodes::LOGICAL_ERROR);
}
const auto & select = query_info.query->as<ASTSelectQuery &>();
if (!select.prewhere() && !select.where())

Some files were not shown because too many files have changed in this diff Show More