diff --git a/.gitmodules b/.gitmodules index f6f2c652004..dab5b985760 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,3 +140,7 @@ [submodule "contrib/ryu"] path = contrib/ryu url = https://github.com/ClickHouse-Extras/ryu.git +[submodule "contrib/avro"] + path = contrib/avro + url = https://github.com/ClickHouse-Extras/avro.git + ignore = untracked diff --git a/AUTHORS b/AUTHORS index db769ac16ce..12838d7fa14 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,43 +1,2 @@ -The following authors have created the source code of "ClickHouse" -published and distributed by YANDEX LLC as the owner: - -Alexander Makarov -Alexander Prudaev -Alexey Arno -Alexey Milovidov -Alexey Tronov -Alexey Vasiliev -Alexey Zatelepin -Amy Krishnevsky -Andrey M -Andrey Mironov -Andrey Urusov -Anton Tikhonov -Dmitry Bilunov -Dmitry Galuza -Eugene Konkov -Evgeniy Gatov -Ilya Khomutov -Ilya Korolev -Ivan Blinkov -Maxim Nikulin -Michael Kolupaev -Michael Razuvaev -Nikolai Kochetov -Nikolay Vasiliev -Nikolay Volosatov -Pavel Artemkin -Pavel Kartaviy -Roman Nozdrin -Roman Peshkurov -Sergey Fedorov -Sergey Lazarev -Sergey Magidovich -Sergey Serebryanik -Sergey Veletskiy -Vasily Okunev -Vitaliy Lyudvichenko -Vladimir Chebotarev -Vsevolod Orlov -Vyacheslav Alipov -Yuriy Galitskiy +To see the list of authors who created the source code of ClickHouse, published and distributed by YANDEX LLC as the owner, +run "SELECT * FROM system.contributors;" query on any ClickHouse server. diff --git a/CMakeLists.txt b/CMakeLists.txt index d37cdfc3af8..c194ea5bdc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -352,7 +352,7 @@ include (cmake/find/simdjson.cmake) include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/orc.cmake) -include (cmake/find/replxx.cmake) +include (cmake/find/avro.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 89a6c1cd263..264d88f8553 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,12 +19,12 @@ In order for us (YANDEX LLC) to accept patches and other contributions from you, By adopting the CLA, you state the following: * You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, -* You has read the terms and conditions of the CLA and agree with them in full, +* You have read the terms and conditions of the CLA and agree with them in full, * You are legally able to provide and license your contributions as stated, * We may use your contributions for our open source projects and for any other our project too, -* We rely on your assurances concerning the rights of third parties in relation to your contributes. +* We rely on your assurances concerning the rights of third parties in relation to your contributions. -If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you has already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. +If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you have already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. 
If you have already adopted terms and conditions of the CLA, you are able to provide your contributes. When you submit your pull request, please add the following information into it: diff --git a/LICENSE b/LICENSE index ef36c40c4b0..f79538892b8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2019 Yandex LLC +Copyright 2016-2020 Yandex LLC Apache License Version 2.0, January 2004 @@ -188,7 +188,7 @@ Copyright 2016-2019 Yandex LLC same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2019 Yandex LLC + Copyright 2016-2020 Yandex LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/cmake/find/avro.cmake b/cmake/find/avro.cmake new file mode 100644 index 00000000000..cdb3fc84d3d --- /dev/null +++ b/cmake/find/avro.cmake @@ -0,0 +1,28 @@ +option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES}) + +if (ENABLE_AVRO) + +option (USE_INTERNAL_AVRO_LIBRARY "Set to FALSE to use system avro library instead of bundled" ${NOT_UNBUNDLED}) + +if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/CMakeLists.txt") + if(USE_INTERNAL_AVRO_LIBRARY) + message(WARNING "submodule contrib/avro is missing. to fix try run: \n git submodule update --init --recursive") + endif() + set(MISSING_INTERNAL_AVRO_LIBRARY 1) + set(USE_INTERNAL_AVRO_LIBRARY 0) +endif() + +if (NOT USE_INTERNAL_AVRO_LIBRARY) +elseif(NOT MISSING_INTERNAL_AVRO_LIBRARY) + include(cmake/find/snappy.cmake) + set(AVROCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/include") + set(AVROCPP_LIBRARY avrocpp) +endif () + +if (AVROCPP_LIBRARY AND AVROCPP_INCLUDE_DIR) + set(USE_AVRO 1) +endif() + +endif() + +message (STATUS "Using avro=${USE_AVRO}: ${AVROCPP_INCLUDE_DIR} : ${AVROCPP_LIBRARY}") diff --git a/cmake/find/boost.cmake b/cmake/find/boost.cmake index 6776d0cea06..ec10a34d839 100644 --- a/cmake/find/boost.cmake +++ b/cmake/find/boost.cmake @@ -31,6 +31,7 @@ if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY) set (Boost_SYSTEM_LIBRARY boost_system_internal) set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal) set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY}) + set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal) set (Boost_REGEX_LIBRARY boost_regex_internal) set (Boost_INCLUDE_DIRS) @@ -48,4 +49,4 @@ if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY) list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost") endif () -message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_REGEX_LIBRARY}") +message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}") diff --git a/cmake/find/poco.cmake b/cmake/find/poco.cmake index b44d2932276..0c676d374f1 100644 --- a/cmake/find/poco.cmake +++ b/cmake/find/poco.cmake @@ -14,6 +14,7 @@ if (NOT ENABLE_LIBRARIES) set (ENABLE_POCO_REDIS ${ENABLE_LIBRARIES} CACHE BOOL "") set (ENABLE_POCO_ODBC ${ENABLE_LIBRARIES} CACHE BOOL "") set (ENABLE_POCO_SQL ${ENABLE_LIBRARIES} CACHE BOOL "") + set (ENABLE_POCO_JSON ${ENABLE_LIBRARIES} CACHE BOOL "") endif () set (POCO_COMPONENTS Net XML SQL Data) @@ -34,6 +35,9 @@ if (NOT DEFINED ENABLE_POCO_ODBC OR ENABLE_POCO_ODBC) list (APPEND POCO_COMPONENTS DataODBC) list (APPEND 
POCO_COMPONENTS SQLODBC) endif () +if (NOT DEFINED ENABLE_POCO_JSON OR ENABLE_POCO_JSON) + list (APPEND POCO_COMPONENTS JSON) +endif () if (NOT USE_INTERNAL_POCO_LIBRARY) find_package (Poco COMPONENTS ${POCO_COMPONENTS}) @@ -112,6 +116,11 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) endif () endif () + if (NOT DEFINED ENABLE_POCO_JSON OR ENABLE_POCO_JSON) + set (Poco_JSON_LIBRARY PocoJSON) + set (Poco_JSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/poco/JSON/include/") + endif () + if (OPENSSL_FOUND AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)) set (Poco_NetSSL_LIBRARY PocoNetSSL ${OPENSSL_LIBRARIES}) set (Poco_Crypto_LIBRARY PocoCrypto ${OPENSSL_LIBRARIES}) @@ -145,8 +154,11 @@ endif () if (Poco_SQLODBC_LIBRARY AND ODBC_FOUND) set (USE_POCO_SQLODBC 1) endif () +if (Poco_JSON_LIBRARY) + set (USE_POCO_JSON 1) +endif () -message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") +message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_LIBRARY},${Poco_JSON_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}, JSON=${USE_POCO_JSON}") # How to make sutable poco: # use branch: diff --git a/cmake/find/replxx.cmake b/cmake/find/replxx.cmake deleted file mode 100644 index 3a0e5917b04..00000000000 --- a/cmake/find/replxx.cmake +++ /dev/null @@ -1,40 +0,0 @@ -option (ENABLE_REPLXX "Enable replxx support" ${NOT_UNBUNDLED}) - -if (ENABLE_REPLXX) - option (USE_INTERNAL_REPLXX "Use internal replxx library" ${NOT_UNBUNDLED}) - - if (USE_INTERNAL_REPLXX AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/replxx/README.md") - message (WARNING "submodule contrib/replxx is missing. 
to fix try run: \n git submodule update --init --recursive") - set (USE_INTERNAL_REPLXX 0) - endif () - - if (NOT USE_INTERNAL_REPLXX) - find_library(LIBRARY_REPLXX NAMES replxx replxx-static) - find_path(INCLUDE_REPLXX replxx.hxx) - - add_library(replxx UNKNOWN IMPORTED) - set_property(TARGET replxx PROPERTY IMPORTED_LOCATION ${LIBRARY_REPLXX}) - target_include_directories(replxx PUBLIC ${INCLUDE_REPLXX}) - - set(CMAKE_REQUIRED_LIBRARIES replxx) - check_cxx_source_compiles( - " - #include - int main() { - replxx::Replxx rx; - } - " - EXTERNAL_REPLXX_WORKS - ) - - if (NOT EXTERNAL_REPLXX_WORKS) - message (FATAL_ERROR "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}") - endif () - endif () - - set(USE_REPLXX 1) - - message (STATUS "Using replxx") -else () - set(USE_REPLXX 0) -endif () diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 8a3dd7f7634..13947425f7b 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -53,6 +53,7 @@ if (SANITIZE) set (USE_CAPNP 0 CACHE BOOL "") set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "") set (USE_ORC 0 CACHE BOOL "") + set (USE_AVRO 0 CACHE BOOL "") set (ENABLE_SSL 0 CACHE BOOL "") elseif (SANITIZE STREQUAL "thread") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f81d616cddd..7c9db5bb06f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -146,6 +146,20 @@ if (ENABLE_ICU AND USE_INTERNAL_ICU_LIBRARY) add_subdirectory (icu-cmake) endif () +if(USE_INTERNAL_SNAPPY_LIBRARY) + set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "") + if (NOT MAKE_STATIC_LIBRARIES) + set(BUILD_SHARED_LIBS 1) # TODO: set at root dir + endif() + + add_subdirectory(snappy) + + set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy") + if(SANITIZE STREQUAL "undefined") + target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined) + endif() +endif() + if (USE_INTERNAL_PARQUET_LIBRARY) if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) # We dont use arrow's cmakefiles because they uses too many depends and download some libs in compile time @@ -189,20 +203,6 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) endif() else() - if(USE_INTERNAL_SNAPPY_LIBRARY) - set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "") - if (NOT MAKE_STATIC_LIBRARIES) - set(BUILD_SHARED_LIBS 1) # TODO: set at root dir - endif() - - add_subdirectory(snappy) - - set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy") - if(SANITIZE STREQUAL "undefined") - target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined) - endif() - endif() - add_subdirectory(arrow-cmake) # The library is large - avoid bloat. 
@@ -212,6 +212,10 @@ else() endif() endif() +if (USE_INTERNAL_AVRO_LIBRARY) + add_subdirectory(avro-cmake) +endif() + if (USE_INTERNAL_POCO_LIBRARY) set (POCO_VERBOSE_MESSAGES 0 CACHE INTERNAL "") set (save_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) @@ -332,6 +336,4 @@ if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() -if (USE_INTERNAL_REPLXX) - add_subdirectory (replxx-cmake) -endif() +add_subdirectory(replxx-cmake) diff --git a/contrib/avro b/contrib/avro new file mode 160000 index 00000000000..5b2752041c8 --- /dev/null +++ b/contrib/avro @@ -0,0 +1 @@ +Subproject commit 5b2752041c8d2f75eb5c1dbec8b4c25fc0e24d12 diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f544b3c50cd --- /dev/null +++ b/contrib/avro-cmake/CMakeLists.txt @@ -0,0 +1,70 @@ +set(AVROCPP_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++) +set(AVROCPP_INCLUDE_DIR ${AVROCPP_ROOT_DIR}/api) +set(AVROCPP_SOURCE_DIR ${AVROCPP_ROOT_DIR}/impl) + +set (CMAKE_CXX_STANDARD 17) + +if (EXISTS ${AVROCPP_ROOT_DIR}/../../share/VERSION.txt) + file(READ "${AVROCPP_ROOT_DIR}/../../share/VERSION.txt" + AVRO_VERSION) +endif() + +string(REPLACE "\n" "" AVRO_VERSION ${AVRO_VERSION}) +set (AVRO_VERSION_MAJOR ${AVRO_VERSION}) +set (AVRO_VERSION_MINOR "0") + +set (AVROCPP_SOURCE_FILES + ${AVROCPP_SOURCE_DIR}/Compiler.cc + ${AVROCPP_SOURCE_DIR}/Node.cc + ${AVROCPP_SOURCE_DIR}/LogicalType.cc + ${AVROCPP_SOURCE_DIR}/NodeImpl.cc + ${AVROCPP_SOURCE_DIR}/ResolverSchema.cc + ${AVROCPP_SOURCE_DIR}/Schema.cc + ${AVROCPP_SOURCE_DIR}/Types.cc + ${AVROCPP_SOURCE_DIR}/ValidSchema.cc + ${AVROCPP_SOURCE_DIR}/Zigzag.cc + ${AVROCPP_SOURCE_DIR}/BinaryEncoder.cc + ${AVROCPP_SOURCE_DIR}/BinaryDecoder.cc + ${AVROCPP_SOURCE_DIR}/Stream.cc + ${AVROCPP_SOURCE_DIR}/FileStream.cc + ${AVROCPP_SOURCE_DIR}/Generic.cc + ${AVROCPP_SOURCE_DIR}/GenericDatum.cc + ${AVROCPP_SOURCE_DIR}/DataFile.cc + ${AVROCPP_SOURCE_DIR}/parsing/Symbol.cc + ${AVROCPP_SOURCE_DIR}/parsing/ValidatingCodec.cc + ${AVROCPP_SOURCE_DIR}/parsing/JsonCodec.cc + ${AVROCPP_SOURCE_DIR}/parsing/ResolvingDecoder.cc + ${AVROCPP_SOURCE_DIR}/json/JsonIO.cc + ${AVROCPP_SOURCE_DIR}/json/JsonDom.cc + ${AVROCPP_SOURCE_DIR}/Resolver.cc + ${AVROCPP_SOURCE_DIR}/Validator.cc + ) + +add_library (avrocpp ${AVROCPP_SOURCE_FILES}) +set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}) + +target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) + +target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) +target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY}) + +if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) + target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) + target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) + target_link_libraries (avrocpp ${SNAPPY_LIBRARY}) +endif () + +if (COMPILER_GCC) + set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor) +elseif (COMPILER_CLANG) + set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor) +endif () + +target_compile_options(avrocpp PRIVATE ${SUPPRESS_WARNINGS}) + +# create a symlink to include headers with +ADD_CUSTOM_TARGET(avro_symlink_headers ALL + COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include + COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro +) +add_dependencies(avrocpp avro_symlink_headers) \ No newline at end of file diff --git a/contrib/boost b/contrib/boost index 830e51edb59..86be2aef20b 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ 
-Subproject commit 830e51edb59c4f37a8638138581e1e56c29ac44f +Subproject commit 86be2aef20bee2356b744e5569eed6eaded85dbe diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index d9a8a70ef17..54dcd750320 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -37,3 +37,8 @@ target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal) if (USE_INTERNAL_PARQUET_LIBRARY) add_boost_lib(regex) endif() + +if (USE_INTERNAL_AVRO_LIBRARY) + add_boost_lib(iostreams) + target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES}) +endif() diff --git a/contrib/murmurhash/include/murmurhash3.h b/contrib/murmurhash/include/murmurhash3.h index 256da1ad9da..eb16425576a 100644 --- a/contrib/murmurhash/include/murmurhash3.h +++ b/contrib/murmurhash/include/murmurhash3.h @@ -23,6 +23,10 @@ typedef unsigned __int64 uint64_t; #endif // !defined(_MSC_VER) +#ifdef __cplusplus +extern "C" { +#endif + //----------------------------------------------------------------------------- void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); @@ -32,3 +36,7 @@ void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); //----------------------------------------------------------------------------- + +#ifdef __cplusplus +} +#endif diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index 1b27fd53070..1240eb56b39 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -1,18 +1,57 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") +option (ENABLE_REPLXX "Enable replxx support" ${ENABLE_LIBRARIES}) -set(SRCS - ${LIBRARY_DIR}/src/conversion.cxx - ${LIBRARY_DIR}/src/escape.cxx - ${LIBRARY_DIR}/src/history.cxx - ${LIBRARY_DIR}/src/io.cxx - ${LIBRARY_DIR}/src/prompt.cxx - ${LIBRARY_DIR}/src/replxx.cxx - ${LIBRARY_DIR}/src/replxx_impl.cxx - ${LIBRARY_DIR}/src/util.cxx - ${LIBRARY_DIR}/src/wcwidth.cpp - ${LIBRARY_DIR}/src/ConvertUTF.cpp -) +if (ENABLE_REPLXX) + option (USE_INTERNAL_REPLXX "Use internal replxx library" ${NOT_UNBUNDLED}) -add_library(replxx ${SRCS}) -target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) -target_compile_options(replxx PUBLIC -Wno-documentation) + if (USE_INTERNAL_REPLXX) + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/replxx") + + set(SRCS + ${LIBRARY_DIR}/src/conversion.cxx + ${LIBRARY_DIR}/src/ConvertUTF.cpp + ${LIBRARY_DIR}/src/escape.cxx + ${LIBRARY_DIR}/src/history.cxx + ${LIBRARY_DIR}/src/io.cxx + ${LIBRARY_DIR}/src/prompt.cxx + ${LIBRARY_DIR}/src/replxx_impl.cxx + ${LIBRARY_DIR}/src/replxx.cxx + ${LIBRARY_DIR}/src/util.cxx + ${LIBRARY_DIR}/src/wcwidth.cpp + ) + + add_library (replxx ${SRCS}) + target_include_directories(replxx PUBLIC ${LIBRARY_DIR}/include) + else () + find_library(LIBRARY_REPLXX NAMES replxx replxx-static) + find_path(INCLUDE_REPLXX replxx.hxx) + + add_library(replxx UNKNOWN IMPORTED) + set_property(TARGET replxx PROPERTY IMPORTED_LOCATION ${LIBRARY_REPLXX}) + target_include_directories(replxx PUBLIC ${INCLUDE_REPLXX}) + + set(CMAKE_REQUIRED_LIBRARIES replxx) + check_cxx_source_compiles( + " + #include + int main() { + replxx::Replxx rx; + } + " + EXTERNAL_REPLXX_WORKS + ) + + if (NOT EXTERNAL_REPLXX_WORKS) + message (FATAL_ERROR "replxx is unusable: ${LIBRARY_REPLXX} ${INCLUDE_REPLXX}") + endif () + endif () + + target_compile_options(replxx PUBLIC 
-Wno-documentation) + target_compile_definitions(replxx PUBLIC USE_REPLXX=1) + + message (STATUS "Using replxx") +else () + add_library(replxx INTERFACE) + target_compile_definitions(replxx INTERFACE USE_REPLXX=0) + + message (STATUS "Not using replxx (Beware! Runtime fallback to readline is possible!)") +endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d87ae447faa..eeda7aa6a1f 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -504,6 +504,10 @@ if (USE_POCO_NETSSL) dbms_target_link_libraries (PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) endif() +if (USE_POCO_JSON) + dbms_target_link_libraries (PRIVATE ${Poco_JSON_LIBRARY}) +endif() + dbms_target_link_libraries (PRIVATE ${Poco_Foundation_LIBRARY}) if (USE_ICU) @@ -522,6 +526,11 @@ if (USE_PARQUET) endif () endif () +if (USE_AVRO) + dbms_target_link_libraries(PRIVATE ${AVROCPP_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${AVROCPP_INCLUDE_DIR}) +endif () + if (OPENSSL_CRYPTO_LIBRARY) dbms_target_link_libraries (PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) target_link_libraries (clickhouse_common_io PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/dbms/benchmark/clickhouse/benchmark-chyt.sh b/dbms/benchmark/clickhouse/benchmark-chyt.sh new file mode 100755 index 00000000000..efc790a029a --- /dev/null +++ b/dbms/benchmark/clickhouse/benchmark-chyt.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +QUERIES_FILE="queries.sql" +TABLE=$1 +TRIES=3 + +cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do + + echo -n "[" + for i in $(seq 1 $TRIES); do + while true; do + RES=$(command time -f %e -o /dev/stdout curl -sS --location-trusted -H "Authorization: OAuth $YT_TOKEN" "$YT_PROXY.yt.yandex.net/query?default_format=Null&database=*$YT_CLIQUE_ID" --data-binary @- <<< "$query" 2>/dev/null) && break; + done + + [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + done + echo "]," +done diff --git a/dbms/benchmark/clickhouse/benchmark-yql.sh b/dbms/benchmark/clickhouse/benchmark-yql.sh new file mode 100755 index 00000000000..7d30d39e7d3 --- /dev/null +++ b/dbms/benchmark/clickhouse/benchmark-yql.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +QUERIES_FILE="queries.sql" +TABLE=$1 +TRIES=3 + +cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do + + echo -n "[" + for i in $(seq 1 $TRIES); do + while true; do + RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time) && break; + done + + [[ "$?" 
== "0" ]] && echo -n "${RES}" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + done + echo "]," +done diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index d4c157ac3b0..11ade559a8d 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -4,7 +4,7 @@ set(CLICKHOUSE_CLIENT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Suggest.cpp ) -set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) include(CheckSymbolExists) check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 76a225e2597..6c5f9e5fb30 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -2,6 +2,12 @@ #include "ConnectionParameters.h" #include "Suggest.h" +#if USE_REPLXX +# include +#else +# include +#endif + #include #include #include @@ -19,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -496,7 +501,11 @@ private: if (!history_file.empty() && !Poco::File(history_file).exists()) Poco::File(history_file).createFile(); - LineReader lr(&Suggest::instance(), history_file, '\\', config().has("multiline") ? ';' : 0); +#if USE_REPLXX + ReplxxLineReader lr(Suggest::instance(), history_file, '\\', config().has("multiline") ? ';' : 0); +#else + LineReader lr(history_file, '\\', config().has("multiline") ? ';' : 0); +#endif do { @@ -504,6 +513,12 @@ private: if (input.empty()) break; + if (input.ends_with("\\G")) + { + input.resize(input.size() - 2); + has_vertical_output_suffix = true; + } + try { if (!process(input)) diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index cac561117b4..5cfceaeb592 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -111,7 +111,7 @@ void LocalServer::tryInitPath() /// In case of empty path set paths to helpful directories std::string cd = Poco::Path::current(); - context->setTemporaryPath(cd + "tmp"); + context->setTemporaryStorage(cd + "tmp"); context->setFlagsPath(cd + "flags"); context->setUserFilesPath(""); // user's files are everywhere } diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index b2b3298693e..6c7a7383654 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -351,7 +352,8 @@ void HTTPHandler::processQuery( if (buffer_until_eof) { - std::string tmp_path_template = context.getTemporaryPath() + "http_buffers/"; + const std::string tmp_path(context.getTemporaryVolume()->getNextDisk()->getPath()); + const std::string tmp_path_template(tmp_path + "http_buffers/"); auto create_tmp_disk_buffer = [tmp_path_template] (const WriteBufferPtr &) { @@ -590,7 +592,11 @@ void HTTPHandler::processQuery( customizeContext(context); executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response] (const String & content_type) { response.setContentType(content_type); }, + [&response] (const String & content_type, const String 
& format) + { + response.setContentType(content_type); + response.add("X-ClickHouse-Format", format); + }, [&response] (const String & current_query_id) { response.add("X-ClickHouse-Query-Id", current_query_id); }); if (used_output.hasDelayed()) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 64a78702bf0..9dd107f9d5f 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -282,7 +282,8 @@ void MySQLHandler::comQuery(ReadBuffer & payload) else { bool with_output = false; - std::function set_content_type = [&with_output](const String &) -> void { + std::function set_content_type_and_format = [&with_output](const String &, const String &) -> void + { with_output = true; }; @@ -305,7 +306,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload) ReadBufferFromString replacement(replacement_query); Context query_context = connection_context; - executeQuery(should_replace ? replacement : payload, *out, true, query_context, set_content_type, nullptr); + executeQuery(should_replace ? replacement : payload, *out, true, query_context, set_content_type_and_format, {}); if (!with_output) packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 3ff943d519e..0cd357bbd94 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -77,6 +77,31 @@ namespace CurrentMetrics extern const Metric VersionInteger; } +namespace +{ + +void setupTmpPath(Logger * log, const std::string & path) +{ + LOG_DEBUG(log, "Setting up " << path << " to store temporary data in it"); + + Poco::File(path).createDirectories(); + + /// Clearing old temporary files. + Poco::DirectoryIterator dir_end; + for (Poco::DirectoryIterator it(path); it != dir_end; ++it) + { + if (it->isFile() && startsWith(it.name(), "tmp")) + { + LOG_DEBUG(log, "Removing old temporary file " << it->path()); + it->remove(); + } + else + LOG_DEBUG(log, "Skipped file in temporary path " << it->path()); + } +} + +} + namespace DB { @@ -331,22 +356,14 @@ int Server::main(const std::vector & /*args*/) DateLUT::instance(); LOG_TRACE(log, "Initialized DateLUT with time zone '" << DateLUT::instance().getTimeZone() << "'."); - /// Directory with temporary data for processing of heavy queries. + + /// Storage with temporary data for processing of heavy queries. { std::string tmp_path = config().getString("tmp_path", path + "tmp/"); - global_context->setTemporaryPath(tmp_path); - Poco::File(tmp_path).createDirectories(); - - /// Clearing old temporary files. - Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator it(tmp_path); it != dir_end; ++it) - { - if (it->isFile() && startsWith(it.name(), "tmp")) - { - LOG_DEBUG(log, "Removing old temporary file " << it->path()); - it->remove(); - } - } + std::string tmp_policy = config().getString("tmp_policy", ""); + const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy); + for (const DiskPtr & disk : volume->disks) + setupTmpPath(log, disk->getPath()); } /** Directory with 'flags': files indicating temporary settings for the server set by system administrator. @@ -864,7 +881,11 @@ int Server::main(const std::vector & /*args*/) for (auto & server : servers) server->start(); - setTextLog(global_context->getTextLog()); + { + String level_str = config().getString("text_log.level", ""); + int level = level_str.empty() ? 
INT_MAX : Poco::Logger::parseLevel(level_str); + setTextLog(global_context->getTextLog(), level); + } buildLoggers(config(), logger()); main_config_reloader->start(); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 29bba1cca5e..1975349fcf1 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -591,11 +591,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads) } }); - /// Wait in case of exception. Delete pipeline to release memory. + /// Wait in case of exception happened outside of pool. SCOPE_EXIT( - /// Clear queue in case if somebody is waiting lazy_format to push. lazy_format->finish(); - lazy_format->clearQueue(); try { @@ -604,72 +602,58 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads) catch (...) { /// If exception was thrown during pipeline execution, skip it while processing other exception. + tryLogCurrentException(log); } - - /// pipeline = QueryPipeline() ); - while (true) + while (!lazy_format->isFinished() && !exception) { - Block block; - - while (true) + if (isQueryCancelled()) { - if (isQueryCancelled()) - { - /// A packet was received requesting to stop execution of the request. - executor->cancel(); - - break; - } - else - { - if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay) - { - /// Some time passed and there is a progress. - after_send_progress.restart(); - sendProgress(); - } - - sendLogs(); - - if ((block = lazy_format->getBlock(query_context->getSettingsRef().interactive_delay / 1000))) - break; - - if (lazy_format->isFinished()) - break; - - if (exception) - { - pool.wait(); - break; - } - } - } - - /** If data has run out, we will send the profiling data and total values to - * the last zero block to be able to use - * this information in the suffix output of stream. - * If the request was interrupted, then `sendTotals` and other methods could not be called, - * because we have not read all the data yet, - * and there could be ongoing calculations in other threads at the same time. - */ - if (!block && !isQueryCancelled()) - { - pool.wait(); - pipeline.finalize(); - - sendTotals(lazy_format->getTotals()); - sendExtremes(lazy_format->getExtremes()); - sendProfileInfo(lazy_format->getProfileInfo()); - sendProgress(); - sendLogs(); - } - - sendData(block); - if (!block) + /// A packet was received requesting to stop execution of the request. + executor->cancel(); break; + } + + if (after_send_progress.elapsed() / 1000 >= query_context->getSettingsRef().interactive_delay) + { + /// Some time passed and there is a progress. + after_send_progress.restart(); + sendProgress(); + } + + sendLogs(); + + if (auto block = lazy_format->getBlock(query_context->getSettingsRef().interactive_delay / 1000)) + { + if (!state.io.null_format) + sendData(block); + } } + + /// Finish lazy_format before waiting. Otherwise some thread may write into it, and waiting will lock. + lazy_format->finish(); + pool.wait(); + + /** If data has run out, we will send the profiling data and total values to + * the last zero block to be able to use + * this information in the suffix output of stream. + * If the request was interrupted, then `sendTotals` and other methods could not be called, + * because we have not read all the data yet, + * and there could be ongoing calculations in other threads at the same time. 
+ */ + if (!isQueryCancelled()) + { + pipeline.finalize(); + + sendTotals(lazy_format->getTotals()); + sendExtremes(lazy_format->getExtremes()); + sendProfileInfo(lazy_format->getProfileInfo()); + sendProgress(); + sendLogs(); + } + + sendData({}); } state.io.onFinish(); diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index c1479eaa528..1ed9c75e5ec 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -3,27 +3,27 @@ NOTE: User and query level settings are set up in "users.xml" file. --> - - - + + + - - + + - + trace /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log @@ -34,6 +34,7 @@ 8123 9000 + 9004 /var/lib/clickhouse/tmp/ + + + /var/lib/clickhouse/user_files/ @@ -343,6 +355,11 @@ toStartOfHour(event_time) --> toYYYYMM(event_date) + + + 7500 @@ -377,10 +394,12 @@ diff --git a/dbms/programs/server/users.xml b/dbms/programs/server/users.xml index 87e6c406b0a..d631fbb0f8a 100644 --- a/dbms/programs/server/users.xml +++ b/dbms/programs/server/users.xml @@ -49,7 +49,7 @@ In first line will be password and in second - corresponding SHA256. How to generate double SHA1: - Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | openssl dgst -sha1 -binary | openssl dgst -sha1 + Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-' In first line will be password and in second - corresponding double SHA1. --> diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 61fd28f2a70..47240db8b0d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -309,7 +309,7 @@ protected: /// Uses a DFA based approach in order to better handle patterns without /// time assertions. /// - /// NOTE: This implementation relies on the assumption that the pattern are *small*. + /// NOTE: This implementation relies on the assumption that the pattern is *small*. /// /// This algorithm performs in O(mn) (with m the number of DFA states and N the number /// of events) with a memory consumption and memory allocations in O(m). It means that diff --git a/dbms/src/Common/Allocator.h b/dbms/src/Common/Allocator.h index 5d39d327243..12116f6d8d3 100644 --- a/dbms/src/Common/Allocator.h +++ b/dbms/src/Common/Allocator.h @@ -50,16 +50,21 @@ * * P.S. This is also required, because tcmalloc can not allocate a chunk of * memory greater than 16 GB. + * + * P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows + * to override it during linkage when using ClickHouse as a library in + * third-party applications which may already use own allocator doing mmaps + * in the implementation of alloc/realloc. */ #ifdef NDEBUG - static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory * stomping bugs. Along with ASLR it will hopefully detect more issues than * ASan. The program may fail due to the limit on number of memory mappings. 
*/ - static constexpr size_t MMAP_THRESHOLD = 4096; + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 4096; #endif static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index adf40ed2951..fe5bca9f55a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -478,6 +478,7 @@ namespace ErrorCodes extern const int FILE_ALREADY_EXISTS = 504; extern const int CANNOT_DELETE_DIRECTORY = 505; extern const int UNEXPECTED_ERROR_CODE = 506; + extern const int UNABLE_TO_SKIP_UNUSED_SHARDS = 507; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 25da9674e4d..318da1a27f2 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -195,7 +195,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded << ", e.displayText() = " << e.displayText() << (with_stacktrace ? getExceptionStackTraceString(e) : "") << (with_extra_info ? getExtraExceptionInfo(e) : "") - << " (version " << VERSION_STRING << VERSION_OFFICIAL; + << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (...) {} } diff --git a/dbms/src/Common/RemoteHostFilter.cpp b/dbms/src/Common/RemoteHostFilter.cpp index 16aaac35dbe..4c4aa3bca81 100644 --- a/dbms/src/Common/RemoteHostFilter.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -1,12 +1,13 @@ #include -#include #include -#include #include +#include +#include #include #include #include + namespace DB { namespace ErrorCodes diff --git a/dbms/src/Common/RemoteHostFilter.h b/dbms/src/Common/RemoteHostFilter.h index 86743891051..48d9b2bda7c 100644 --- a/dbms/src/Common/RemoteHostFilter.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -1,17 +1,19 @@ #pragma once +#include #include #include -#include -#include +namespace Poco { class URI; } +namespace Poco { namespace Util { class AbstractConfiguration; } } + namespace DB { class RemoteHostFilter { /** - * This class checks if url is allowed. + * This class checks if URL is allowed. * If primary_hosts and regexp_hosts are empty all urls are allowed. */ public: @@ -25,6 +27,7 @@ private: std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml std::vector regexp_hosts; /// Allowed regexp () URL from config.xml - bool checkForDirectEntry(const std::string & str) const; /// Checks if the primary_hosts and regexp_hosts contain str. If primary_hosts and regexp_hosts are empty return true. + /// Checks if the primary_hosts and regexp_hosts contain str. If primary_hosts and regexp_hosts are empty return true. + bool checkForDirectEntry(const std::string & str) const; }; } diff --git a/dbms/src/Common/checkStackSize.cpp b/dbms/src/Common/checkStackSize.cpp index 16074e74281..10e93a8356c 100644 --- a/dbms/src/Common/checkStackSize.cpp +++ b/dbms/src/Common/checkStackSize.cpp @@ -23,7 +23,14 @@ namespace DB static thread_local void * stack_address = nullptr; static thread_local size_t max_stack_size = 0; -void checkStackSize() +/** It works fine when interpreters are instantiated by ClickHouse code in properly prepared threads, + * but there are cases when ClickHouse runs as a library inside another application. 
+ * If application is using user-space lightweight threads with manually allocated stacks, + * current implementation is not reasonable, as it has no way to properly check the remaining + * stack size without knowing the details of how stacks are allocated. + * We mark this function as weak symbol to be able to replace it in another ClickHouse-based products. + */ +__attribute__((__weak__)) void checkStackSize() { using namespace DB; diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 97587078128..fa3d0b8bd0e 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -53,6 +53,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algo support it). 0 means unlimited.", 0) \ + M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. By default, it is determined automatically.", 0) \ M(SettingMaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ M(SettingMaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. By default, it is determined automatically.", 0) \ M(SettingUInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ @@ -111,6 +112,7 @@ struct Settings : public SettingsCollection \ M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \ M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. 
Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ + M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \ \ M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \ M(SettingUInt64, min_chunk_bytes_for_parallel_parsing, (1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \ @@ -187,6 +189,7 @@ struct Settings : public SettingsCollection M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ M(SettingBool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(SettingBool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ + M(SettingString, input_format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(SettingBool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ \ @@ -198,6 +201,8 @@ struct Settings : public SettingsCollection M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ + M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ + M(SettingUInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ \ M(SettingBool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \ \ @@ -360,7 +365,7 @@ struct Settings : public SettingsCollection M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql' and 'odbc' table functions.", 0) \ \ - M(SettingBool, experimental_use_processors, false, "Use processors pipeline.", 0) \ + M(SettingBool, experimental_use_processors, true, "Use processors pipeline.", 0) \ \ M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \ M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. 
If disabled rapidjson will be used.", 0) \ diff --git a/dbms/src/Core/SettingsCollection.cpp b/dbms/src/Core/SettingsCollection.cpp index 60c18a19fa7..9e44e327fcc 100644 --- a/dbms/src/Core/SettingsCollection.cpp +++ b/dbms/src/Core/SettingsCollection.cpp @@ -62,7 +62,7 @@ void SettingNumber::set(const Field & x) template void SettingNumber::set(const String & x) { - set(completeParse(x)); + set(parseWithSizeSuffix(x)); } template <> diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index ea80ab7d427..4f350ba00d5 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -31,7 +31,6 @@ enum class TypeIndex Float64, Date, DateTime, - DateTime32 = DateTime, DateTime64, String, FixedString, @@ -158,8 +157,6 @@ using Decimal32 = Decimal; using Decimal64 = Decimal; using Decimal128 = Decimal; -// TODO (nemkov): consider making a strong typedef -//using DateTime32 = time_t; using DateTime64 = Decimal64; template <> struct TypeName { static const char * get() { return "Decimal32"; } }; diff --git a/dbms/src/Core/config_core.h.in b/dbms/src/Core/config_core.h.in index fdbd69decd3..2365340cf33 100644 --- a/dbms/src/Core/config_core.h.in +++ b/dbms/src/Core/config_core.h.in @@ -10,5 +10,6 @@ #cmakedefine01 USE_POCO_DATAODBC #cmakedefine01 USE_POCO_MONGODB #cmakedefine01 USE_POCO_REDIS +#cmakedefine01 USE_POCO_JSON #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL diff --git a/dbms/src/DataStreams/BlockIO.h b/dbms/src/DataStreams/BlockIO.h index 50af2922306..c043e297d1f 100644 --- a/dbms/src/DataStreams/BlockIO.h +++ b/dbms/src/DataStreams/BlockIO.h @@ -66,6 +66,8 @@ struct BlockIO finish_callback = rhs.finish_callback; exception_callback = rhs.exception_callback; + null_format = rhs.null_format; + return *this; } }; diff --git a/dbms/src/DataStreams/IBlockStream_fwd.h b/dbms/src/DataStreams/IBlockStream_fwd.h index 06319a55262..d74a9528ed9 100644 --- a/dbms/src/DataStreams/IBlockStream_fwd.h +++ b/dbms/src/DataStreams/IBlockStream_fwd.h @@ -12,5 +12,6 @@ class IBlockOutputStream; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; using BlockOutputStreamPtr = std::shared_ptr; +using BlockOutputStreams = std::vector; } diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 52f85f1349c..21422d0fe54 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace ProfileEvents @@ -21,10 +22,10 @@ namespace DB MergeSortingBlockInputStream::MergeSortingBlockInputStream( const BlockInputStreamPtr & input, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_, - size_t max_bytes_before_external_sort_, const std::string & tmp_path_, size_t min_free_disk_space_) + size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_) : description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_), max_bytes_before_remerge(max_bytes_before_remerge_), - max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_), + max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_), min_free_disk_space(min_free_disk_space_) { children.push_back(input); @@ -78,10 +79,14 @@ Block MergeSortingBlockInputStream::readImpl() */ if (max_bytes_before_external_sort && sum_bytes_in_blocks > 
max_bytes_before_external_sort) { - if (!enoughSpaceInDirectory(tmp_path, sum_bytes_in_blocks + min_free_disk_space)) - throw Exception("Not enough space for external sort in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); + size_t size = sum_bytes_in_blocks + min_free_disk_space; + auto reservation = tmp_volume->reserve(size); + if (!reservation) + throw Exception("Not enough space for external sort in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE); + const std::string tmp_path(reservation->getDisk()->getPath()); temporary_files.emplace_back(createTemporaryFile(tmp_path)); + const std::string & path = temporary_files.back()->path(); MergeSortingBlocksBlockInputStream block_in(blocks, description, max_merged_block_size, limit); diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index ce82f6bb120..5b157310765 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -18,6 +18,9 @@ namespace DB struct TemporaryFileStream; +class Volume; +using VolumePtr = std::shared_ptr; + namespace ErrorCodes { extern const int NOT_ENOUGH_SPACE; @@ -77,7 +80,7 @@ public: MergeSortingBlockInputStream(const BlockInputStreamPtr & input, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_, - size_t max_bytes_before_external_sort_, const std::string & tmp_path_, + size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_); String getName() const override { return "MergeSorting"; } @@ -97,7 +100,7 @@ private: size_t max_bytes_before_remerge; size_t max_bytes_before_external_sort; - const std::string tmp_path; + VolumePtr tmp_volume; size_t min_free_disk_space; Logger * log = &Logger::get("MergeSortingBlockInputStream"); diff --git a/dbms/src/DataStreams/NullAndDoCopyBlockInputStream.h b/dbms/src/DataStreams/NullAndDoCopyBlockInputStream.h index 8ef64cc5e05..8fe05c387a3 100644 --- a/dbms/src/DataStreams/NullAndDoCopyBlockInputStream.h +++ b/dbms/src/DataStreams/NullAndDoCopyBlockInputStream.h @@ -21,9 +21,19 @@ class NullAndDoCopyBlockInputStream : public IBlockInputStream { public: NullAndDoCopyBlockInputStream(const BlockInputStreamPtr & input_, BlockOutputStreamPtr output_) - : input(input_), output(output_) { - children.push_back(input_); + input_streams.push_back(input_); + output_streams.push_back(output_); + + for (auto & input_stream : input_streams) + children.push_back(input_stream); + } + + NullAndDoCopyBlockInputStream(const BlockInputStreams & input_, BlockOutputStreams & output_) + : input_streams(input_), output_streams(output_) + { + for (auto & input_stream : input_) + children.push_back(input_stream); } /// Suppress readPrefix and readSuffix, because they are called by copyData. @@ -39,13 +49,20 @@ public: protected: Block readImpl() override { - copyData(*input, *output); + /// We do not use cancel flag here. + /// If query was cancelled, it will be processed by child streams. + /// Part of the data will be processed. 
+ + if (input_streams.size() == 1 && output_streams.size() == 1) + copyData(*input_streams.at(0), *output_streams.at(0)); + else + copyData(input_streams, output_streams); return Block(); } private: - BlockInputStreamPtr input; - BlockOutputStreamPtr output; + BlockInputStreams input_streams; + BlockOutputStreams output_streams; }; } diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 0adc7b43ed1..90e2d95ad15 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -56,9 +56,24 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); query = materialized_view->getInnerQuery(); + std::unique_ptr insert = std::make_unique(); insert->database = inner_table_id.database_name; insert->table = inner_table_id.table_name; + + /// Get list of columns we get from select query. + auto header = InterpreterSelectQuery(query, *views_context, SelectQueryOptions().analyze()) + .getSampleBlock(); + + /// Insert only columns returned by select. + auto list = std::make_shared(); + for (auto & column : header) + /// But skip columns which storage doesn't have. + if (inner_table->hasColumn(column.name)) + list->children.emplace_back(std::make_shared(column.name)); + + insert->columns = std::move(list); + ASTPtr insert_query_ptr(insert.release()); InterpreterInsertQuery interpreter(insert_query_ptr, *views_context); BlockIO io = interpreter.execute(); diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 339f81321e4..c08abba3bdf 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -70,7 +70,7 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part - if (storage.hasTableTTL() && isTTLExpired(old_ttl_infos.table_ttl.max)) + if (storage.hasRowsTTL() && isTTLExpired(old_ttl_infos.table_ttl.max)) { rows_removed = data_part->rows_count; return {}; @@ -80,7 +80,7 @@ Block TTLBlockInputStream::readImpl() if (!block) return block; - if (storage.hasTableTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) + if (storage.hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) removeRowsWithExpiredTableTTL(block); removeValuesWithExpiredColumnTTL(block); @@ -106,10 +106,10 @@ void TTLBlockInputStream::readSuffixImpl() void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) { - storage.ttl_table_entry.expression->execute(block); + storage.rows_ttl_entry.expression->execute(block); const IColumn * ttl_column = - block.getByName(storage.ttl_table_entry.result_column).column.get(); + block.getByName(storage.rows_ttl_entry.result_column).column.get(); const auto & column_names = header.getNames(); MutableColumns result_columns; diff --git a/dbms/src/DataStreams/copyData.cpp b/dbms/src/DataStreams/copyData.cpp index 9d17596fc8d..fd4bfab28d8 100644 --- a/dbms/src/DataStreams/copyData.cpp +++ b/dbms/src/DataStreams/copyData.cpp @@ -1,6 +1,10 @@ +#include #include #include #include +#include +#include +#include namespace DB @@ -51,6 +55,79 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCall inline void doNothing(const Block &) {} +namespace +{ + + +struct ParallelInsertsHandler +{ 
+ using CencellationHook = std::function; + + explicit ParallelInsertsHandler(BlockOutputStreams & output_streams, CencellationHook cancellation_hook_, size_t num_threads) + : outputs(output_streams.size()), cancellation_hook(std::move(cancellation_hook_)) + { + exceptions.resize(num_threads); + + for (auto & output : output_streams) + outputs.push(output.get()); + } + + void onBlock(Block & block, size_t /*thread_num*/) + { + IBlockOutputStream * out = nullptr; + + outputs.pop(out); + out->write(block); + outputs.push(out); + } + + void onFinishThread(size_t /*thread_num*/) {} + void onFinish() {} + + void onException(std::exception_ptr & exception, size_t thread_num) + { + exceptions[thread_num] = exception; + cancellation_hook(); + } + + void rethrowFirstException() + { + for (auto & exception : exceptions) + if (exception) + std::rethrow_exception(exception); + } + + ConcurrentBoundedQueue outputs; + std::vector exceptions; + CencellationHook cancellation_hook; +}; + +} + +static void copyDataImpl(BlockInputStreams & inputs, BlockOutputStreams & outputs) +{ + for (auto & output : outputs) + output->writePrefix(); + + using Processor = ParallelInputsProcessor; + Processor * processor_ptr = nullptr; + + ParallelInsertsHandler handler(outputs, [&processor_ptr]() { processor_ptr->cancel(false); }, inputs.size()); + ParallelInputsProcessor processor(inputs, nullptr, inputs.size(), handler); + processor_ptr = &processor; + + processor.process(); + processor.wait(); + handler.rethrowFirstException(); + + /// readPrefix is called in ParallelInputsProcessor. + for (auto & input : inputs) + input->readSuffix(); + + for (auto & output : outputs) + output->writeSuffix(); +} + void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled) { auto is_cancelled_pred = [is_cancelled] () @@ -61,6 +138,10 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic & is_cancelled) { diff --git a/dbms/src/DataStreams/copyData.h b/dbms/src/DataStreams/copyData.h index f2bce8f411b..ae72dbd2421 100644 --- a/dbms/src/DataStreams/copyData.h +++ b/dbms/src/DataStreams/copyData.h @@ -16,6 +16,8 @@ class Block; */ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled = nullptr); +void copyData(BlockInputStreams & inputs, BlockOutputStreams & outputs); + void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled); void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, diff --git a/dbms/src/Disks/DiskSpaceMonitor.cpp b/dbms/src/Disks/DiskSpaceMonitor.cpp index 59b8c21119a..6cc6d7e04db 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.cpp +++ b/dbms/src/Disks/DiskSpaceMonitor.cpp @@ -111,6 +111,12 @@ Volume::Volume( << " < " << formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")"); } +DiskPtr Volume::getNextDisk() +{ + size_t start_from = last_used.fetch_add(1u, std::memory_order_relaxed); + size_t index = start_from % disks.size(); + return disks[index]; +} ReservationPtr Volume::reserve(UInt64 expected_size) { diff --git a/dbms/src/Disks/DiskSpaceMonitor.h b/dbms/src/Disks/DiskSpaceMonitor.h index 3d2216b545b..cb00944e149 100644 --- a/dbms/src/Disks/DiskSpaceMonitor.h +++ b/dbms/src/Disks/DiskSpaceMonitor.h @@ -67,6 +67,13 @@ public: const String & config_prefix, const DiskSelector & disk_selector); + /// Next disk (round-robin) + /// + /// - Used with policy for temporary data + /// - Ignores all limitations + /// - Shares last access with 
reserve() + DiskPtr getNextDisk(); + /// Uses Round-robin to choose disk for reservation. /// Returns valid reservation or nullptr if there is no space left on any disk. ReservationPtr reserve(UInt64 bytes) override; diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 240e591123f..f812b56aa5d 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -68,6 +68,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings, const Con format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter; format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter; format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; + format_settings.avro.schema_registry_url = settings.input_format_avro_schema_registry_url; return format_settings; } @@ -99,6 +100,8 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter; format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter; format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; + format_settings.avro.output_codec = settings.output_format_avro_codec; + format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval; return format_settings; } @@ -325,6 +328,8 @@ FormatFactory::FormatFactory() registerInputFormatProcessorORC(*this); registerInputFormatProcessorParquet(*this); registerOutputFormatProcessorParquet(*this); + registerInputFormatProcessorAvro(*this); + registerOutputFormatProcessorAvro(*this); registerInputFormatProcessorTemplate(*this); registerOutputFormatProcessorTemplate(*this); diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index cbf64afeaec..345ceaee690 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -166,6 +166,8 @@ void registerInputFormatProcessorORC(FormatFactory & factory); void registerOutputFormatProcessorParquet(FormatFactory & factory); void registerInputFormatProcessorProtobuf(FormatFactory & factory); void registerOutputFormatProcessorProtobuf(FormatFactory & factory); +void registerInputFormatProcessorAvro(FormatFactory & factory); +void registerOutputFormatProcessorAvro(FormatFactory & factory); void registerInputFormatProcessorTemplate(FormatFactory & factory); void registerOutputFormatProcessorTemplate(FormatFactory &factory); diff --git a/dbms/src/Formats/FormatSettings.h b/dbms/src/Formats/FormatSettings.h index 6219edf6e6d..cc6f7f4dbb3 100644 --- a/dbms/src/Formats/FormatSettings.h +++ b/dbms/src/Formats/FormatSettings.h @@ -110,6 +110,16 @@ struct FormatSettings }; Custom custom; + + struct Avro + { + String schema_registry_url; + String output_codec; + UInt64 output_sync_interval = 16 * 1024; + }; + + Avro avro; + }; } diff --git a/dbms/src/Formats/ParsedTemplateFormatString.cpp b/dbms/src/Formats/ParsedTemplateFormatString.cpp index 981d43089a2..af6fc39c8fd 100644 --- a/dbms/src/Formats/ParsedTemplateFormatString.cpp +++ b/dbms/src/Formats/ParsedTemplateFormatString.cpp @@ -16,11 +16,11 @@ namespace ErrorCodes ParsedTemplateFormatString::ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name) { + ReadBufferFromFile schema_file(schema.absoluteSchemaPath(), 4096); + String format_string; + readStringUntilEOF(format_string, 
schema_file); try { - ReadBufferFromFile schema_file(schema.absoluteSchemaPath(), 4096); - String format_string; - readStringUntilEOF(format_string, schema_file); parse(format_string, idx_by_name); } catch (DB::Exception & e) @@ -193,7 +193,7 @@ const char * ParsedTemplateFormatString::readMayBeQuotedColumnNameInto(const cha String ParsedTemplateFormatString::dump() const { WriteBufferFromOwnString res; - res << "Delimiter " << 0 << ": "; + res << "\nDelimiter " << 0 << ": "; verbosePrintString(delimiters.front().c_str(), delimiters.front().c_str() + delimiters.front().size(), res); size_t num_columns = std::max(formats.size(), format_idx_to_column_idx.size()); diff --git a/dbms/src/Formats/config_formats.h.in b/dbms/src/Formats/config_formats.h.in index 1ddd0e18aa9..308ded92b5d 100644 --- a/dbms/src/Formats/config_formats.h.in +++ b/dbms/src/Formats/config_formats.h.in @@ -2,6 +2,7 @@ // .h autogenerated by cmake! +#cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP #cmakedefine01 USE_SNAPPY #cmakedefine01 USE_PARQUET diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index fc8e444330c..4bf33ac93ba 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -746,6 +746,23 @@ inline void readBinary(Decimal128 & x, ReadBuffer & buf) { readPODBinary(x, buf) inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); } +template +inline std::enable_if_t && (sizeof(T) <= 8), void> +readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian architecture. +{ + readPODBinary(x, buf); + + if constexpr (sizeof(x) == 1) + return; + else if constexpr (sizeof(x) == 2) + x = __builtin_bswap16(x); + else if constexpr (sizeof(x) == 4) + x = __builtin_bswap32(x); + else if constexpr (sizeof(x) == 8) + x = __builtin_bswap64(x); +} + + /// Generic methods to read value in text tab-separated format. template inline std::enable_if_t, void> @@ -955,28 +972,78 @@ inline T parse(const char * data, size_t size) return res; } -/// Read something from text format, but expect complete parse of given text -/// For example: 723145 -- ok, 213MB -- not ok template -inline T completeParse(const char * data, size_t size) +inline std::enable_if_t, void> +readTextWithSizeSuffix(T & x, ReadBuffer & buf) { readText(x, buf); } + +template +inline std::enable_if_t, void> +readTextWithSizeSuffix(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + if (buf.eof()) + return; + + /// Updates x depending on the suffix + auto finish = [&buf, &x] (UInt64 base, int power_of_two) mutable + { + ++buf.position(); + if (buf.eof()) + { + x *= base; /// For decimal suffixes, such as k, M, G etc. + } + else if (*buf.position() == 'i') + { + x = (x << power_of_two); /// For binary suffixes, such as ki, Mi, Gi, etc. + ++buf.position(); + } + return; + }; + + switch (*buf.position()) + { + case 'k': [[fallthrough]]; + case 'K': + finish(1000, 10); + break; + case 'M': + finish(1000000, 20); + break; + case 'G': + finish(1000000000, 30); + break; + case 'T': + finish(1000000000000ULL, 40); + break; + default: + return; + } + return; +} + +/// Read something from text format and trying to parse the suffix. 
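The readBinaryBigEndian helper above does a plain POD read and then byte-swaps, explicitly assuming a little-endian host. A standalone sketch of the same idea for a 4-byte value read from a raw buffer (it uses the GCC/Clang __builtin_bswap32 builtin, as the original does):

#include <cstdint>
#include <cstring>
#include <iostream>

// Read a big-endian uint32 from a byte buffer on a little-endian host.
uint32_t readUInt32BigEndian(const unsigned char * buf)
{
    uint32_t x;
    std::memcpy(&x, buf, sizeof(x));   // plain POD read, still in wire (big-endian) order
    return __builtin_bswap32(x);       // swap to host order
}

int main()
{
    const unsigned char buf[4] = {0x00, 0x00, 0x01, 0x02};
    std::cout << readUInt32BigEndian(buf) << '\n';   // 258
}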
+/// If the suffix is not valid gives an error +/// For example: 723145 -- ok, 213MB -- not ok, but 213Mi -- ok +template +inline T parseWithSizeSuffix(const char * data, size_t size) { T res; ReadBufferFromMemory buf(data, size); - readText(res, buf); + readTextWithSizeSuffix(res, buf); assertEOF(buf); return res; } template -inline T completeParse(const String & s) +inline T parseWithSizeSuffix(const String & s) { - return completeParse(s.data(), s.size()); + return parseWithSizeSuffix(s.data(), s.size()); } template -inline T completeParse(const char * data) +inline T parseWithSizeSuffix(const char * data) { - return completeParse(data, strlen(data)); + return parseWithSizeSuffix(data, strlen(data)); } template diff --git a/dbms/src/IO/readDecimalText.h b/dbms/src/IO/readDecimalText.h index 4dad5fdb557..5b2a3f76481 100644 --- a/dbms/src/IO/readDecimalText.h +++ b/dbms/src/IO/readDecimalText.h @@ -101,7 +101,13 @@ inline bool readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exp { ++buf.position(); Int32 addition_exp = 0; - readIntText(addition_exp, buf); + if (!tryReadIntText(addition_exp, buf)) + { + if constexpr (_throw_on_error) + throw Exception("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); + else + return false; + } exponent += addition_exp; stop = true; continue; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 39b5175722c..8118f2d1c54 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace ProfileEvents @@ -681,22 +682,25 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData && current_memory_usage > static_cast(params.max_bytes_before_external_group_by) && worth_convert_to_two_level) { - if (!enoughSpaceInDirectory(params.tmp_path, current_memory_usage + params.min_free_disk_space)) - throw Exception("Not enough space for external aggregation in " + params.tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); + size_t size = current_memory_usage + params.min_free_disk_space; + auto reservation = params.tmp_volume->reserve(size); + if (!reservation) + throw Exception("Not enough space for external aggregation in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE); - writeToTemporaryFile(result); + const std::string tmp_path(reservation->getDisk()->getPath()); + writeToTemporaryFile(result, tmp_path); } return true; } -void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) +void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) { Stopwatch watch; size_t rows = data_variants.size(); - auto file = createTemporaryFile(params.tmp_path); + auto file = createTemporaryFile(tmp_path); const std::string & path = file->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); @@ -753,6 +757,10 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) << (uncompressed_bytes / elapsed_seconds / 1048576.0) << " MiB/sec. uncompressed, " << (compressed_bytes / elapsed_seconds / 1048576.0) << " MiB/sec. 
compressed)"); } +void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) +{ + return writeToTemporaryFile(data_variants, params.tmp_volume->getNextDisk()->getPath()); +} template diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index ce2872714cb..cdb1b96f4e8 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -46,6 +46,8 @@ namespace ErrorCodes class IBlockOutputStream; +class Volume; +using VolumePtr = std::shared_ptr; /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. @@ -860,7 +862,7 @@ public: /// Return empty result when aggregating without keys on empty set. bool empty_result_for_aggregation_by_empty_set; - const std::string tmp_path; + VolumePtr tmp_volume; /// Settings is used to determine cache size. No threads are created. size_t max_threads; @@ -873,7 +875,7 @@ public: size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_, size_t max_bytes_before_external_group_by_, bool empty_result_for_aggregation_by_empty_set_, - const std::string & tmp_path_, size_t max_threads_, + VolumePtr tmp_volume_, size_t max_threads_, size_t min_free_disk_space_) : src_header(src_header_), keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()), @@ -881,7 +883,7 @@ public: group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_), max_bytes_before_external_group_by(max_bytes_before_external_group_by_), empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_), - tmp_path(tmp_path_), max_threads(max_threads_), + tmp_volume(tmp_volume_), max_threads(max_threads_), min_free_disk_space(min_free_disk_space_) { } @@ -889,7 +891,7 @@ public: /// Only parameters that matter during merge. Params(const Block & intermediate_header_, const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) - : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, "", max_threads_, 0) + : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0) { intermediate_header = intermediate_header_; } @@ -955,6 +957,7 @@ public: void setCancellationHook(const CancellationHook cancellation_hook); /// For external aggregation. 
+ void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path); void writeToTemporaryFile(AggregatedDataVariants & data_variants); bool hasTemporaryFiles() const { return !temporary_files.empty(); } diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 35365e4b1b1..c6c89b565f1 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; } -AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_) +AnalyzedJoin::AnalyzedJoin(const Settings & settings, VolumePtr tmp_volume_) : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode}) , default_max_bytes(settings.default_max_bytes_in_join) , join_use_nulls(settings.join_use_nulls) @@ -27,7 +27,7 @@ AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_) , partial_merge_join(settings.partial_merge_join) , partial_merge_join_optimizations(settings.partial_merge_join_optimizations) , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks) - , tmp_path(tmp_path_) + , tmp_volume(tmp_volume_) {} void AnalyzedJoin::addUsingKey(const ASTPtr & ast) diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 4832f968695..4a83fe77fc9 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -21,6 +21,9 @@ class Block; struct Settings; +class Volume; +using VolumePtr = std::shared_ptr; + class AnalyzedJoin { /** Query of the form `SELECT expr(x) AS k FROM t1 ANY LEFT JOIN (SELECT expr(x) AS k FROM t2) USING k` @@ -62,10 +65,10 @@ class AnalyzedJoin /// Original name -> name. Only ranamed columns. std::unordered_map renames; - String tmp_path; + VolumePtr tmp_volume; public: - AnalyzedJoin(const Settings &, const String & tmp_path); + AnalyzedJoin(const Settings &, VolumePtr tmp_volume); /// for StorageJoin AnalyzedJoin(SizeLimits limits, bool use_nulls, ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness, @@ -82,7 +85,7 @@ public: ASTTableJoin::Kind kind() const { return table_join.kind; } ASTTableJoin::Strictness strictness() const { return table_join.strictness; } const SizeLimits & sizeLimits() const { return size_limits; } - const String & getTemporaryPath() const { return tmp_path; } + VolumePtr getTemporaryVolume() { return tmp_volume; } bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); } bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(table_join.kind); } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 66ce18aa2c4..04d01a24cc1 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,7 @@ namespace ErrorCodes extern const int SCALAR_ALREADY_EXISTS; extern const int UNKNOWN_SCALAR; extern const int NOT_ENOUGH_PRIVILEGES; + extern const int UNKNOWN_POLICY; } @@ -123,12 +125,14 @@ struct ContextShared String interserver_scheme; /// http or https String path; /// Path to the data directory, with a slash at the end. - String tmp_path; /// The path to the temporary files that occur when processing the request. String flags_path; /// Path to the directory with some control flags for server maintenance. 
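Several headers touched by this change (AnalyzedJoin.h above, and Aggregator.h, MergeJoin.h, Context.h elsewhere) forward-declare Volume and alias VolumePtr instead of including the disk headers. A minimal sketch of why that compiles: a std::shared_ptr member only needs the complete type where the pointer is actually created or dereferenced (names below are illustrative):

#include <memory>

// --- consumer header: no heavy include needed ---
class Volume;                                   // forward declaration only
using VolumePtr = std::shared_ptr<Volume>;

class TempDataHolder
{
public:
    explicit TempDataHolder(VolumePtr volume_) : volume(std::move(volume_)) {}
    VolumePtr getTemporaryVolume() const { return volume; }
private:
    VolumePtr volume;                           // fine even while Volume is incomplete
};

// --- a .cpp that does see the full definition ---
class Volume
{
public:
    int disk_count = 1;
};

int main()
{
    TempDataHolder holder(std::make_shared<Volume>());   // complete type required here
    return holder.getTemporaryVolume()->disk_count == 1 ? 0 : 1;
}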
String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. ConfigurationPtr config; /// Global configuration settings. + String tmp_path; /// Path to the temporary files that occur when processing the request. + mutable VolumePtr tmp_volume; /// Volume for the the temporary files that occur when processing the request. + Databases databases; /// List of databases and tables in them. mutable std::optional embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. mutable std::optional external_dictionaries_loader; @@ -151,9 +155,9 @@ struct ContextShared std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. mutable std::unique_ptr compression_codec_selector; - /// Storage disk chooser + /// Storage disk chooser for MergeTree engines mutable std::unique_ptr merge_tree_disk_selector; - /// Storage policy chooser + /// Storage policy chooser for MergeTree engines mutable std::unique_ptr merge_tree_storage_policy_selector; std::optional merge_tree_settings; /// Settings of MergeTree* engines. @@ -527,12 +531,6 @@ String Context::getPath() const return shared->path; } -String Context::getTemporaryPath() const -{ - auto lock = getLock(); - return shared->tmp_path; -} - String Context::getFlagsPath() const { auto lock = getLock(); @@ -551,13 +549,19 @@ String Context::getDictionariesLibPath() const return shared->dictionaries_lib_path; } +VolumePtr Context::getTemporaryVolume() const +{ + auto lock = getLock(); + return shared->tmp_volume; +} + void Context::setPath(const String & path) { auto lock = getLock(); shared->path = path; - if (shared->tmp_path.empty()) + if (shared->tmp_path.empty() && !shared->tmp_volume) shared->tmp_path = shared->path + "tmp/"; if (shared->flags_path.empty()) @@ -570,10 +574,31 @@ void Context::setPath(const String & path) shared->dictionaries_lib_path = shared->path + "dictionaries_lib/"; } -void Context::setTemporaryPath(const String & path) +VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name) { auto lock = getLock(); - shared->tmp_path = path; + + if (policy_name.empty()) + { + shared->tmp_path = path; + if (!shared->tmp_path.ends_with('/')) + shared->tmp_path += '/'; + + auto disk = std::make_shared("_tmp_default", shared->tmp_path, 0); + shared->tmp_volume = std::make_shared("_tmp_default", std::vector{disk}, 0); + } + else + { + StoragePolicyPtr tmp_policy = getStoragePolicySelector()[policy_name]; + if (tmp_policy->getVolumes().size() != 1) + throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + shared->tmp_volume = tmp_policy->getVolume(0); + } + + if (!shared->tmp_volume->disks.size()) + throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + return shared->tmp_volume; } void Context::setFlagsPath(const String & path) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 1af87b527ad..dcce1a4772f 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -91,6 +91,9 @@ class StoragePolicySelector; class IOutputFormat; using OutputFormatPtr = std::shared_ptr; +class Volume; +using VolumePtr = std::shared_ptr; + #if USE_EMBEDDED_COMPILER class 
CompiledExpressionCache; @@ -195,17 +198,19 @@ public: ~Context(); String getPath() const; - String getTemporaryPath() const; String getFlagsPath() const; String getUserFilesPath() const; String getDictionariesLibPath() const; + VolumePtr getTemporaryVolume() const; + void setPath(const String & path); - void setTemporaryPath(const String & path); void setFlagsPath(const String & path); void setUserFilesPath(const String & path); void setDictionariesLibPath(const String & path); + VolumePtr setTemporaryStorage(const String & path, const String & policy_name = ""); + using ConfigurationPtr = Poco::AutoPtr; /// Global application configuration settings. diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index e9cfe602437..a96f64c110f 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -540,6 +540,7 @@ public: Strings getAllTriedToLoadNames() const { + std::lock_guard lock{mutex}; Strings names; for (auto & [name, info] : infos) if (info.triedToLoad()) diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index b50edc7c412..787b5d3717b 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -96,63 +96,95 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const BlockIO InterpreterInsertQuery::execute() { + const Settings & settings = context.getSettingsRef(); + const auto & query = query_ptr->as(); checkAccess(query); + BlockIO res; StoragePtr table = getTable(query); auto table_lock = table->lockStructureForShare(true, context.getInitialQueryId()); - /// We create a pipeline of several streams, into which we will write data. - BlockOutputStreamPtr out; - - /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. - /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. - if (table->noPushingToViews() && !no_destination) - out = table->write(query_ptr, context); - else - out = std::make_shared(table, context, query_ptr, no_destination); - - /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. - /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). - if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash) - { - out = std::make_shared( - out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); - } - auto query_sample_block = getSampleBlock(query, table); - - /// Actually we don't know structure of input blocks from query/table, - /// because some clients break insertion protocol (columns != header) - out = std::make_shared( - out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); - - if (const auto & constraints = table->getConstraints(); !constraints.empty()) - out = std::make_shared(query.table, - out, query_sample_block, table->getConstraints(), context); - - auto out_wrapper = std::make_shared(out); - out_wrapper->setProcessListElement(context.getProcessListElement()); - out = std::move(out_wrapper); - - BlockIO res; - - /// What type of query: INSERT or INSERT SELECT? + BlockInputStreams in_streams; + size_t out_streams_size = 1; if (query.select) { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
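The ExternalLoader fix above adds a missing lock before iterating the shared infos map. A standalone sketch of the pattern, assuming the same mutex already guards all writers (generic names, not the ClickHouse class):

#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

class Registry
{
public:
    void add(const std::string & name, bool tried_to_load)
    {
        std::lock_guard<std::mutex> lock{mutex};
        infos[name] = tried_to_load;
    }

    // Readers must take the same lock: another thread may be inserting,
    // and iterating an unordered_map during a rehash is undefined behaviour.
    std::vector<std::string> getAllTriedToLoadNames() const
    {
        std::lock_guard<std::mutex> lock{mutex};
        std::vector<std::string> names;
        for (const auto & [name, tried] : infos)
            if (tried)
                names.push_back(name);
        return names;
    }

private:
    mutable std::mutex mutex;   // mutable so const readers can lock, as in ExternalLoader
    std::unordered_map<std::string, bool> infos;
};

int main()
{
    Registry registry;
    registry.add("dict_a", true);
    registry.add("dict_b", false);
    return registry.getAllTriedToLoadNames().size() == 1 ? 0 : 1;
}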
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)}; - /// BlockIO may hold StoragePtrs to temporary tables - res = interpreter_select.execute(); - res.out = nullptr; + if (table->supportsParallelInsert() && settings.max_insert_threads > 0) + { + in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline); + out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size()); + } + else + { + res = interpreter_select.execute(); + in_streams.emplace_back(res.in); + res.in = nullptr; + res.out = nullptr; + } + } - res.in = std::make_shared(context, res.in, out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position); - res.in = std::make_shared(res.in, out); + BlockOutputStreams out_streams; + auto query_sample_block = getSampleBlock(query, table); + + for (size_t i = 0; i < out_streams_size; i++) + { + /// We create a pipeline of several streams, into which we will write data. + BlockOutputStreamPtr out; + + /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. + /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. + if (table->noPushingToViews() && !no_destination) + out = table->write(query_ptr, context); + else + out = std::make_shared(table, context, query_ptr, no_destination); + + /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. + /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). + if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash) + { + out = std::make_shared( + out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); + } + + /// Actually we don't know structure of input blocks from query/table, + /// because some clients break insertion protocol (columns != header) + out = std::make_shared( + out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); + + if (const auto & constraints = table->getConstraints(); !constraints.empty()) + out = std::make_shared(query.table, + out, query_sample_block, table->getConstraints(), context); + + auto out_wrapper = std::make_shared(out); + out_wrapper->setProcessListElement(context.getProcessListElement()); + out = std::move(out_wrapper); + out_streams.emplace_back(std::move(out)); + } + + /// What type of query: INSERT or INSERT SELECT? 
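The INSERT SELECT path above caps the number of sink streams at min(max_insert_threads, number of select streams) and then drives them in parallel. A standalone sketch of that shape with plain threads and vectors (the "streams" are stand-ins, not the ClickHouse pipeline classes):

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <iostream>
#include <thread>
#include <vector>

int main()
{
    // Stand-ins: each "input stream" is just a block of numbers.
    std::vector<std::vector<int>> inputs = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
    size_t max_insert_threads = 2;

    // As in InterpreterInsertQuery: never create more sinks than there are inputs.
    size_t out_streams_size = std::min(max_insert_threads, inputs.size());
    std::vector<std::vector<int>> outputs(out_streams_size);

    // Each worker owns one output and pulls whole inputs off a shared ticket counter,
    // roughly what ParallelInputsProcessor plus the insert handler do with real streams.
    std::atomic<size_t> next_input{0};
    std::vector<std::thread> workers;
    for (size_t w = 0; w < out_streams_size; ++w)
        workers.emplace_back([&, w]
        {
            for (size_t i = next_input.fetch_add(1); i < inputs.size(); i = next_input.fetch_add(1))
                outputs[w].insert(outputs[w].end(), inputs[i].begin(), inputs[i].end());
        });
    for (auto & t : workers)
        t.join();

    size_t total = 0;
    for (const auto & out : outputs)
        total += out.size();
    std::cout << "rows written: " << total << '\n';   // 8
}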
+ if (query.select) + { + for (auto & in_stream : in_streams) + { + in_stream = std::make_shared( + context, in_stream, out_streams.at(0)->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position); + } + + Block in_header = in_streams.at(0)->getHeader(); + if (in_streams.size() > 1) + { + for (size_t i = 1; i < in_streams.size(); ++i) + assertBlocksHaveEqualStructure(in_streams[i]->getHeader(), in_header, "INSERT SELECT"); + } + + res.in = std::make_shared(in_streams, out_streams); if (!allow_materialized) { - Block in_header = res.in->getHeader(); for (const auto & column : table->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && in_header.has(column.name)) throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); @@ -160,12 +192,12 @@ BlockIO InterpreterInsertQuery::execute() } else if (query.data && !query.has_tail) /// can execute without additional data { + // res.out = std::move(out_streams.at(0)); res.in = std::make_shared(query_ptr, nullptr, query_sample_block, context, nullptr); - res.in = std::make_shared(res.in, out); + res.in = std::make_shared(res.in, out_streams.at(0)); } else - res.out = std::move(out); - + res.out = std::move(out_streams.at(0)); res.pipeline.addStorageHolder(table); return res; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 0a481a07399..0e47a15bd80 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1871,7 +1871,7 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); /// If there are several sources, then we perform parallel aggregation if (pipeline.streams.size() > 1) @@ -1937,7 +1937,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? 
settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); auto transform_params = std::make_shared(params, final); @@ -2163,7 +2163,7 @@ void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificato false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); if (modificator == Modificator::ROLLUP) pipeline.firstStream() = std::make_shared(pipeline.firstStream(), params); @@ -2192,7 +2192,7 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modif false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, SettingUInt64(0), SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + context->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); auto transform_params = std::make_shared(params, true); @@ -2276,7 +2276,7 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, InputSortingInfoP sorting_stream, output_order_descr, settings.max_block_size, limit, settings.max_bytes_before_remerge_sort, settings.max_bytes_before_external_sort / pipeline.streams.size(), - context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data); + context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); stream = merging_stream; }); @@ -2358,7 +2358,8 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting return std::make_shared( header, output_order_descr, settings.max_block_size, limit, settings.max_bytes_before_remerge_sort / pipeline.getNumStreams(), - settings.max_bytes_before_external_sort, context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data); + settings.max_bytes_before_external_sort, context->getTemporaryVolume(), + settings.min_free_disk_space_for_temporary_data); }); /// If there are several streams, we merge them into one diff --git a/dbms/src/Interpreters/MergeJoin.cpp b/dbms/src/Interpreters/MergeJoin.cpp index 884639e9d96..b28caa5d0f7 100644 --- a/dbms/src/Interpreters/MergeJoin.cpp +++ b/dbms/src/Interpreters/MergeJoin.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -406,6 +407,8 @@ void MiniLSM::insert(const BlocksList & blocks) if (blocks.empty()) return; + const std::string path(volume->getNextDisk()->getPath()); + SortedFiles sorted_blocks; if (blocks.size() > 1) { @@ -434,6 +437,7 @@ void MiniLSM::merge(std::function callback) BlockInputStreams inputs = makeSortedInputStreams(sorted_files, sample_block); MergingSortedBlockInputStream sorted_stream(inputs, sort_description, rows_in_block); + const std::string path(volume->getNextDisk()->getPath()); SortedFiles out; flushStreamToFiles(path, sample_block, sorted_stream, out, callback); @@ -484,7 +488,7 @@ 
MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & ri makeSortAndMerge(table_join->keyNamesLeft(), left_sort_description, left_merge_description); makeSortAndMerge(table_join->keyNamesRight(), right_sort_description, right_merge_description); - lsm = std::make_unique(table_join->getTemporaryPath(), right_sample_block, right_sort_description, max_rows_in_right_block); + lsm = std::make_unique(table_join->getTemporaryVolume(), right_sample_block, right_sort_description, max_rows_in_right_block); } void MergeJoin::setTotals(const Block & totals_block) diff --git a/dbms/src/Interpreters/MergeJoin.h b/dbms/src/Interpreters/MergeJoin.h index e20a42bb0d3..a64ffd0a85a 100644 --- a/dbms/src/Interpreters/MergeJoin.h +++ b/dbms/src/Interpreters/MergeJoin.h @@ -17,20 +17,23 @@ class AnalyzedJoin; class MergeJoinCursor; struct MergeJoinEqualRange; +class Volume; +using VolumePtr = std::shared_ptr; + struct MiniLSM { using SortedFiles = std::vector>; - const String & path; + VolumePtr volume; const Block & sample_block; const SortDescription & sort_description; const size_t rows_in_block; const size_t max_size; std::vector sorted_files; - MiniLSM(const String & path_, const Block & sample_block_, const SortDescription & description, + MiniLSM(VolumePtr volume_, const Block & sample_block_, const SortDescription & description, size_t rows_in_block_, size_t max_size_ = 16) - : path(path_) + : volume(volume_) , sample_block(sample_block_) , sort_description(description) , rows_in_block(rows_in_block_) diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index d9b86ea91ea..b57b54ac710 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -49,6 +49,7 @@ Block QueryLogElement::createBlock() {std::make_shared(), "memory_usage"}, {std::make_shared(), "query"}, + {std::make_shared(), "exception_code"}, {std::make_shared(), "exception"}, {std::make_shared(), "stack_trace"}, @@ -107,6 +108,7 @@ void QueryLogElement::appendToBlock(Block & block) const columns[i++]->insert(memory_usage); columns[i++]->insertData(query.data(), query.size()); + columns[i++]->insert(exception_code); columns[i++]->insertData(exception.data(), exception.size()); columns[i++]->insertData(stack_trace.data(), stack_trace.size()); diff --git a/dbms/src/Interpreters/QueryLog.h b/dbms/src/Interpreters/QueryLog.h index 0bee61df394..f14691df64e 100644 --- a/dbms/src/Interpreters/QueryLog.h +++ b/dbms/src/Interpreters/QueryLog.h @@ -54,6 +54,7 @@ struct QueryLogElement String query; + Int32 exception_code{}; // because ErrorCodes are int String exception; String stack_trace; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index a485bd7ad73..b929804d0ae 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -816,7 +816,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( SyntaxAnalyzerResult result; result.storage = storage; result.source_columns = source_columns_; - result.analyzed_join = std::make_shared(settings, context.getTemporaryPath()); /// TODO: move to select_query logic + result.analyzed_join = std::make_shared(settings, context.getTemporaryVolume()); /// TODO: move to select_query logic if (storage) collectSourceColumns(storage->getColumns(), result.source_columns, (select_query != nullptr)); diff --git a/dbms/src/Interpreters/SystemLog.cpp b/dbms/src/Interpreters/SystemLog.cpp index 6eb0ce69f4e..d347488ea50 100644 --- a/dbms/src/Interpreters/SystemLog.cpp 
+++ b/dbms/src/Interpreters/SystemLog.cpp @@ -12,6 +12,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + namespace { @@ -31,8 +36,19 @@ std::shared_ptr createSystemLog( String database = config.getString(config_prefix + ".database", default_database_name); String table = config.getString(config_prefix + ".table", default_table_name); - String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)"); - String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time)"; + + String engine; + if (config.has(config_prefix + ".engine")) + { + if (config.has(config_prefix + ".partition_by")) + throw Exception("If 'engine' is specified for system table, PARTITION BY parameters should be specified directly inside 'engine' and 'partition_by' setting doesn't make sense", ErrorCodes::BAD_ARGUMENTS); + engine = config.getString(config_prefix + ".engine"); + } + else + { + String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)"); + engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024"; + } size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1f912738454..b6741e17b8d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -163,6 +163,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c elem.query_start_time = current_time; elem.query = query_for_logging; + elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); elem.client_info = context.getClientInfo(); @@ -496,6 +497,7 @@ static std::tuple executeQueryImpl( elem.event_time = time(nullptr); elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); + elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); QueryStatus * process_list_elem = context.getProcessListElement(); @@ -573,14 +575,17 @@ BlockIO executeQuery( BlockIO streams; std::tie(ast, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage, !may_have_embedded_data, nullptr, allow_processors); - if (streams.in) + + if (const auto * ast_query_with_output = dynamic_cast(ast.get())) { - const auto * ast_query_with_output = dynamic_cast(ast.get()); - String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) - ? getIdentifierName(ast_query_with_output->format) : context.getDefaultFormat(); + String format_name = ast_query_with_output->format + ? 
getIdentifierName(ast_query_with_output->format) + : context.getDefaultFormat(); + if (format_name == "Null") streams.null_format = true; } + return streams; } @@ -590,7 +595,7 @@ void executeQuery( WriteBuffer & ostr, bool allow_into_outfile, Context & context, - std::function set_content_type, + std::function set_content_type_and_format, std::function set_query_id) { PODArray parse_buf; @@ -680,8 +685,8 @@ void executeQuery( out->onProgress(progress); }); - if (set_content_type) - set_content_type(out->getContentType()); + if (set_content_type_and_format) + set_content_type_and_format(out->getContentType(), format_name); if (set_query_id) set_query_id(context.getClientInfo().current_query_id); @@ -742,8 +747,8 @@ void executeQuery( out->onProgress(progress); }); - if (set_content_type) - set_content_type(out->getContentType()); + if (set_content_type_and_format) + set_content_type_and_format(out->getContentType(), format_name); if (set_query_id) set_query_id(context.getClientInfo().current_query_id); diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index 3cff461f6d6..59b555b9f94 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -19,7 +19,7 @@ void executeQuery( WriteBuffer & ostr, /// Where to write query output to. bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... - std::function set_content_type, /// If non-empty callback is passed, it will be called with the Content-Type of the result. + std::function set_content_type_and_format, /// If non-empty callback is passed, it will be called with the Content-Type and the Format of the result. std::function set_query_id /// If non-empty callback is passed, it will be called with the query id. 
); diff --git a/dbms/src/Interpreters/tests/aggregate.cpp b/dbms/src/Interpreters/tests/aggregate.cpp index 4d4d964aa9a..df498d6039d 100644 --- a/dbms/src/Interpreters/tests/aggregate.cpp +++ b/dbms/src/Interpreters/tests/aggregate.cpp @@ -79,7 +79,7 @@ int main(int argc, char ** argv) Aggregator::Params params( stream->getHeader(), {0, 1}, aggregate_descriptions, - false, 0, OverflowMode::THROW, 0, 0, 0, false, "", 1, 0); + false, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, 1, 0); Aggregator aggregator(params); diff --git a/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp new file mode 100644 index 00000000000..acbd892eb48 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -0,0 +1,671 @@ +#include "AvroRowInputFormat.h" +#if USE_AVRO + +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int THERE_IS_NO_COLUMN; + extern const int INCORRECT_DATA; + extern const int ILLEGAL_COLUMN; + extern const int TYPE_MISMATCH; +} + +class InputStreamReadBufferAdapter : public avro::InputStream +{ +public: + InputStreamReadBufferAdapter(ReadBuffer & in_) : in(in_) {} + + bool next(const uint8_t ** data, size_t * len) override + { + if (in.eof()) + { + *len = 0; + return false; + } + + *data = reinterpret_cast(in.position()); + *len = in.available(); + + in.position() += in.available(); + return true; + } + + void backup(size_t len) override { in.position() -= len; } + + void skip(size_t len) override { in.tryIgnore(len); } + + size_t byteCount() const override { return in.count(); } + +private: + ReadBuffer & in; +}; + +static void deserializeNoop(IColumn &, avro::Decoder &) +{ +} + +/// Insert value with conversion to the column of target type. 
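InputStreamReadBufferAdapter above exposes a pull-style ReadBuffer through Avro's InputStream interface: next() hands out everything currently available, and backup() returns the unused tail. A standalone sketch of that windowing contract over a plain byte array (the class below is illustrative, not the avro:: interface):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Minimal stand-in for avro::InputStream: hand out chunks, allow giving back a tail.
class MemorySource
{
public:
    explicit MemorySource(std::vector<uint8_t> data_) : data(std::move(data_)) {}

    // Hand out the whole remaining window; the caller may not consume all of it.
    bool next(const uint8_t ** out, size_t * len)
    {
        if (pos >= data.size())
            return false;            // end of data
        *out = data.data() + pos;
        *len = data.size() - pos;
        pos = data.size();           // optimistically mark everything as consumed
        return true;
    }

    // The caller did not use the last `len` bytes: move the cursor back.
    void backup(size_t len) { pos -= len; }

    size_t byteCount() const { return pos; }

private:
    std::vector<uint8_t> data;
    size_t pos = 0;
};

int main()
{
    MemorySource src({1, 2, 3, 4, 5});
    const uint8_t * chunk = nullptr;
    size_t len = 0;
    src.next(&chunk, &len);    // gets all 5 bytes
    src.backup(len - 2);       // the decoder only needed the first 2
    std::cout << src.byteCount() << '\n';   // 2
}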
+template +static void insertNumber(IColumn & column, WhichDataType type, T value) +{ + switch (type.idx) + { + case TypeIndex::UInt8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::UInt64: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int64: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Float32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Float64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception("Type is not compatible with Avro", ErrorCodes::ILLEGAL_COLUMN); + } +} + + +AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type) +{ + WhichDataType target(target_type); + switch (root_node->type()) + { + case avro::AVRO_STRING: [[fallthrough]]; + case avro::AVRO_BYTES: + if (target.isString() || target.isFixedString()) + { + return [tmp = std::string()](IColumn & column, avro::Decoder & decoder) mutable + { + decoder.decodeString(tmp); + column.insertData(tmp.c_str(), tmp.length()); + }; + } + break; + case avro::AVRO_INT: + return [target](IColumn & column, avro::Decoder & decoder) + { + insertNumber(column, target, decoder.decodeInt()); + }; + case avro::AVRO_LONG: + if (target.isDateTime64()) + { + auto date_time_scale = assert_cast(*target_type).getScale(); + auto logical_type = root_node->logicalType().type(); + if ((logical_type == avro::LogicalType::TIMESTAMP_MILLIS && date_time_scale == 3) + || (logical_type == avro::LogicalType::TIMESTAMP_MICROS && date_time_scale == 6)) + { + return [](IColumn & column, avro::Decoder & decoder) + { + assert_cast(column).insertValue(decoder.decodeLong()); + }; + } + } + else + { + return [target](IColumn & column, avro::Decoder & decoder) + { + insertNumber(column, target, decoder.decodeLong()); + }; + } + break; + case avro::AVRO_FLOAT: + return [target](IColumn & column, avro::Decoder & decoder) + { + insertNumber(column, target, decoder.decodeFloat()); + }; + case avro::AVRO_DOUBLE: + return [target](IColumn & column, avro::Decoder & decoder) + { + insertNumber(column, target, decoder.decodeDouble()); + }; + case avro::AVRO_BOOL: + return [target](IColumn & column, avro::Decoder & decoder) + { + insertNumber(column, target, decoder.decodeBool()); + }; + case avro::AVRO_ARRAY: + if (target.isArray()) + { + auto nested_source_type = root_node->leafAt(0); + auto nested_target_type = assert_cast(*target_type).getNestedType(); + auto nested_deserialize = createDeserializeFn(nested_source_type, nested_target_type); + return [nested_deserialize](IColumn & column, avro::Decoder & decoder) + { + ColumnArray & column_array = assert_cast(column); + ColumnArray::Offsets & offsets = column_array.getOffsets(); + IColumn & nested_column = column_array.getData(); + size_t total = 0; + for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext()) + { + total += n; + for (size_t i = 0; i < n; i++) + { + nested_deserialize(nested_column, decoder); + } + } + 
offsets.push_back(offsets.back() + total); + }; + } + break; + case avro::AVRO_UNION: + { + auto nullable_deserializer = [root_node, target_type](size_t non_null_union_index) + { + auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); + return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) + { + ColumnNullable & col = assert_cast(column); + size_t union_index = decoder.decodeUnionIndex(); + if (union_index == non_null_union_index) + { + nested_deserialize(col.getNestedColumn(), decoder); + col.getNullMapData().push_back(0); + } + else + { + col.insertDefault(); + } + }; + }; + if (root_node->leaves() == 2 && target.isNullable()) + { + if (root_node->leafAt(0)->type() == avro::AVRO_NULL) + return nullable_deserializer(1); + if (root_node->leafAt(1)->type() == avro::AVRO_NULL) + return nullable_deserializer(0); + } + break; + } + case avro::AVRO_NULL: + if (target.isNullable()) + { + auto nested_type = removeNullable(target_type); + if (nested_type->getTypeId() == TypeIndex::Nothing) + { + return [](IColumn &, avro::Decoder & decoder) + { + decoder.decodeNull(); + }; + } + else + { + return [](IColumn & column, avro::Decoder & decoder) + { + ColumnNullable & col = assert_cast(column); + decoder.decodeNull(); + col.insertDefault(); + }; + } + } + break; + case avro::AVRO_ENUM: + if (target.isString()) + { + std::vector symbols; + for (size_t i = 0; i < root_node->names(); i++) + { + symbols.push_back(root_node->nameAt(i)); + } + return [symbols](IColumn & column, avro::Decoder & decoder) + { + size_t enum_index = decoder.decodeEnum(); + const auto & enum_symbol = symbols[enum_index]; + column.insertData(enum_symbol.c_str(), enum_symbol.length()); + }; + } + if (target.isEnum()) + { + const auto & enum_type = dynamic_cast(*target_type); + std::vector symbol_mapping; + for (size_t i = 0; i < root_node->names(); i++) + { + symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i))); + } + return [symbol_mapping](IColumn & column, avro::Decoder & decoder) + { + size_t enum_index = decoder.decodeEnum(); + column.insert(symbol_mapping[enum_index]); + }; + } + break; + case avro::AVRO_FIXED: + { + size_t fixed_size = root_node->fixedSize(); + if (target.isFixedString() && target_type->getSizeOfValueInMemory() == fixed_size) + { + return [tmp_fixed = std::vector(fixed_size)](IColumn & column, avro::Decoder & decoder) mutable + { + decoder.decodeFixed(tmp_fixed.size(), tmp_fixed); + column.insertData(reinterpret_cast(tmp_fixed.data()), tmp_fixed.size()); + }; + } + break; + } + case avro::AVRO_MAP: [[fallthrough]]; + case avro::AVRO_RECORD: [[fallthrough]]; + default: + break; + } + + throw Exception( + "Type " + target_type->getName() + " is not compatible with Avro " + avro::ValidSchema(root_node).toJson(false), + ErrorCodes::ILLEGAL_COLUMN); +} + +AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) +{ + switch (root_node->type()) + { + case avro::AVRO_STRING: + return [](avro::Decoder & decoder) { decoder.skipString(); }; + case avro::AVRO_BYTES: + return [](avro::Decoder & decoder) { decoder.skipBytes(); }; + case avro::AVRO_INT: + return [](avro::Decoder & decoder) { decoder.decodeInt(); }; + case avro::AVRO_LONG: + return [](avro::Decoder & decoder) { decoder.decodeLong(); }; + case avro::AVRO_FLOAT: + return [](avro::Decoder & decoder) { decoder.decodeFloat(); }; + case avro::AVRO_DOUBLE: + return [](avro::Decoder & decoder) { decoder.decodeDouble(); }; + 
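The AVRO_UNION branch above only accepts two-leaf unions where one leaf is null, and maps them onto Nullable columns by comparing the decoded union index with the non-null branch. A standalone sketch of the same decision with std::optional (the decoded value is a stand-in for what avro::Decoder would produce):

#include <cstddef>
#include <iostream>
#include <optional>

// Stand-in for a decoded ["null", "long"]-style union: the branch index,
// plus the value when the index selects the long branch.
struct UnionValue
{
    size_t union_index;
    long value;
};

// non_null_union_index is 0 or 1 depending on whether the schema is
// ["long", "null"] or ["null", "long"], mirroring nullable_deserializer above.
std::optional<long> decodeNullableLong(const UnionValue & decoded, size_t non_null_union_index)
{
    if (decoded.union_index == non_null_union_index)
        return decoded.value;   // non-null branch: keep the value
    return std::nullopt;        // null branch: insert a default / NULL
}

int main()
{
    std::cout << decodeNullableLong({1, 42}, 1).value_or(-1) << '\n';   // 42
    std::cout << decodeNullableLong({0, 0}, 1).has_value() << '\n';     // 0 (NULL)
}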
case avro::AVRO_BOOL: + return [](avro::Decoder & decoder) { decoder.decodeBool(); }; + case avro::AVRO_ARRAY: + { + auto nested_skip_fn = createSkipFn(root_node->leafAt(0)); + return [nested_skip_fn](avro::Decoder & decoder) + { + for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext()) + { + for (size_t i = 0; i < n; ++i) + { + nested_skip_fn(decoder); + } + } + }; + } + case avro::AVRO_UNION: + { + std::vector union_skip_fns; + for (size_t i = 0; i < root_node->leaves(); i++) + { + union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); + } + return [union_skip_fns](avro::Decoder & decoder) { union_skip_fns[decoder.decodeUnionIndex()](decoder); }; + } + case avro::AVRO_NULL: + return [](avro::Decoder & decoder) { decoder.decodeNull(); }; + case avro::AVRO_ENUM: + return [](avro::Decoder & decoder) { decoder.decodeEnum(); }; + case avro::AVRO_FIXED: + { + auto fixed_size = root_node->fixedSize(); + return [fixed_size](avro::Decoder & decoder) { decoder.skipFixed(fixed_size); }; + } + case avro::AVRO_MAP: + { + auto value_skip_fn = createSkipFn(root_node->leafAt(1)); + return [value_skip_fn](avro::Decoder & decoder) + { + for (size_t n = decoder.mapStart(); n != 0; n = decoder.mapNext()) + { + for (size_t i = 0; i < n; ++i) + { + decoder.skipString(); + value_skip_fn(decoder); + } + } + }; + } + case avro::AVRO_RECORD: + { + std::vector field_skip_fns; + for (size_t i = 0; i < root_node->leaves(); i++) + { + field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); + } + return [field_skip_fns](avro::Decoder & decoder) + { + for (auto & skip_fn : field_skip_fns) + skip_fn(decoder); + }; + } + default: + throw Exception("Unsupported Avro type " + root_node->name().fullname() + " (" + toString(int(root_node->type())) + ")", ErrorCodes::ILLEGAL_COLUMN); + } +} + + +AvroDeserializer::AvroDeserializer(const ColumnsWithTypeAndName & columns, avro::ValidSchema schema) +{ + auto schema_root = schema.root(); + if (schema_root->type() != avro::AVRO_RECORD) + { + throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); + } + + field_mapping.resize(schema_root->leaves(), -1); + + for (size_t i = 0; i < schema_root->leaves(); ++i) + { + skip_fns.push_back(createSkipFn(schema_root->leafAt(i))); + deserialize_fns.push_back(&deserializeNoop); + } + + for (size_t i = 0; i < columns.size(); ++i) + { + const auto & column = columns[i]; + size_t field_index = 0; + if (!schema_root->nameIndex(column.name, field_index)) + { + throw Exception("Field " + column.name + " not found in Avro schema", ErrorCodes::THERE_IS_NO_COLUMN); + } + auto field_schema = schema_root->leafAt(field_index); + try + { + deserialize_fns[field_index] = createDeserializeFn(field_schema, column.type); + } + catch (Exception & e) + { + e.addMessage("column " + column.name); + throw; + } + field_mapping[field_index] = i; + } +} + +void AvroDeserializer::deserializeRow(MutableColumns & columns, avro::Decoder & decoder) +{ + for (size_t i = 0; i < field_mapping.size(); i++) + { + if (field_mapping[i] >= 0) + { + deserialize_fns[i](*columns[field_mapping[i]], decoder); + } + else + { + skip_fns[i](decoder); + } + } +} + + +AvroRowInputFormat::AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) + : IRowInputFormat(header_, in_, params_) + , file_reader(std::make_unique(in_)) + , deserializer(header_.getColumnsWithTypeAndName(), file_reader.dataSchema()) +{ + file_reader.init(); +} + +bool AvroRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +{ + if 
(file_reader.hasMore()) + { + file_reader.decr(); + deserializer.deserializeRow(columns, file_reader.decoder()); + return true; + } + return false; +} + +#if USE_POCO_JSON +class AvroConfluentRowInputFormat::SchemaRegistry +{ +public: + SchemaRegistry(const std::string & base_url_) + { + if (base_url_.empty()) + { + throw Exception("Empty Schema Registry URL", ErrorCodes::BAD_ARGUMENTS); + } + try + { + base_url = base_url_; + } + catch (const Poco::SyntaxException & e) + { + throw Exception("Invalid Schema Registry URL: " + e.displayText(), ErrorCodes::BAD_ARGUMENTS); + } + } + + avro::ValidSchema getSchema(uint32_t id) const + { + try + { + try + { + /// TODO Host checking to prevent SSRF + + Poco::URI url(base_url, "/schemas/ids/" + std::to_string(id)); + + /// One second for connect/send/receive. Just in case. + ConnectionTimeouts timeouts({1, 0}, {1, 0}, {1, 0}); + + Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery()); + + auto session = makePooledHTTPSession(url, timeouts, 1); + session->sendRequest(request); + + Poco::Net::HTTPResponse response; + auto & response_body = session->receiveResponse(response); + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + { + throw Exception("HTTP code " + std::to_string(response.getStatus()), ErrorCodes::INCORRECT_DATA); + } + + Poco::JSON::Parser parser; + auto json_body = parser.parse(response_body).extract(); + auto schema = json_body->getValue("schema"); + return avro::compileJsonSchemaFromString(schema); + } + catch (const Exception &) + { + throw; + } + catch (const Poco::Exception & e) + { + throw Exception(Exception::CreateFromPoco, e); + } + catch (const avro::Exception & e) + { + throw Exception(e.what(), ErrorCodes::INCORRECT_DATA); + } + } + catch (Exception & e) + { + e.addMessage("while fetching schema id = " + std::to_string(id)); + throw; + } + } + +private: + Poco::URI base_url; +}; + +static uint32_t readConfluentSchemaId(ReadBuffer & in) +{ + uint8_t magic; + uint32_t schema_id; + + readBinaryBigEndian(magic, in); + readBinaryBigEndian(schema_id, in); + + if (magic != 0x00) + { + throw Exception("Invalid magic byte before AvroConfluent schema identifier." 
+ " Must be zero byte, found " + std::to_string(int(magic)) + " instead", ErrorCodes::INCORRECT_DATA); + } + + return schema_id; +} + +AvroConfluentRowInputFormat::AvroConfluentRowInputFormat( + const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) + : IRowInputFormat(header_.cloneEmpty(), in_, params_) + , header_columns(header_.getColumnsWithTypeAndName()) + , schema_registry(std::make_unique(format_settings_.avro.schema_registry_url)) + , input_stream(std::make_unique(in)) + , decoder(avro::binaryDecoder()) + +{ + decoder->init(*input_stream); +} + +bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +{ + if (in.eof()) + { + return false; + } + SchemaId schema_id = readConfluentSchemaId(in); + auto & deserializer = getOrCreateDeserializer(schema_id); + deserializer.deserializeRow(columns, *decoder); + decoder->drain(); + return true; +} + +AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id) +{ + auto it = deserializer_cache.find(schema_id); + if (it == deserializer_cache.end()) + { + auto schema = schema_registry->getSchema(schema_id); + AvroDeserializer deserializer(header_columns, schema); + it = deserializer_cache.emplace(schema_id, deserializer).first; + } + return it->second; +} +#endif + +void registerInputFormatProcessorAvro(FormatFactory & factory) +{ + factory.registerInputFormatProcessor("Avro", []( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams & params, + const FormatSettings &) + { + return std::make_shared(sample, buf, params); + }); + +#if USE_POCO_JSON + + /// AvroConfluent format is disabled for the following reasons: + /// 1. There is no test for it. + /// 2. RemoteHostFilter is not used to prevent CSRF attacks. + +#if 0 + factory.registerInputFormatProcessor("AvroConfluent",[]( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams & params, + const FormatSettings & settings) + { + return std::make_shared(sample, buf, params, settings); + }); +#endif + +#endif + +} + +} + +#else + +namespace DB +{ +class FormatFactory; +void registerInputFormatProcessorAvro(FormatFactory &) +{ +} +} + +#endif diff --git a/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.h b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.h new file mode 100644 index 00000000000..0fb979b4f4e --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -0,0 +1,79 @@ +#pragma once +#include "config_formats.h" +#include "config_core.h" +#if USE_AVRO + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ +class AvroDeserializer +{ +public: + AvroDeserializer(const ColumnsWithTypeAndName & columns, avro::ValidSchema schema); + void deserializeRow(MutableColumns & columns, avro::Decoder & decoder); + +private: + using DeserializeFn = std::function; + using SkipFn = std::function; + static DeserializeFn createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type); + static SkipFn createSkipFn(avro::NodePtr root_node); + + /// Map from field index in Avro schema to column number in block header. Or -1 if there is no corresponding column. + std::vector field_mapping; + + /// How to skip the corresponding field in Avro schema. + std::vector skip_fns; + + /// How to deserialize the corresponding field in Avro schema. 
+ std::vector deserialize_fns; +}; + +class AvroRowInputFormat : public IRowInputFormat +{ +public: + AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + virtual bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + String getName() const override { return "AvroRowInputFormat"; } + +private: + avro::DataFileReaderBase file_reader; + AvroDeserializer deserializer; +}; + +#if USE_POCO_JSON +class AvroConfluentRowInputFormat : public IRowInputFormat +{ +public: + AvroConfluentRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_); + virtual bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + String getName() const override { return "AvroConfluentRowInputFormat"; } + +private: + const ColumnsWithTypeAndName header_columns; + + class SchemaRegistry; + std::unique_ptr schema_registry; + + using SchemaId = uint32_t; + std::unordered_map deserializer_cache; + AvroDeserializer & getOrCreateDeserializer(SchemaId schema_id); + + avro::InputStreamPtr input_stream; + avro::DecoderPtr decoder; +}; +#endif + +} +#endif diff --git a/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp new file mode 100644 index 00000000000..26b427dfa31 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -0,0 +1,396 @@ +#include "AvroRowOutputFormat.h" +#if USE_AVRO + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_TYPE_OF_FIELD; + extern const int BAD_ARGUMENTS; + extern const int THERE_IS_NO_COLUMN; + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; + extern const int CANNOT_READ_ALL_DATA; +} + +class OutputStreamWriteBufferAdapter : public avro::OutputStream +{ +public: + OutputStreamWriteBufferAdapter(WriteBuffer & out_) : out(out_) {} + + virtual bool next(uint8_t ** data, size_t * len) override + { + out.nextIfAtEnd(); + *data = reinterpret_cast(out.position()); + *len = out.available(); + out.position() += out.available(); + + return true; + } + + virtual void backup(size_t len) override { out.position() -= len; } + + virtual uint64_t byteCount() const override { return out.count(); } + virtual void flush() override { out.next(); } + +private: + WriteBuffer & out; +}; + + +AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment) +{ + ++type_name_increment; + + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Int8: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::UInt16: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Int16: + return {avro::IntSchema(), [](const IColumn & 
column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::DateTime: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Int32: + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeInt(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::UInt64: + return {avro::LongSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeLong(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Int64: + return {avro::LongSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeLong(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Float32: + return {avro::FloatSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeFloat(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Float64: + return {avro::DoubleSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeDouble(assert_cast(column).getElement(row_num)); + }}; + case TypeIndex::Date: + { + auto schema = avro::IntSchema(); + schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::DATE)); + return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + UInt16 date = assert_cast(column).getElement(row_num); + encoder.encodeInt(date); + }}; + } + case TypeIndex::DateTime64: + { + auto schema = avro::LongSchema(); + const auto & provided_type = assert_cast(*data_type); + + if (provided_type.getScale() == 3) + schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::TIMESTAMP_MILLIS)); + else if (provided_type.getScale() == 6) + schema.root()->setLogicalType(avro::LogicalType(avro::LogicalType::TIMESTAMP_MICROS)); + else + break; + + return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const auto & col = assert_cast(column); + encoder.encodeLong(col.getElement(row_num)); + }}; + } + case TypeIndex::String: + return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeBytes(reinterpret_cast(s.data), s.size); + }}; + case TypeIndex::FixedString: + { + auto size = data_type->getSizeOfValueInMemory(); + auto schema = avro::FixedSchema(size, "fixed_" + toString(type_name_increment)); + return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeFixed(reinterpret_cast(s.data), s.size); + }}; + } + case TypeIndex::Enum8: + { + auto schema = avro::EnumSchema("enum8_" + toString(type_name_increment)); /// type names must be different for different types. 
+ std::unordered_map enum_mapping; + const auto & enum_values = assert_cast(*data_type).getValues(); + for (size_t i = 0; i < enum_values.size(); ++i) + { + schema.addSymbol(enum_values[i].first); + enum_mapping.emplace(enum_values[i].second, i); + } + return {schema, [enum_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + auto enum_value = assert_cast(column).getElement(row_num); + encoder.encodeEnum(enum_mapping.at(enum_value)); + }}; + } + case TypeIndex::Enum16: + { + auto schema = avro::EnumSchema("enum16" + toString(type_name_increment)); + std::unordered_map enum_mapping; + const auto & enum_values = assert_cast(*data_type).getValues(); + for (size_t i = 0; i < enum_values.size(); ++i) + { + schema.addSymbol(enum_values[i].first); + enum_mapping.emplace(enum_values[i].second, i); + } + return {schema, [enum_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + auto enum_value = assert_cast(column).getElement(row_num); + encoder.encodeEnum(enum_mapping.at(enum_value)); + }}; + } + case TypeIndex::Array: + { + const auto & array_type = assert_cast(*data_type); + auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment); + auto schema = avro::ArraySchema(nested_mapping.schema); + return {schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const ColumnArray & column_array = assert_cast(column); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + size_t row_count = next_offset - offset; + const IColumn & nested_column = column_array.getData(); + + encoder.arrayStart(); + if (row_count > 0) + { + encoder.setItemCount(row_count); + } + for (size_t i = offset; i < next_offset; ++i) + { + nested_mapping.serialize(nested_column, i, encoder); + } + encoder.arrayEnd(); + }}; + } + case TypeIndex::Nullable: + { + auto nested_type = removeNullable(data_type); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + if (nested_type->getTypeId() == TypeIndex::Nothing) + { + return nested_mapping; + } + else + { + avro::UnionSchema union_schema; + union_schema.addType(avro::NullSchema()); + union_schema.addType(nested_mapping.schema); + return {union_schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const ColumnNullable & col = assert_cast(column); + if (!col.isNullAt(row_num)) + { + encoder.encodeUnionIndex(1); + nested_mapping.serialize(col.getNestedColumn(), row_num, encoder); + } + else + { + encoder.encodeUnionIndex(0); + encoder.encodeNull(); + } + }}; + } + } + case TypeIndex::LowCardinality: + { + const auto & nested_type = removeLowCardinality(data_type); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + return {nested_mapping.schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const auto & col = assert_cast(column); + nested_mapping.serialize(*col.getDictionary().getNestedColumn(), col.getIndexAt(row_num), encoder); + }}; + } + case TypeIndex::Nothing: + return {avro::NullSchema(), [](const IColumn &, size_t, avro::Encoder & encoder) { encoder.encodeNull(); }}; + default: + break; + } + throw Exception("Type " + data_type->getName() + " is not supported for Avro output", ErrorCodes::ILLEGAL_COLUMN); +} + + +AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) +{ + 
avro::RecordSchema record_schema("row"); + + size_t type_name_increment = 0; + for (auto & column : columns) + { + try + { + auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment); + serialize_fns.push_back(field_mapping.serialize); + //TODO: verify name starts with A-Za-z_ + record_schema.addField(column.name, field_mapping.schema); + } + catch (Exception & e) + { + e.addMessage("column " + column.name); + throw; + } + } + schema.setSchema(record_schema); +} + +void AvroSerializer::serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder) +{ + size_t num_columns = columns.size(); + for (size_t i = 0; i < num_columns; ++i) + { + serialize_fns[i](*columns[i], row_num, encoder); + } +} + +static avro::Codec getCodec(const std::string & codec_name) +{ + if (codec_name == "") + { +#ifdef SNAPPY_CODEC_AVAILABLE + return avro::Codec::SNAPPY_CODEC; +#else + return avro::Codec::DEFLATE_CODEC; +#endif + } + + if (codec_name == "null") return avro::Codec::NULL_CODEC; + if (codec_name == "deflate") return avro::Codec::DEFLATE_CODEC; +#ifdef SNAPPY_CODEC_AVAILABLE + if (codec_name == "snappy") return avro::Codec::SNAPPY_CODEC; +#endif + + throw Exception("Avro codec " + codec_name + " is not available", ErrorCodes::BAD_ARGUMENTS); +} + +AvroRowOutputFormat::AvroRowOutputFormat( + WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_) + : IRowOutputFormat(header_, out_, callback) + , settings(settings_) + , serializer(header_.getColumnsWithTypeAndName()) + , file_writer( + std::make_unique(out_), + serializer.getSchema(), + settings.avro.output_sync_interval, + getCodec(settings.avro.output_codec)) +{ +} + +AvroRowOutputFormat::~AvroRowOutputFormat() = default; + +void AvroRowOutputFormat::writePrefix() +{ + file_writer.syncIfNeeded(); +} + +void AvroRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + file_writer.syncIfNeeded(); + serializer.serializeRow(columns, row_num, file_writer.encoder()); + file_writer.incr(); +} + +void AvroRowOutputFormat::writeSuffix() +{ + file_writer.close(); +} + +void registerOutputFormatProcessorAvro(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("Avro", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, callback, settings); + }); +} + +} + +#else + +namespace DB +{ +class FormatFactory; +void registerOutputFormatProcessorAvro(FormatFactory &) +{ +} +} + +#endif diff --git a/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.h new file mode 100644 index 00000000000..4d404337d74 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -0,0 +1,62 @@ +#pragma once +#include "config_formats.h" +#if USE_AVRO +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ +class WriteBuffer; + +class AvroSerializer +{ +public: + AvroSerializer(const ColumnsWithTypeAndName & columns); + const avro::ValidSchema & getSchema() const { return schema; } + void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder); + +private: + using SerializeFn = std::function; + struct SchemaWithSerializeFn + { + avro::Schema schema; + SerializeFn serialize; + }; + + /// Type names for different complex types (e.g. enums, fixed strings) must be unique. 
We use simple incremental number to give them different names. + static SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment); + + std::vector serialize_fns; + avro::ValidSchema schema; +}; + +class AvroRowOutputFormat : public IRowOutputFormat +{ +public: + AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + virtual ~AvroRowOutputFormat() override; + + String getName() const override { return "AvroRowOutputFormat"; } + void write(const Columns & columns, size_t row_num) override; + void writeField(const IColumn &, const IDataType &, size_t) override {} + virtual void writePrefix() override; + virtual void writeSuffix() override; + +private: + FormatSettings settings; + AvroSerializer serializer; + avro::DataFileWriterBase file_writer; +}; + +} +#endif diff --git a/dbms/src/Processors/Formats/LazyOutputFormat.h b/dbms/src/Processors/Formats/LazyOutputFormat.h index 56aaf249480..441a3449620 100644 --- a/dbms/src/Processors/Formats/LazyOutputFormat.h +++ b/dbms/src/Processors/Formats/LazyOutputFormat.h @@ -20,14 +20,18 @@ public: Block getTotals(); Block getExtremes(); - bool isFinished() { return finished_processing; } + bool isFinished() { return finished_processing && queue.size() == 0; } BlockStreamProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; - void finish() { finished_processing = true; } - void clearQueue() { queue.clear(); } + void finish() + { + finished_processing = true; + /// Clear queue in case if somebody is waiting lazy_format to push. + queue.clear(); + } protected: void consume(Chunk chunk) override diff --git a/dbms/src/Processors/IInflatingTransform.h b/dbms/src/Processors/IInflatingTransform.h index 45edf5302e5..0ad12f6cd65 100644 --- a/dbms/src/Processors/IInflatingTransform.h +++ b/dbms/src/Processors/IInflatingTransform.h @@ -4,6 +4,20 @@ namespace DB { +/// Transform which can generate several chunks on every consumed. +/// It can be assumed that class is used in following way: +/// +/// for (chunk : input_chunks) +/// { +/// transform.consume(chunk); +/// +/// while (transform.canGenerate()) +/// { +/// transformed_chunk = transform.generate(); +/// ... (process transformed chunk) +/// } +/// } +/// class IInflatingTransform : public IProcessor { protected: diff --git a/dbms/src/Processors/QueryPipeline.h b/dbms/src/Processors/QueryPipeline.h index 85fc7d04513..d8320bb51d6 100644 --- a/dbms/src/Processors/QueryPipeline.h +++ b/dbms/src/Processors/QueryPipeline.h @@ -29,11 +29,14 @@ public: void init(Pipe pipe); /// Simple init for single pipe bool initialized() { return !processors.empty(); } + /// Type of logical data stream for simple transform. + /// Sometimes it's important to know which part of pipeline we are working for. + /// Example: ExpressionTransform need special logic for totals. enum class StreamType { - Main = 0, - Totals, - Extremes, + Main = 0, /// Stream for query data. There may be several streams of this type. + Totals, /// Stream for totals. No more then one. + Extremes, /// Stream for extremes. No more then one. 
}; using ProcessorGetter = std::function; diff --git a/dbms/src/Processors/Transforms/ExpressionTransform.cpp b/dbms/src/Processors/Transforms/ExpressionTransform.cpp index a5755ae072b..9bd4ba89db6 100644 --- a/dbms/src/Processors/Transforms/ExpressionTransform.cpp +++ b/dbms/src/Processors/Transforms/ExpressionTransform.cpp @@ -37,6 +37,8 @@ void ExpressionTransform::transform(Chunk & chunk) if (on_totals) { + /// Drop totals if both out stream and joined stream doesn't have ones. + /// See comment in ExpressionTransform.h if (default_totals && !expression->hasTotalsInJoin()) return; diff --git a/dbms/src/Processors/Transforms/ExpressionTransform.h b/dbms/src/Processors/Transforms/ExpressionTransform.h index 5a5d60bfacf..87f2c01ea1d 100644 --- a/dbms/src/Processors/Transforms/ExpressionTransform.h +++ b/dbms/src/Processors/Transforms/ExpressionTransform.h @@ -10,7 +10,11 @@ using ExpressionActionsPtr = std::shared_ptr; class ExpressionTransform : public ISimpleTransform { public: - ExpressionTransform(const Block & header_, ExpressionActionsPtr expression_, bool on_totals_ = false, bool default_totals_ = false); + ExpressionTransform( + const Block & header_, + ExpressionActionsPtr expression_, + bool on_totals_ = false, + bool default_totals_ = false); String getName() const override { return "ExpressionTransform"; } @@ -20,6 +24,9 @@ protected: private: ExpressionActionsPtr expression; bool on_totals; + /// This flag means that we have manually added totals to our pipeline. + /// It may happen in case if joined subquery has totals, but out string doesn't. + /// We need to join default values with subquery totals if we have them, or return empty chunk is haven't. bool default_totals; bool initialized = false; }; diff --git a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp index 39da24ba149..060d860b0b5 100644 --- a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ProfileEvents @@ -95,11 +96,11 @@ MergeSortingTransform::MergeSortingTransform( const SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_, - size_t max_bytes_before_external_sort_, const std::string & tmp_path_, + size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_) : SortingTransform(header, description_, max_merged_block_size_, limit_) , max_bytes_before_remerge(max_bytes_before_remerge_) - , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_) + , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_) , min_free_disk_space(min_free_disk_space_) {} Processors MergeSortingTransform::expandPipeline() @@ -172,10 +173,14 @@ void MergeSortingTransform::consume(Chunk chunk) */ if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort) { - if (!enoughSpaceInDirectory(tmp_path, sum_bytes_in_blocks + min_free_disk_space)) - throw Exception("Not enough space for external sort in " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); + size_t size = sum_bytes_in_blocks + min_free_disk_space; + auto reservation = tmp_volume->reserve(size); + if (!reservation) + throw Exception("Not enough space for external sort in temporary storage", ErrorCodes::NOT_ENOUGH_SPACE); + const std::string tmp_path(reservation->getDisk()->getPath()); 
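For context on tmp_volume: the processor tests later in this diff build it from a single local disk, but the template arguments of those make_shared calls were lost in this copy of the patch. A minimal reconstruction sketch follows, assuming the DiskLocal/Volume types and the DiskPtr alias from the Disks layer; the type names are my assumption, only the argument lists are taken from the diff as shown.

    /// Presumed test setup: one local disk rooted at cur_path, wrapped into a one-disk volume named "tmp".
    auto disk = std::make_shared<DiskLocal>("tmp", cur_path, /* keep_free_space_bytes */ 0);
    auto tmp_volume = std::make_shared<Volume>("tmp", std::vector<DiskPtr>{disk}, /* max_data_part_size */ 0);

The transform then asks this volume for a reservation before every spill (as in the hunk above), so each temporary file lands on whichever disk of the volume still has enough room, instead of on a single hard-coded tmp_path.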
temporary_files.emplace_back(createTemporaryFile(tmp_path)); + const std::string & path = temporary_files.back()->path(); merge_sorter = std::make_unique(std::move(chunks), description, max_merged_block_size, limit); auto current_processor = std::make_shared(header_without_constants, log, path); diff --git a/dbms/src/Processors/Transforms/MergeSortingTransform.h b/dbms/src/Processors/Transforms/MergeSortingTransform.h index ecfaeb4f272..09c2b182fc7 100644 --- a/dbms/src/Processors/Transforms/MergeSortingTransform.h +++ b/dbms/src/Processors/Transforms/MergeSortingTransform.h @@ -9,6 +9,9 @@ namespace DB { +class Volume; +using VolumePtr = std::shared_ptr; + class MergeSortingTransform : public SortingTransform { public: @@ -17,7 +20,7 @@ public: const SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_, - size_t max_bytes_before_external_sort_, const std::string & tmp_path_, + size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_); String getName() const override { return "MergeSortingTransform"; } @@ -32,7 +35,7 @@ protected: private: size_t max_bytes_before_remerge; size_t max_bytes_before_external_sort; - const std::string tmp_path; + VolumePtr tmp_volume; size_t min_free_disk_space; Logger * log = &Logger::get("MergeSortingTransform"); diff --git a/dbms/src/Processors/tests/processors_test_aggregation.cpp b/dbms/src/Processors/tests/processors_test_aggregation.cpp index ccf31d953ac..903633c18ec 100644 --- a/dbms/src/Processors/tests/processors_test_aggregation.cpp +++ b/dbms/src/Processors/tests/processors_test_aggregation.cpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -187,6 +189,8 @@ try auto & factory = AggregateFunctionFactory::instance(); auto cur_path = Poco::Path().absolute().toString(); + auto disk = std::make_shared("tmp", cur_path, 0); + auto tmp_volume = std::make_shared("tmp", std::vector{disk}, 0); auto execute_one_stream = [&](String msg, size_t num_threads, bool two_level, bool external) { @@ -228,7 +232,7 @@ try group_by_two_level_threshold_bytes, max_bytes_before_external_group_by, false, /// empty_result_for_aggregation_by_empty_set - cur_path, /// tmp_path + tmp_volume, 1, /// max_threads 0 ); @@ -301,7 +305,7 @@ try group_by_two_level_threshold_bytes, max_bytes_before_external_group_by, false, /// empty_result_for_aggregation_by_empty_set - cur_path, /// tmp_path + tmp_volume, 1, /// max_threads 0 ); diff --git a/dbms/src/Processors/tests/processors_test_merge_sorting_transform.cpp b/dbms/src/Processors/tests/processors_test_merge_sorting_transform.cpp index 8e6b4655127..f0b20959e87 100644 --- a/dbms/src/Processors/tests/processors_test_merge_sorting_transform.cpp +++ b/dbms/src/Processors/tests/processors_test_merge_sorting_transform.cpp @@ -1,6 +1,8 @@ #include #include +#include +#include #include #include @@ -116,7 +118,10 @@ try Logger::root().setChannel(channel); Logger::root().setLevel("trace"); - auto execute_chain = []( + auto disk = std::make_shared("tmp", ".", 0); + auto tmp_volume = std::make_shared("tmp", std::vector{disk}, 0); + + auto execute_chain = [tmp_volume]( String msg, UInt64 source_block_size, UInt64 blocks_count, @@ -133,7 +138,9 @@ try SortDescription description = {{0, 1, 1}}; auto transform = std::make_shared( source->getPort().getHeader(), description, - max_merged_block_size, limit, max_bytes_before_remerge, max_bytes_before_external_sort, ".", 0); + max_merged_block_size, limit, + 
max_bytes_before_remerge, max_bytes_before_external_sort, + tmp_volume, 0); auto sink = std::make_shared(); connect(source->getPort(), transform->getInputs().front()); diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index dcb805b141e..d5499e0e9d7 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -105,6 +105,9 @@ public: /// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas. virtual bool supportsReplication() const { return false; } + /// Returns true if the storage supports parallel insert. + virtual bool supportsParallelInsert() const { return false; } + /// Returns true if the storage supports deduplication of inserted data blocks. virtual bool supportsDeduplication() const { return false; } diff --git a/dbms/src/Storages/LiveView/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp index 917b2be39fd..9331eb386b0 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.cpp +++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp @@ -140,12 +140,12 @@ BlockInputStreamPtr StorageLiveView::completeQuery(BlockInputStreams from) block_context->makeQueryContext(); auto blocks_storage_id = getBlocksStorageID(); - auto blocks_storage = StorageBlocks::createStorage(blocks_storage_id, parent_storage->getColumns(), + auto blocks_storage = StorageBlocks::createStorage(blocks_storage_id, getParentStorage()->getColumns(), std::move(from), QueryProcessingStage::WithMergeableState); block_context->addExternalTable(blocks_storage_id.table_name, blocks_storage); - InterpreterSelectQuery select(inner_blocks_query->clone(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete)); + InterpreterSelectQuery select(getInnerBlocksQuery(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete)); BlockInputStreamPtr data = std::make_shared(select.execute().in); /// Squashing is needed here because the view query can generate a lot of blocks @@ -255,16 +255,12 @@ StorageLiveView::StorageLiveView( throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); inner_query = query.select->list_of_selects->children.at(0); - inner_blocks_query = inner_query->clone(); - InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); - - select_table_id = extractDependentTable(inner_blocks_query, global_context, table_id_.table_name, inner_subquery); + auto inner_query_tmp = inner_query->clone(); + select_table_id = extractDependentTable(inner_query_tmp, global_context, table_id_.table_name, inner_subquery); global_context.addDependency(select_table_id, table_id_); - parent_storage = local_context.getTable(select_table_id); - is_temporary = query.temporary; temporary_live_view_timeout = local_context.getSettingsRef().temporary_live_view_timeout.totalSeconds(); @@ -310,6 +306,22 @@ Block StorageLiveView::getHeader() const return sample_block; } +ASTPtr StorageLiveView::getInnerBlocksQuery() +{ + std::lock_guard lock(sample_block_lock); + if (!inner_blocks_query) + { + inner_blocks_query = inner_query->clone(); + /// Rewrite inner query with right aliases for JOIN. 
+ /// It cannot be done in constructor or startup() because InterpreterSelectQuery may access table, + /// which is not loaded yet during server startup, so we do it lazily + InterpreterSelectQuery(inner_blocks_query, *live_view_context, SelectQueryOptions().modify().analyze()); + auto table_id = getStorageID(); + extractDependentTable(inner_blocks_query, global_context, table_id.table_name, inner_subquery); + } + return inner_blocks_query->clone(); +} + bool StorageLiveView::getNewBlocks() { SipHash hash; @@ -458,6 +470,7 @@ void StorageLiveView::startup() void StorageLiveView::shutdown() { + global_context.removeDependency(select_table_id, getStorageID()); bool expected = false; if (!shutdown_called.compare_exchange_strong(expected, true)) return; diff --git a/dbms/src/Storages/LiveView/StorageLiveView.h b/dbms/src/Storages/LiveView/StorageLiveView.h index 77992b97e19..0af1a297391 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.h +++ b/dbms/src/Storages/LiveView/StorageLiveView.h @@ -53,7 +53,7 @@ public: { return StorageID("", getStorageID().table_name + "_blocks"); } - StoragePtr getParentStorage() const { return parent_storage; } + StoragePtr getParentStorage() const { return global_context.getTable(select_table_id); } NameAndTypePair getColumn(const String & column_name) const override; bool hasColumn(const String & column_name) const override; @@ -65,12 +65,7 @@ public: return inner_subquery->clone(); return nullptr; } - ASTPtr getInnerBlocksQuery() const - { - if (inner_blocks_query) - return inner_blocks_query->clone(); - return nullptr; - } + ASTPtr getInnerBlocksQuery(); /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -177,10 +172,9 @@ private: ASTPtr inner_blocks_query; /// query over the mergeable blocks to produce final result Context & global_context; std::unique_ptr live_view_context; - StoragePtr parent_storage; bool is_temporary = false; - /// Mutex to protect access to sample block + /// Mutex to protect access to sample block and inner_blocks_query mutable std::mutex sample_block_lock; mutable Block sample_block; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index d62f78f5cb8..146440b11c3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -101,6 +101,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int READONLY_SETTING; extern const int ABORTED; + extern const int UNEXPECTED_AST_STRUCTURE; } @@ -629,7 +630,7 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new { auto new_ttl_entry = create_ttl_entry(ast); if (!only_check) - column_ttl_entries_by_name.emplace(name, new_ttl_entry); + column_ttl_entries_by_name[name] = new_ttl_entry; } } } @@ -637,36 +638,35 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new if (new_ttl_table_ast) { std::vector update_move_ttl_entries; - ASTPtr update_ttl_table_ast = nullptr; - TTLEntry update_ttl_table_entry; + TTLEntry update_rows_ttl_entry; bool seen_delete_ttl = false; for (auto ttl_element_ptr : new_ttl_table_ast->children) { - ASTTTLElement & ttl_element = static_cast(*ttl_element_ptr); - if (ttl_element.destination_type == PartDestinationType::DELETE) + const auto * ttl_element = ttl_element_ptr->as(); + if (!ttl_element) + throw Exception("Unexpected AST element in TTL expression", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + + if 
(ttl_element->destination_type == PartDestinationType::DELETE) { if (seen_delete_ttl) { throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); } - auto new_ttl_table_entry = create_ttl_entry(ttl_element.children[0]); + auto new_rows_ttl_entry = create_ttl_entry(ttl_element->children[0]); if (!only_check) - { - update_ttl_table_ast = ttl_element.children[0]; - update_ttl_table_entry = new_ttl_table_entry; - } + update_rows_ttl_entry = new_rows_ttl_entry; seen_delete_ttl = true; } else { - auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]); + auto new_ttl_entry = create_ttl_entry(ttl_element->children[0]); new_ttl_entry.entry_ast = ttl_element_ptr; - new_ttl_entry.destination_type = ttl_element.destination_type; - new_ttl_entry.destination_name = ttl_element.destination_name; + new_ttl_entry.destination_type = ttl_element->destination_type; + new_ttl_entry.destination_name = ttl_element->destination_name; if (!new_ttl_entry.getDestination(getStoragePolicy())) { String message; @@ -684,8 +684,8 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new if (!only_check) { - ttl_table_entry = update_ttl_table_entry; - ttl_table_ast = update_ttl_table_ast; + rows_ttl_entry = update_rows_ttl_entry; + ttl_table_ast = new_ttl_table_ast; auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); move_ttl_entries = update_move_ttl_entries; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 3c051829a61..ab5644749ee 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -576,8 +576,10 @@ public: bool hasSortingKey() const { return !sorting_key_columns.empty(); } bool hasPrimaryKey() const { return !primary_key_columns.empty(); } bool hasSkipIndices() const { return !skip_indices.empty(); } - bool hasTableTTL() const { return ttl_table_ast != nullptr; } + bool hasAnyColumnTTL() const { return !column_ttl_entries_by_name.empty(); } + bool hasAnyMoveTTL() const { return !move_ttl_entries.empty(); } + bool hasRowsTTL() const { return !rows_ttl_entry.isEmpty(); } /// Check that the part is not broken and calculate the checksums for it if they are not present. MutableDataPartPtr loadPartAndFixMetadata(const DiskPtr & disk, const String & relative_path); @@ -735,6 +737,8 @@ public: /// Checks if given part already belongs destination disk or volume for this rule. bool isPartInDestination(const StoragePolicyPtr & policy, const MergeTreeDataPart & part) const; + + bool isEmpty() const { return expression == nullptr; } }; std::optional selectTTLEntryForTTLInfos(const MergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; @@ -742,7 +746,7 @@ public: using TTLEntriesByName = std::unordered_map; TTLEntriesByName column_ttl_entries_by_name; - TTLEntry ttl_table_entry; + TTLEntry rows_ttl_entry; /// This mutex is required for background move operations which do not obtain global locks. 
mutable std::mutex move_ttl_entries_mutex; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 1c8e2e5621a..549345de8d1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -278,8 +278,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (data.hasTableTTL()) - updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); + if (data.hasRowsTTL()) + updateTTL(data.rows_ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); for (const auto & [name, ttl_entry] : data.column_ttl_entries_by_name) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp index d308667a67b..a70dfc2d78c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -217,7 +217,7 @@ std::vector MergeTreeReadPool::fillPerPartInfo( per_part_sum_marks.push_back(sum_marks); - per_part_columns_lock.emplace_back(part.data_part->columns_lock); + per_part_columns_lock.emplace_back(part.data_part, part.data_part->columns_lock); auto [required_columns, required_pre_columns, should_reorder] = getReadTaskColumns(data, part.data_part, column_names, prewhere_info, check_columns); diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.h b/dbms/src/Storages/MergeTree/MergeTreeReadPool.h index 2e9cb76f0cd..4151b781d6e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -93,7 +94,7 @@ private: const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); - std::vector> per_part_columns_lock; + std::vector>> per_part_columns_lock; const MergeTreeData & data; Names column_names; bool do_not_steal_tasks; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 703659bb4ea..4ebb51f0b41 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -55,15 +55,6 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr ttl_table = formattedAST(data.ttl_table_ast); - std::ostringstream ttl_move_stream; - for (const auto & ttl_entry : data.move_ttl_entries) - { - if (ttl_move_stream.tellp() > 0) - ttl_move_stream << ", "; - ttl_move_stream << formattedAST(ttl_entry.entry_ast); - } - ttl_move = ttl_move_stream.str(); - skip_indices = data.getIndices().toString(); if (data.canUseAdaptiveGranularity()) index_granularity_bytes = data_settings->index_granularity_bytes; @@ -95,9 +86,6 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!ttl_table.empty()) out << "ttl: " << ttl_table << "\n"; - if (!ttl_move.empty()) - out << "move ttl: " << ttl_move << "\n"; - if (!skip_indices.empty()) out << "indices: " << skip_indices << "\n"; @@ -139,9 +127,6 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("ttl: ", in)) 
in >> ttl_table >> "\n"; - if (checkString("move ttl: ", in)) - in >> ttl_move >> "\n"; - if (checkString("indices: ", in)) in >> skip_indices >> "\n"; @@ -252,21 +237,6 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl ErrorCodes::METADATA_MISMATCH); } - if (ttl_move != from_zk.ttl_move) - { - if (allow_alter) - { - diff.ttl_move_changed = true; - diff.new_ttl_move = from_zk.ttl_move; - } - else - throw Exception( - "Existing table metadata in ZooKeeper differs in move TTL." - " Stored in ZooKeeper: " + from_zk.ttl_move + - ", local: " + ttl_move, - ErrorCodes::METADATA_MISMATCH); - } - if (skip_indices != from_zk.skip_indices) { if (allow_alter) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 23fc4f6a024..d8af3c2087a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -28,7 +28,6 @@ struct ReplicatedMergeTreeTableMetadata String skip_indices; String constraints; String ttl_table; - String ttl_move; UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; @@ -54,12 +53,9 @@ struct ReplicatedMergeTreeTableMetadata bool ttl_table_changed = false; String new_ttl_table; - bool ttl_move_changed = false; - String new_ttl_move; - bool empty() const { - return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed && !ttl_move_changed; + return !sorting_key_changed && !skip_indices_changed && !ttl_table_changed && !constraints_changed; } }; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index ce07cbad2ba..c19d743794c 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -49,6 +49,12 @@ #include +namespace +{ +static const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY = 1; +static const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS = 2; +} + namespace DB { @@ -63,6 +69,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int TOO_MANY_ROWS; + extern const int UNABLE_TO_SKIP_UNUSED_SHARDS; } namespace ActionLocks @@ -340,12 +347,15 @@ BlockInputStreams StorageDistributed::read( : ClusterProxy::SelectStreamFactory( header, processed_stage, QualifiedTableName{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables()); + UInt64 force = settings.force_optimize_skip_unused_shards; if (settings.optimize_skip_unused_shards) { + ClusterPtr smaller_cluster; + auto table_id = getStorageID(); + if (has_sharding_key) { - auto smaller_cluster = skipUnusedShards(cluster, query_info); - auto table_id = getStorageID(); + smaller_cluster = skipUnusedShards(cluster, query_info); if (smaller_cluster) { @@ -354,10 +364,27 @@ BlockInputStreams StorageDistributed::read( "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): " " " << makeFormattedListOfShards(cluster)); } - else + } + + if (!smaller_cluster) + { + LOG_DEBUG(log, "Reading from " << table_id.getNameForLogs() << + (has_sharding_key ? 
"" : "(no sharding key)") << ": " + "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - " + "the query will be sent to all shards of the cluster"); + + if (force) { - LOG_DEBUG(log, "Reading from " << table_id.getNameForLogs() << ": " - "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster"); + std::stringstream exception_message; + if (has_sharding_key) + exception_message << "No sharding key"; + else + exception_message << "Sharding key " << sharding_key_column_name << " is not used"; + + if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS) + throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); + if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key) + throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); } } } @@ -548,11 +575,6 @@ void StorageDistributed::ClusterNodeData::shutdownAndDropAllData() /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr` ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const SelectQueryInfo & query_info) { - if (!has_sharding_key) - { - throw Exception("Internal error: cannot determine shards of a distributed table if no sharding expression is supplied", ErrorCodes::LOGICAL_ERROR); - } - const auto & select = query_info.query->as(); if (!select.prewhere() && !select.where()) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f5279ebef92..d8b25627a7e 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -652,7 +652,7 @@ bool StorageMergeTree::merge( { /// Force filter by TTL in 'OPTIMIZE ... FINAL' query to remove expired values from old parts /// without TTL infos or with outdated TTL infos, e.g. after 'ALTER ... MODIFY TTL' query. 
- bool force_ttl = (final && (hasTableTTL() || hasAnyColumnTTL())); + bool force_ttl = (final && (hasRowsTTL() || hasAnyColumnTTL())); new_part = merger_mutator.mergePartsToTemporaryPart( future_part, *merge_entry, table_lock_holder, time(nullptr), diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 765b43ed90e..96e5d3cf2ed 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -33,6 +33,8 @@ public: std::string getName() const override { return merging_params.getModeName() + "MergeTree"; } + bool supportsParallelInsert() const override { return true; } + bool supportsIndexForIn() const override { return true; } Pipes readWithProcessors( diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 2fdd7daa684..db113624f68 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3132,7 +3132,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p return false; }; - bool force_ttl = (final && (hasTableTTL() || hasAnyColumnTTL())); + bool force_ttl = (final && (hasRowsTTL() || hasAnyColumnTTL())); const auto storage_settings_ptr = getSettings(); if (!partition && final) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 0fff99b00f3..aadbdd1b558 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -84,6 +84,7 @@ public: std::string getName() const override { return "Replicated" + merging_params.getModeName() + "MergeTree"; } + bool supportsParallelInsert() const override { return true; } bool supportsReplication() const override { return true; } bool supportsDeduplication() const override { return true; } diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 550ead28996..65c4f19b7cb 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -61,7 +61,6 @@ const char * auto_config_build[] "USE_HYPERSCAN", "@USE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_POCO_REDIS", "@USE_POCO_REDIS@", - "USE_REPLXX", "@USE_REPLXX@", nullptr, nullptr }; diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index d2d14e88b9a..9ccac05b9f4 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -360,6 +360,7 @@ def test_max_data_part_size(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_with_overflow","MergeTree()"), ("replicated_mt_with_overflow","ReplicatedMergeTree('/clickhouse/replicated_mt_with_overflow', '1')",), @@ -454,6 +455,7 @@ def test_background_move(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("stopped_moving_mt","MergeTree()"), ("stopped_moving_replicated_mt","ReplicatedMergeTree('/clickhouse/stopped_moving_replicated_mt', '1')",), @@ -720,6 +722,7 @@ def produce_alter_move(node, name): pass +@pytest.mark.skip(reason="Flappy test") 
@pytest.mark.parametrize("name,engine", [ ("concurrently_altering_mt","MergeTree()"), ("concurrently_altering_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_altering_replicated_mt', '1')",), @@ -773,6 +776,7 @@ def test_concurrent_alter_move(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("concurrently_dropping_mt","MergeTree()"), ("concurrently_dropping_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_dropping_replicated_mt', '1')",), @@ -901,6 +905,8 @@ def test_mutate_to_another_disk(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("alter_modifying_mt","MergeTree()"), ("replicated_alter_modifying_mt","ReplicatedMergeTree('/clickhouse/replicated_alter_modifying_mt', '1')",), diff --git a/dbms/tests/integration/test_parts_delete_zookeeper/test.py b/dbms/tests/integration/test_parts_delete_zookeeper/test.py index 1c23a4a658a..7e4a8d36741 100644 --- a/dbms/tests/integration/test_parts_delete_zookeeper/test.py +++ b/dbms/tests/integration/test_parts_delete_zookeeper/test.py @@ -40,13 +40,13 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "2\n" node1.query("OPTIMIZE TABLE test_table FINAL") - assert node1.query("SELECT count(*) from system.parts") == "3\n" + assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n" - assert_eq_with_retry(node1, "SELECT count(*) from system.parts", "1") + assert_eq_with_retry(node1, "SELECT count(*) from system.parts where table = 'test_table' and active = 1", "1") node1.query("TRUNCATE TABLE test_table") - assert node1.query("SELECT count(*) from system.parts") == "0\n" + assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "0\n" node1.query("INSERT INTO test_table VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)") node1.query("INSERT INTO test_table VALUES ('2018-10-01', 4), ('2018-10-02', 5), ('2018-10-03', 6)") @@ -56,6 +56,6 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): node1.query("OPTIMIZE TABLE test_table FINAL") pm.drop_instance_zk_connections(node1) time.sleep(10) # > old_parts_lifetime - assert node1.query("SELECT count(*) from system.parts") == "3\n" + assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n" - assert_eq_with_retry(node1, "SELECT count(*) from system.parts", "1") + assert_eq_with_retry(node1, "SELECT count(*) from system.parts where table = 'test_table' and active = 1", "1") diff --git a/dbms/tests/integration/test_prometheus_endpoint/test.py b/dbms/tests/integration/test_prometheus_endpoint/test.py index dcd31621cb5..25d83cfb47c 100644 --- a/dbms/tests/integration/test_prometheus_endpoint/test.py +++ b/dbms/tests/integration/test_prometheus_endpoint/test.py @@ -3,6 +3,7 @@ import pytest import re import requests +import time from helpers.cluster import ClickHouseCluster @@ -24,7 +25,7 @@ def parse_response_line(line): "# HELP", "# TYPE", ] - assert any(line.startswith(prefix) for prefix in allowed_prefixes), msg + assert any(line.startswith(prefix) for prefix in allowed_prefixes) if line.startswith("#"): return {} @@ -34,12 +35,23 @@ def parse_response_line(line): return {name: int(val)} -def 
get_and_check_metrics(): - response = requests.get("http://{host}:{port}/metrics".format( - host=node.ip_address, port=8001), allow_redirects=False) +def get_and_check_metrics(retries): + while True: + try: + response = requests.get("http://{host}:{port}/metrics".format( + host=node.ip_address, port=8001), allow_redirects=False) - if response.status_code != 200: - response.raise_for_status() + if response.status_code != 200: + response.raise_for_status() + + break + except: + if retries >= 0: + retries -= 1 + time.sleep(0.5) + continue + else: + raise assert response.headers['content-type'].startswith('text/plain') @@ -55,13 +67,13 @@ def get_and_check_metrics(): def test_prometheus_endpoint(start_cluster): - metrics_dict = get_and_check_metrics() + metrics_dict = get_and_check_metrics(10) assert metrics_dict['ClickHouseProfileEvents_Query'] >= 0 prev_query_count = metrics_dict['ClickHouseProfileEvents_Query'] - resp = node.query("SELECT 1") - resp = node.query("SELECT 2") - resp = node.query("SELECT 3") + node.query("SELECT 1") + node.query("SELECT 2") + node.query("SELECT 3") - metrics_dict = get_and_check_metrics() + metrics_dict = get_and_check_metrics(10) assert metrics_dict['ClickHouseProfileEvents_Query'] >= prev_query_count + 3 diff --git a/dbms/tests/integration/test_server_initialization/test.py b/dbms/tests/integration/test_server_initialization/test.py index 64ed3181118..22c8b13d392 100644 --- a/dbms/tests/integration/test_server_initialization/test.py +++ b/dbms/tests/integration/test_server_initialization/test.py @@ -1,3 +1,4 @@ +import time import pytest from helpers.cluster import ClickHouseCluster @@ -6,7 +7,7 @@ from helpers.cluster import ClickHouseCluster def started_cluster(): try: cluster = ClickHouseCluster(__file__) - instance = cluster.add_instance('dummy', clickhouse_path_dir='clickhouse_path') + instance = cluster.add_instance('dummy', clickhouse_path_dir='clickhouse_path', stay_alive=True) cluster.start() cluster_fail = ClickHouseCluster(__file__, name='fail') @@ -34,3 +35,14 @@ def test_partially_dropped_tables(started_cluster): "./var/lib/clickhouse/metadata/default/sophisticated_default.sql\n" assert instance.query("SELECT n FROM should_be_restored") == "1\n2\n3\n" assert instance.query("SELECT count() FROM system.tables WHERE name='should_be_dropped'") == "0\n" + + +def test_live_view_dependency(started_cluster): + instance = started_cluster.instances['dummy'] + instance.query("CREATE DATABASE a_load_first") + instance.query("CREATE DATABASE b_load_second") + instance.query("CREATE TABLE b_load_second.mt (a Int32) Engine=MergeTree order by tuple()") + instance.query("CREATE LIVE VIEW a_load_first.lv AS SELECT sum(a) FROM b_load_second.mt", settings={'allow_experimental_live_view': 1}) + instance.restart_clickhouse() + time.sleep(5) + instance.query("SELECT 1") diff --git a/dbms/tests/integration/test_settings_constraints_distributed/__init__.py b/dbms/tests/integration/test_settings_constraints_distributed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml b/dbms/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml new file mode 100644 index 00000000000..5d8df3ac56d --- /dev/null +++ b/dbms/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + distributed + + + node2 + 9000 + distributed + + + + + diff --git 
a/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_cluster.xml b/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_cluster.xml new file mode 100644 index 00000000000..e1a1122d9d6 --- /dev/null +++ b/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_cluster.xml @@ -0,0 +1,41 @@ + + + + 10000000000 + 0 + random + + + 1000000 + 0 + random + 2 + + + 1 + 1000000 + + + + + + + + default + default + + + + distributed_profile + default + + ::/0 + + + + + + + + + diff --git a/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_distributed.xml b/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_distributed.xml new file mode 100644 index 00000000000..15db7b92b15 --- /dev/null +++ b/dbms/tests/integration/test_settings_constraints_distributed/configs/users_on_distributed.xml @@ -0,0 +1,34 @@ + + + + 1000000 + 0 + random + + + 2000000000 + 0 + random + + + + + + default + default + + + + remote_profile + default + + ::/0 + + + + + + + + + diff --git a/dbms/tests/integration/test_settings_constraints_distributed/test.py b/dbms/tests/integration/test_settings_constraints_distributed/test.py new file mode 100644 index 00000000000..7b73af2d5fe --- /dev/null +++ b/dbms/tests/integration/test_settings_constraints_distributed/test.py @@ -0,0 +1,42 @@ +import time + +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users_on_cluster.xml']) +node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users_on_cluster.xml']) + +distributed = cluster.add_instance('distributed', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users_on_distributed.xml']) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in [node1, node2]: + node.query("CREATE TABLE sometable(date Date, id UInt32, value Int32) ENGINE = MergeTree() ORDER BY id;") + node.query("INSERT INTO sometable VALUES (toDate('2020-01-20'), 1, 1)") + + distributed.query("CREATE TABLE proxy (date Date, id UInt32, value Int32) ENGINE = Distributed(test_cluster, default, sometable);") + + yield cluster + + finally: + cluster.shutdown() + +def test_settings_under_remote(started_cluster): + assert distributed.query("SELECT COUNT() FROM proxy") == '1\n' + + with pytest.raises(QueryRuntimeException): + distributed.query("SELECT COUNT() FROM proxy", user='remote') + + assert distributed.query("SELECT COUNT() FROM proxy", settings={"max_memory_usage": 1000000}, user='remote') == '1\n' + + with pytest.raises(QueryRuntimeException): + distributed.query("SELECT COUNT() FROM proxy", settings={"max_memory_usage": 1000001}, user='remote') diff --git a/dbms/tests/integration/test_text_log_level/__init__.py b/dbms/tests/integration/test_text_log_level/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_text_log_level/configs/config.d/text_log.xml b/dbms/tests/integration/test_text_log_level/configs/config.d/text_log.xml new file mode 100644 index 00000000000..3712c5851ae --- /dev/null +++ b/dbms/tests/integration/test_text_log_level/configs/config.d/text_log.xml @@ -0,0 +1,5 @@ + + + information + + diff --git 
a/dbms/tests/integration/test_text_log_level/test.py b/dbms/tests/integration/test_text_log_level/test.py new file mode 100644 index 00000000000..d7cf72fd9ea --- /dev/null +++ b/dbms/tests/integration/test_text_log_level/test.py @@ -0,0 +1,32 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', config_dir='configs') + +@pytest.fixture(scope='module') +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + +def test_basic(start_cluster): + with pytest.raises(QueryRuntimeException): + # generates log with "Error" level + node.query('SELECT * FROM no_such_table') + + node.query('SYSTEM FLUSH LOGS') + + assert int(node.query("SELECT count() FROM system.text_log WHERE level = 'Trace'")) == 0 + assert int(node.query("SELECT count() FROM system.text_log WHERE level = 'Debug'")) == 0 + assert int(node.query("SELECT count() FROM system.text_log WHERE level = 'Information'")) >= 1 + assert int(node.query("SELECT count() FROM system.text_log WHERE level = 'Error'")) >= 1 diff --git a/dbms/tests/integration/test_tmp_policy/__init__.py b/dbms/tests/integration/test_tmp_policy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_tmp_policy/configs/config.d/storage_configuration.xml b/dbms/tests/integration/test_tmp_policy/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..f8574a38208 --- /dev/null +++ b/dbms/tests/integration/test_tmp_policy/configs/config.d/storage_configuration.xml @@ -0,0 +1,25 @@ + + + + + /disk1/ + + + /disk2/ + + + + + + +
+                        <disk>disk1</disk>
+                        <disk>disk2</disk>
+                    </volume>
+                </volumes>
+            </tmp>
+        </policies>
+    </storage_configuration>
+
+    <tmp_policy>tmp</tmp_policy>
diff --git a/dbms/tests/integration/test_tmp_policy/test.py b/dbms/tests/integration/test_tmp_policy/test.py new file mode 100644 index 00000000000..5c5900cc9dc --- /dev/null +++ b/dbms/tests/integration/test_tmp_policy/test.py @@ -0,0 +1,34 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', + config_dir='configs', + tmpfs=['/disk1:size=100M', '/disk2:size=100M']) + +@pytest.fixture(scope='module') +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_different_versions(start_cluster): + query = 'SELECT count(ignore(*)) FROM (SELECT * FROM system.numbers LIMIT 1e7) GROUP BY number' + settings = { + 'max_bytes_before_external_group_by': 1<<20, + 'max_bytes_before_external_sort': 1<<20, + } + + assert node.contains_in_log('Setting up /disk1/ to store temporary data in it') + assert node.contains_in_log('Setting up /disk2/ to store temporary data in it') + + node.query(query, settings=settings) + assert node.contains_in_log('Writing part of aggregation data into temporary file /disk1/') + assert node.contains_in_log('Writing part of aggregation data into temporary file /disk2/') diff --git a/dbms/tests/integration/test_ttl_move/test.py b/dbms/tests/integration/test_ttl_move/test.py index 7fabdd85230..b498178e4d7 100644 --- a/dbms/tests/integration/test_ttl_move/test.py +++ b/dbms/tests/integration/test_ttl_move/test.py @@ -50,6 +50,7 @@ def get_used_disks_for_table(node, table_name): return node.query("select disk_name from system.parts where table == '{}' and active=1 order by modification_time".format(table_name)).strip().split('\n') +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,alter", [ ("mt_test_rule_with_invalid_destination","MergeTree()",0), ("replicated_mt_test_rule_with_invalid_destination","ReplicatedMergeTree('/clickhouse/replicated_test_rule_with_invalid_destination', '1')",0), @@ -109,6 +110,7 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_inserts_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_inserts_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_do_not_work', '1')",0), @@ -141,6 +143,7 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",0), @@ -187,6 +190,7 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_volume_work","MergeTree()"), ("replicated_mt_test_moves_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_volume_work', '1')"), @@ -233,6 +237,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") 
@pytest.mark.parametrize("name,engine,positive", [ ("mt_test_inserts_to_volume_do_not_work","MergeTree()",0), ("replicated_mt_test_inserts_to_volume_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_do_not_work', '1')",0), @@ -271,6 +276,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_disk_eventually_work","MergeTree()"), ("replicated_mt_test_moves_to_disk_eventually_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_eventually_work', '1')"), @@ -326,6 +332,7 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_merges_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_merges_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_do_not_work', '1')",0), @@ -383,6 +390,7 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_merges_with_full_disk_work","MergeTree()"), ("replicated_mt_test_merges_with_full_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_with_full_disk_work', '1')"), @@ -449,6 +457,7 @@ def test_merges_with_full_disk_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_after_merges_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",0), @@ -501,6 +510,7 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive,bar", [ ("mt_test_moves_after_alter_do_not_work","MergeTree()",0,"DELETE"), ("replicated_mt_test_moves_after_alter_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_alter_do_not_work', '1')",0,"DELETE"), @@ -544,6 +554,7 @@ def test_ttls_do_not_work_after_alter(started_cluster, name, engine, positive, b node1.query("DROP TABLE IF EXISTS {}".format(name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_alter_multiple_ttls_positive", "MergeTree()", True), ("mt_replicated_test_alter_multiple_ttls_positive", "ReplicatedMergeTree('/clickhouse/replicated_test_alter_multiple_ttls_positive', '1')", True), @@ -626,6 +637,7 @@ limitations under the License.""" node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("concurrently_altering_ttl_mt","MergeTree()"), ("concurrently_altering_ttl_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_altering_ttl_replicated_mt', '1')",), @@ -716,6 +728,7 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) +@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,positive", [ ("test_double_move_while_select_negative", 0), 
("test_double_move_while_select_positive", 1), diff --git a/dbms/tests/performance/math.xml b/dbms/tests/performance/math.xml index 5f4f302a0e8..f4d31713a08 100644 --- a/dbms/tests/performance/math.xml +++ b/dbms/tests/performance/math.xml @@ -1,14 +1,18 @@ - once + + loop + + 5 + 10000 + - 1000 - 10000 + 50 + 60000 - func @@ -37,7 +41,7 @@ - SELECT count() FROM system.numbers WHERE NOT ignore({func}(toFloat64(number))) - SELECT count() FROM system.numbers WHERE NOT ignore({func}(toFloat32(number))) - SELECT count() FROM system.numbers WHERE NOT ignore({func}(number)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat64(number))) + SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat32(number))) + SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(number)) diff --git a/dbms/tests/performance/parallel_insert.xml b/dbms/tests/performance/parallel_insert.xml new file mode 100644 index 00000000000..44a2964f881 --- /dev/null +++ b/dbms/tests/performance/parallel_insert.xml @@ -0,0 +1,24 @@ + + loop + + + + 2 + + + + + + + + + default.hits_10m_single + + + CREATE TABLE hits2 AS hits_10m_single + set max_insert_threads=8 + + INSERT INTO hits2 SELECT * FROM hits_10m_single + + DROP TABLE IF EXISTS hits2 + diff --git a/dbms/tests/performance/parse_engine_file.xml b/dbms/tests/performance/parse_engine_file.xml index 080acbd53f2..8a0054bdd7f 100644 --- a/dbms/tests/performance/parse_engine_file.xml +++ b/dbms/tests/performance/parse_engine_file.xml @@ -34,6 +34,7 @@ TSKV RowBinary Native + Avro diff --git a/dbms/tests/performance/select_format.xml b/dbms/tests/performance/select_format.xml index 621247fee1e..189b35a2700 100644 --- a/dbms/tests/performance/select_format.xml +++ b/dbms/tests/performance/select_format.xml @@ -44,6 +44,7 @@ XML ODBCDriver2 MySQLWire + Avro diff --git a/dbms/tests/queries/0_stateless/00265_content_type.sh b/dbms/tests/queries/0_stateless/00265_content_type.sh deleted file mode 100755 index feddb46a6a4..00000000000 --- a/dbms/tests/queries/0_stateless/00265_content_type.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&default_format=JSONCompact" --data-binary @- <<< "SELECT 1" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT JSON" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT TabSeparated" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT Vertical" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT Native" 2>&1 | grep '< Content-Type'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT RowBinary" 2>&1 | grep '< Content-Type'; diff --git a/dbms/tests/queries/0_stateless/00265_content_type.reference b/dbms/tests/queries/0_stateless/00265_content_type_and_format.reference similarity index 59% rename from dbms/tests/queries/0_stateless/00265_content_type.reference rename to dbms/tests/queries/0_stateless/00265_content_type_and_format.reference index 0693d1118da..dbe9ebc0f58 100644 --- a/dbms/tests/queries/0_stateless/00265_content_type.reference +++ b/dbms/tests/queries/0_stateless/00265_content_type_and_format.reference @@ -1,7 +1,14 @@ -< Content-Type: application/json; charset=UTF-8 -< Content-Type: application/json; charset=UTF-8 -< Content-Type: text/tab-separated-values; charset=UTF-8 -< Content-Type: text/tab-separated-values; charset=UTF-8 -< Content-Type: text/plain; charset=UTF-8 -< Content-Type: application/octet-stream -< Content-Type: application/octet-stream +< Content-Type: application/json; charset=UTF-8 +< X-ClickHouse-Format: JSONCompact +< Content-Type: application/json; charset=UTF-8 +< X-ClickHouse-Format: JSON +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Format: Vertical +< Content-Type: application/octet-stream +< X-ClickHouse-Format: Native +< Content-Type: application/octet-stream +< X-ClickHouse-Format: RowBinary diff --git a/dbms/tests/queries/0_stateless/00265_content_type_and_format.sh b/dbms/tests/queries/0_stateless/00265_content_type_and_format.sh new file mode 100755 index 00000000000..2a36a17c6a1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00265_content_type_and_format.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&default_format=JSONCompact" --data-binary @- <<< "SELECT 1" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT JSON" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT TabSeparated" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT Vertical" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT Native" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; +${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT RowBinary" 2>&1 | grep -e '< Content-Type' -e '< X-ClickHouse-Format' | sed 's/\r$//' | sort; diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index e235da3c192..7251fc2cf21 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh ( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" | grep -v "X-ClickHouse-Format:" if [[ `${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}&query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then echo FAIL diff --git a/dbms/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/dbms/tests/queries/0_stateless/01023_materialized_view_query_context.sql index 1a7f8c15678..d68d6df6ea3 100644 --- a/dbms/tests/queries/0_stateless/01023_materialized_view_query_context.sql +++ b/dbms/tests/queries/0_stateless/01023_materialized_view_query_context.sql @@ -1,5 +1,6 @@ -- Create dictionary, since dictGet*() uses DB::Context in executeImpl() -- (To cover scope of the Context in DB::PushingToViewsBlockOutputStream::process) +DROP TABLE IF EXISTS mv; DROP DATABASE IF EXISTS dict_in_01023; CREATE DATABASE dict_in_01023; diff --git a/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference b/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference index 30237035c2c..49233946390 100644 --- a/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference +++ b/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference @@ -1,2 +1,10 @@ -10000000001 -10000000001 +1000000000 +3221225472 +1567000 +125952 +1567000 +125952 +12000000 +32505856 +1000000000000 +1099511627776 diff --git a/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql b/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql index 494e43b001f..6a4eadf6a40 100644 --- 
a/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql +++ b/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql @@ -1,7 +1,20 @@ -SET max_memory_usage = 10000000001; - +SET max_memory_usage = '1G'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; - -SET max_memory_usage = '1G'; -- { serverError 27 } - +SET max_memory_usage = '3Gi'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '1567k'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '123ki'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '1567K'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '123Ki'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '12M'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '31Mi'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '1T'; +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; +SET max_memory_usage = '1Ti'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; diff --git a/dbms/tests/queries/0_stateless/01060_avro.reference b/dbms/tests/queries/0_stateless/01060_avro.reference new file mode 100644 index 00000000000..21fcc53f081 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01060_avro.reference @@ -0,0 +1,36 @@ +=== input += primitive +1,1,2,3.4,5.6,"b1","s1" +0,-1,9223372036854775807,3.00004,0.00001,"","" +1,2,"s1" +0,9223372036854775807,"" +"s1",2,1 +"",9223372036854775807,0 +"s1" +"" += complex +"A","t","['s1','s2']","[['a1'],['a2']]","s1",\N,"79cd909892d7e7ade1987cc7422628ba" +"C","f","[]","[]",\N,123,"79cd909892d7e7ade1987cc7422628ba" +"79cd909892d7e7ade1987cc7422628ba" +"79cd909892d7e7ade1987cc7422628ba" += logical_types +"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000" +18250,1578641516227,1578641516227000 += compression +1000 +1000 += other +0 +1000 +not found +=== output += primitive +1,1,2,3.4,5.6,"b1","s1" += complex +"A","t","['s1','s2']","[['a1'],['a2']]","s1",\N,"79cd909892d7e7ade1987cc7422628ba" += logical_types +"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000" += other +0 +1000 +147 diff --git a/dbms/tests/queries/0_stateless/01060_avro.sh b/dbms/tests/queries/0_stateless/01060_avro.sh new file mode 100755 index 00000000000..b57a7ad7a85 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01060_avro.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CUR_DIR/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +# input +echo === input +echo = primitive + +cat $DATA_DIR/primitive.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a_bool UInt8, b_int Int32, c_long Int64, d_float Float32, e_double Float64, f_bytes String, g_string String' -q 'select * from table' +cat $DATA_DIR/primitive.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a_bool UInt8, c_long Int64, g_string String' -q 'select * from table' +cat $DATA_DIR/primitive.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'g_string String, c_long Int64, a_bool UInt8' -q 'select * from table' +cat $DATA_DIR/primitive.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'g_string String' -q 'select * from table' + +echo = complex +cat $DATA_DIR/complex.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a_enum_to_string String, b_enum_to_enum Enum('t' = 1, 'f' = 0), c_array_string Array(String), d_array_array_string Array(Array(String)), e_union_null_string Nullable(String), f_union_long_null Nullable(Int64), g_fixed FixedString(32)" -q 'select * from table' +cat $DATA_DIR/complex.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "g_fixed FixedString(32)" -q 'select * from table' + +echo = logical_types +cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC')" -q 'select * from table' +cat $DATA_DIR/logical_types.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a_date Int32, b_timestamp_millis Int64, c_timestamp_micros Int64' -q 'select * from table' + + + +echo = compression +cat $DATA_DIR/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' +cat $DATA_DIR/simple.deflate.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' + +#snappy is optional +#cat $DATA_DIR/simple.snappy.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' + +echo = other +#no data +cat $DATA_DIR/empty.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' +# type mismatch +cat $DATA_DIR/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int32' -q 'select count() from table' +# field not found +cat $DATA_DIR/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'b Int64' -q 'select count() from table' 2>&1 | grep -i 'not found' -o + + + + + + +# output +echo === output + +echo = primitive +S1="a_bool UInt8, b_int Int32, c_long Int64, d_float Float32, e_double Float64, f_bytes String, g_string String" +echo '1,1,2,3.4,5.6,"b1","s1"' | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S1" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S1" -q 'select * from table' + +echo = complex +S2="a_enum_to_string String, b_enum_to_enum Enum('t' = 1, 'f' = 0), c_array_string Array(String), d_array_array_string Array(Array(String)), e_union_null_string Nullable(String), f_union_long_null Nullable(Int64), g_fixed FixedString(32)" +echo "\"A\",\"t\",\"['s1','s2']\",\"[['a1'],['a2']]\",\"s1\",\N,\"79cd909892d7e7ade1987cc7422628ba\"" | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S2" -q "select * from table 
format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S2" -q 'select * from table' + +echo = logical_types +S3="a_date Date, b_timestamp_millis DateTime64(3, 'UTC'), c_timestamp_micros DateTime64(6, 'UTC')" +echo '"2019-12-20","2020-01-10 07:31:56.227","2020-01-10 07:31:56.227000"' | ${CLICKHOUSE_LOCAL} --input-format CSV -S "$S3" -q "select * from table format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S3" -q 'select * from table' + +echo = other +S4="a Int64" +${CLICKHOUSE_LOCAL} -q "select toInt64(number) as a from numbers(0) format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S4" -q 'select count() from table' +${CLICKHOUSE_LOCAL} -q "select toInt64(number) as a from numbers(1000) format Avro" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "$S4" -q 'select count() from table' + +# type supported via conversion +${CLICKHOUSE_LOCAL} -q "select toInt16(123) as a format Avro" | wc -c \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql b/dbms/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql index 6c8f6c637fc..730263a2b12 100644 --- a/dbms/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql +++ b/dbms/tests/queries/0_stateless/01060_shutdown_table_after_detach.sql @@ -1,4 +1,5 @@ -CREATE TABLE IF NOT EXISTS test Engine = MergeTree ORDER BY number AS SELECT number, toString(rand()) x from numbers(10000000); +DROP TABLE IF EXISTS test; +CREATE TABLE test Engine = MergeTree ORDER BY number AS SELECT number, toString(rand()) x from numbers(10000000); SELECT count() FROM test; diff --git a/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.reference b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.reference new file mode 100644 index 00000000000..a07a1e62f3a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.reference @@ -0,0 +1,8 @@ +1 +1 +2 +3 +1 0 +1 0 +2 0 +3 0 diff --git a/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.sql b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.sql new file mode 100644 index 00000000000..6be401597f6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS mv; +DROP TABLE IF EXISTS mv_source; +DROP TABLE IF EXISTS mv_target; + +CREATE TABLE mv_source (`a` UInt64) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE mv_target (`a` UInt64) ENGINE = MergeTree ORDER BY tuple(); + +CREATE MATERIALIZED VIEW mv TO mv_target AS SELECT * FROM mv_source; + +INSERT INTO mv_source VALUES (1); + +ALTER TABLE mv_target ADD COLUMN b UInt8; +INSERT INTO mv_source VALUES (1),(2),(3); + +SELECT * FROM mv ORDER BY a; +SELECT * FROM mv_target ORDER BY a; diff --git a/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.reference b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.reference new file mode 100644 index 00000000000..97a02d3b487 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.reference @@ -0,0 +1,8 @@ +1 +1 +2 +3 +1 2 +1 2 +2 3 +3 4 diff --git a/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.sql 
b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.sql new file mode 100644 index 00000000000..dcfba76f9c2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01069_materialized_view_alter_target_table_with_default_expression.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS mv; +DROP TABLE IF EXISTS mv_source; +DROP TABLE IF EXISTS mv_target; + +CREATE TABLE mv_source (`a` UInt64) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE mv_target (`a` UInt64) ENGINE = MergeTree ORDER BY tuple(); + +CREATE MATERIALIZED VIEW mv TO mv_target AS SELECT * FROM mv_source; + +INSERT INTO mv_source VALUES (1); + +ALTER TABLE mv_target ADD COLUMN b UInt8 DEFAULT a + 1; +INSERT INTO mv_source VALUES (1),(2),(3); + +SELECT * FROM mv ORDER BY a; +SELECT * FROM mv_target ORDER BY a; diff --git a/dbms/tests/queries/0_stateless/01070_alter_with_ttl.reference b/dbms/tests/queries/0_stateless/01070_alter_with_ttl.reference new file mode 100644 index 00000000000..308d004ebf7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_alter_with_ttl.reference @@ -0,0 +1,2 @@ +CREATE TABLE default.alter_ttl (`i` Int, `s` String TTL toDate(\'2020-01-01\')) ENGINE = MergeTree ORDER BY i TTL toDate(\'2020-05-05\') SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_ttl (`d` Date, `s` String TTL d + toIntervalDay(1)) ENGINE = MergeTree ORDER BY d TTL d + toIntervalMonth(1) SETTINGS index_granularity = 8192 diff --git a/dbms/tests/queries/0_stateless/01070_alter_with_ttl.sql b/dbms/tests/queries/0_stateless/01070_alter_with_ttl.sql new file mode 100644 index 00000000000..3adc3ccd6ae --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_alter_with_ttl.sql @@ -0,0 +1,12 @@ +drop table if exists alter_ttl; + +create table alter_ttl(i Int) engine = MergeTree order by i ttl toDate('2020-05-05'); +alter table alter_ttl add column s String; +alter table alter_ttl modify column s String ttl toDate('2020-01-01'); +show create table alter_ttl; +drop table alter_ttl; + +create table alter_ttl(d Date, s String) engine = MergeTree order by d ttl d + interval 1 month; +alter table alter_ttl modify column s String ttl d + interval 1 day; +show create table alter_ttl; +drop table alter_ttl; diff --git a/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.reference b/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.reference new file mode 100644 index 00000000000..1dee767cd4e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.reference @@ -0,0 +1,3 @@ +0 +0 +60 diff --git a/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql b/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql new file mode 100644 index 00000000000..9cca089ce08 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table; +SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; -- { serverError 60 } +CREATE TABLE test_table_for_01070_exception_code_in_query_log_table (value UInt64) ENGINE=Memory(); +SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; +SYSTEM FLUSH LOGS; +SELECT exception_code FROM system.query_log WHERE query='SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' ORDER BY exception_code; +DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table; diff --git 
a/dbms/tests/queries/0_stateless/01070_template_empty_file.reference b/dbms/tests/queries/0_stateless/01070_template_empty_file.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01070_template_empty_file.sql b/dbms/tests/queries/0_stateless/01070_template_empty_file.sql new file mode 100644 index 00000000000..46a8f38f80b --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_template_empty_file.sql @@ -0,0 +1,2 @@ +select 1 format Template settings format_template_row='01070_nonexistent_file.txt'; -- { clientError 107 } +select 1 format Template settings format_template_row='/dev/null'; -- { clientError 474 } diff --git a/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.reference b/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.reference new file mode 100644 index 00000000000..b981dd4e1b6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.reference @@ -0,0 +1,3 @@ +\N 1 +\N 1 +\N 1 diff --git a/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.sql b/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.sql new file mode 100644 index 00000000000..9283cc76cd7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01070_to_decimal_or_null_exception.sql @@ -0,0 +1,7 @@ +SELECT toDecimal32('e', 1); -- { serverError 72 } +SELECT toDecimal64('e', 2); -- { serverError 72 } +SELECT toDecimal128('e', 3); -- { serverError 72 } + +SELECT toDecimal32OrNull('e', 1) x, isNull(x); +SELECT toDecimal64OrNull('e', 2) x, isNull(x); +SELECT toDecimal128OrNull('e', 3) x, isNull(x); diff --git a/dbms/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.reference b/dbms/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql b/dbms/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql new file mode 100644 index 00000000000..f12d5f8846d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql @@ -0,0 +1,26 @@ +set optimize_skip_unused_shards=1; + +drop table if exists data_01068; +drop table if exists dist_01068; + +create table data_01068 (key Int) Engine=Null(); + +create table dist_01068 as data_01068 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01068); +set force_optimize_skip_unused_shards=0; +select * from dist_01068; +set force_optimize_skip_unused_shards=1; +select * from dist_01068; +set force_optimize_skip_unused_shards=2; +select * from dist_01068; -- { serverError 507 } + +drop table if exists dist_01068; +create table dist_01068 as data_01068 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01068, key%2); +set force_optimize_skip_unused_shards=0; +select * from dist_01068; +set force_optimize_skip_unused_shards=1; +select * from dist_01068; -- { serverError 507 } +set force_optimize_skip_unused_shards=2; +select * from dist_01068; -- { serverError 507 } + +drop table if exists data_01068; +drop table if exists dist_01068; diff --git a/dbms/tests/queries/0_stateless/01071_live_view_detach_dependency.reference b/dbms/tests/queries/0_stateless/01071_live_view_detach_dependency.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01071_live_view_detach_dependency.sql b/dbms/tests/queries/0_stateless/01071_live_view_detach_dependency.sql 
new file mode 100644 index 00000000000..22d8adc503c --- /dev/null +++ b/dbms/tests/queries/0_stateless/01071_live_view_detach_dependency.sql @@ -0,0 +1,8 @@ +SET allow_experimental_live_view = 1; +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS lv; +CREATE TABLE test (n Int8) ENGINE = Memory; +CREATE LIVE VIEW lv AS SELECT * FROM test; +DETACH TABLE lv; +INSERT INTO test VALUES (42); +DROP TABLE test; diff --git a/dbms/tests/queries/0_stateless/data_avro/complex.avro b/dbms/tests/queries/0_stateless/data_avro/complex.avro new file mode 100644 index 00000000000..0880f581882 Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/complex.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/complex.avsc b/dbms/tests/queries/0_stateless/data_avro/complex.avsc new file mode 100644 index 00000000000..325169aeb57 --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/complex.avsc @@ -0,0 +1,20 @@ +{ + "type": "record", + "name": "row", + "fields": [ + {"name": "a_enum_to_string", "type": { "type": "enum", "name": "enum_1", "symbols" : ["A", "B", "C"]}}, + {"name": "b_enum_to_enum", "type": { "type": "enum", "name": "enum_2", "symbols" : ["t", "f"]}}, + {"name": "c_array_string", "type": { "type": "array", "items": "string"}}, + {"name": "d_array_array_string", "type": { "type": "array", "items": {"type": "array", "items": "string"}}}, + {"name": "e_union_null_string", "type": ["null", "string"]}, + {"name": "f_union_long_null", "type": ["long", "null"]}, + {"name": "g_fixed", "type": {"type":"fixed", "size": 32, "name": "fixed_1"}}, + {"name": "h_record_skip", "type": { + "type": "record", + "name": "subrecord", + "fields": [ + {"name": "a", "type": "string"} + ] + }} + ] + } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/complex.json b/dbms/tests/queries/0_stateless/data_avro/complex.json new file mode 100644 index 00000000000..d05e09c72fc --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/complex.json @@ -0,0 +1,2 @@ +{"a_enum_to_string":"A","b_enum_to_enum":"t","c_array_string":["s1", "s2"],"d_array_array_string":[["a1"], ["a2"]],"e_union_null_string":{"string": "s1"},"f_union_long_null":null,"g_fixed":"79cd909892d7e7ade1987cc7422628ba","h_record_skip":{"a": "a"}} +{"a_enum_to_string":"C","b_enum_to_enum":"f","c_array_string":[],"d_array_array_string":[],"e_union_null_string":null,"f_union_long_null":{"long": 123},"g_fixed":"79cd909892d7e7ade1987cc7422628ba","h_record_skip":{"a": "a"}} \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/empty.avro b/dbms/tests/queries/0_stateless/data_avro/empty.avro new file mode 100644 index 00000000000..7cfae81758c Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/empty.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/empty.avsc b/dbms/tests/queries/0_stateless/data_avro/empty.avsc new file mode 100644 index 00000000000..923eda71054 --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/empty.avsc @@ -0,0 +1,7 @@ +{ + "type": "record", + "name": "row", + "fields": [ + {"name": "a", "type": "long"} + ] + } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/empty.json b/dbms/tests/queries/0_stateless/data_avro/empty.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/data_avro/generate_avro.sh b/dbms/tests/queries/0_stateless/data_avro/generate_avro.sh new file mode 100755 index 00000000000..3538c8693e5 --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/data_avro/generate_avro.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +#avro tools: https://www.apache.org/dyn/closer.cgi?path=avro/avro-1.9.1/java/avro-tools-1.9.1.jar + + +avro-tools fromjson --schema-file primitive.avsc primitive.json > primitive.avro +avro-tools fromjson --schema-file complex.avsc complex.json > complex.avro +avro-tools fromjson --schema-file logical_types.avsc logical_types.json > logical_types.avro +avro-tools fromjson --schema-file empty.avsc empty.json > empty.avro + +#compression +avro-tools fromjson --codec null --schema-file simple.avsc simple.json > simple.null.avro +avro-tools fromjson --codec deflate --schema-file simple.avsc simple.json > simple.deflate.avro +avro-tools fromjson --codec snappy --schema-file simple.avsc simple.json > simple.snappy.avro \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/logical_types.avro b/dbms/tests/queries/0_stateless/data_avro/logical_types.avro new file mode 100644 index 00000000000..7b8a3f60b7a Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/logical_types.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/logical_types.avsc b/dbms/tests/queries/0_stateless/data_avro/logical_types.avsc new file mode 100644 index 00000000000..5d9fd96821f --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/logical_types.avsc @@ -0,0 +1,9 @@ +{ + "type": "record", + "name": "row", + "fields": [ + {"name": "a_date", "type": { "type": "int", "logicalType": "date"}}, + {"name": "b_timestamp_millis", "type": { "type": "long", "logicalType": "timestamp-millis"}}, + {"name": "c_timestamp_micros", "type": { "type": "long", "logicalType": "timestamp-micros"}} + ] + } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/logical_types.json b/dbms/tests/queries/0_stateless/data_avro/logical_types.json new file mode 100644 index 00000000000..652b85246e7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/logical_types.json @@ -0,0 +1 @@ +{"a_date":18250,"b_timestamp_millis":1578641516227,"c_timestamp_micros":1578641516227000} diff --git a/dbms/tests/queries/0_stateless/data_avro/primitive.avro b/dbms/tests/queries/0_stateless/data_avro/primitive.avro new file mode 100644 index 00000000000..ef5eb36639f Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/primitive.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/primitive.avsc b/dbms/tests/queries/0_stateless/data_avro/primitive.avsc new file mode 100644 index 00000000000..a4f06d02b01 --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/primitive.avsc @@ -0,0 +1,14 @@ +{ + "type": "record", + "name": "row", + "fields": [ + {"name": "a_bool", "type": "boolean"}, + {"name": "b_int", "type": "int"}, + {"name": "c_long", "type": "long"}, + {"name": "d_float", "type": "float"}, + {"name": "e_double", "type": "double"}, + {"name": "f_bytes", "type": "bytes"}, + {"name": "g_string", "type": "string"}, + {"name": "h_null", "type": "null"} + ] + } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/primitive.json b/dbms/tests/queries/0_stateless/data_avro/primitive.json new file mode 100644 index 00000000000..fc521c8829c --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/primitive.json @@ -0,0 +1,2 @@ +{"a_bool":true,"b_int":1,"c_long":2,"d_float":3.4,"e_double":5.6,"f_bytes":"b1","g_string":"s1","h_null": null} 
+{"a_bool":false,"b_int":-1,"c_long":9223372036854775807,"d_float":3.00004,"e_double":0.00001,"f_bytes":"","g_string":"","h_null": null} \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/simple.avsc b/dbms/tests/queries/0_stateless/data_avro/simple.avsc new file mode 100644 index 00000000000..923eda71054 --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/simple.avsc @@ -0,0 +1,7 @@ +{ + "type": "record", + "name": "row", + "fields": [ + {"name": "a", "type": "long"} + ] + } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/data_avro/simple.deflate.avro b/dbms/tests/queries/0_stateless/data_avro/simple.deflate.avro new file mode 100644 index 00000000000..d4ba226b447 Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/simple.deflate.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/simple.json b/dbms/tests/queries/0_stateless/data_avro/simple.json new file mode 100644 index 00000000000..c09fc0b732f --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_avro/simple.json @@ -0,0 +1,1000 @@ +{"a":1} +{"a":2} +{"a":3} +{"a":4} +{"a":5} +{"a":6} +{"a":7} +{"a":8} +{"a":9} +{"a":10} +{"a":11} +{"a":12} +{"a":13} +{"a":14} +{"a":15} +{"a":16} +{"a":17} +{"a":18} +{"a":19} +{"a":20} +{"a":21} +{"a":22} +{"a":23} +{"a":24} +{"a":25} +{"a":26} +{"a":27} +{"a":28} +{"a":29} +{"a":30} +{"a":31} +{"a":32} +{"a":33} +{"a":34} +{"a":35} +{"a":36} +{"a":37} +{"a":38} +{"a":39} +{"a":40} +{"a":41} +{"a":42} +{"a":43} +{"a":44} +{"a":45} +{"a":46} +{"a":47} +{"a":48} +{"a":49} +{"a":50} +{"a":51} +{"a":52} +{"a":53} +{"a":54} +{"a":55} +{"a":56} +{"a":57} +{"a":58} +{"a":59} +{"a":60} +{"a":61} +{"a":62} +{"a":63} +{"a":64} +{"a":65} +{"a":66} +{"a":67} +{"a":68} +{"a":69} +{"a":70} +{"a":71} +{"a":72} +{"a":73} +{"a":74} +{"a":75} +{"a":76} +{"a":77} +{"a":78} +{"a":79} +{"a":80} +{"a":81} +{"a":82} +{"a":83} +{"a":84} +{"a":85} +{"a":86} +{"a":87} +{"a":88} +{"a":89} +{"a":90} +{"a":91} +{"a":92} +{"a":93} +{"a":94} +{"a":95} +{"a":96} +{"a":97} +{"a":98} +{"a":99} +{"a":100} +{"a":101} +{"a":102} +{"a":103} +{"a":104} +{"a":105} +{"a":106} +{"a":107} +{"a":108} +{"a":109} +{"a":110} +{"a":111} +{"a":112} +{"a":113} +{"a":114} +{"a":115} +{"a":116} +{"a":117} +{"a":118} +{"a":119} +{"a":120} +{"a":121} +{"a":122} +{"a":123} +{"a":124} +{"a":125} +{"a":126} +{"a":127} +{"a":128} +{"a":129} +{"a":130} +{"a":131} +{"a":132} +{"a":133} +{"a":134} +{"a":135} +{"a":136} +{"a":137} +{"a":138} +{"a":139} +{"a":140} +{"a":141} +{"a":142} +{"a":143} +{"a":144} +{"a":145} +{"a":146} +{"a":147} +{"a":148} +{"a":149} +{"a":150} +{"a":151} +{"a":152} +{"a":153} +{"a":154} +{"a":155} +{"a":156} +{"a":157} +{"a":158} +{"a":159} +{"a":160} +{"a":161} +{"a":162} +{"a":163} +{"a":164} +{"a":165} +{"a":166} +{"a":167} +{"a":168} +{"a":169} +{"a":170} +{"a":171} +{"a":172} +{"a":173} +{"a":174} +{"a":175} +{"a":176} +{"a":177} +{"a":178} +{"a":179} +{"a":180} +{"a":181} +{"a":182} +{"a":183} +{"a":184} +{"a":185} +{"a":186} +{"a":187} +{"a":188} +{"a":189} +{"a":190} +{"a":191} +{"a":192} +{"a":193} +{"a":194} +{"a":195} +{"a":196} +{"a":197} +{"a":198} +{"a":199} +{"a":200} +{"a":201} +{"a":202} +{"a":203} +{"a":204} +{"a":205} +{"a":206} +{"a":207} +{"a":208} +{"a":209} +{"a":210} +{"a":211} +{"a":212} +{"a":213} +{"a":214} +{"a":215} +{"a":216} +{"a":217} +{"a":218} +{"a":219} +{"a":220} +{"a":221} +{"a":222} +{"a":223} +{"a":224} +{"a":225} +{"a":226} +{"a":227} +{"a":228} +{"a":229} +{"a":230} +{"a":231} +{"a":232} +{"a":233} 
+{"a":234} +{"a":235} +{"a":236} +{"a":237} +{"a":238} +{"a":239} +{"a":240} +{"a":241} +{"a":242} +{"a":243} +{"a":244} +{"a":245} +{"a":246} +{"a":247} +{"a":248} +{"a":249} +{"a":250} +{"a":251} +{"a":252} +{"a":253} +{"a":254} +{"a":255} +{"a":256} +{"a":257} +{"a":258} +{"a":259} +{"a":260} +{"a":261} +{"a":262} +{"a":263} +{"a":264} +{"a":265} +{"a":266} +{"a":267} +{"a":268} +{"a":269} +{"a":270} +{"a":271} +{"a":272} +{"a":273} +{"a":274} +{"a":275} +{"a":276} +{"a":277} +{"a":278} +{"a":279} +{"a":280} +{"a":281} +{"a":282} +{"a":283} +{"a":284} +{"a":285} +{"a":286} +{"a":287} +{"a":288} +{"a":289} +{"a":290} +{"a":291} +{"a":292} +{"a":293} +{"a":294} +{"a":295} +{"a":296} +{"a":297} +{"a":298} +{"a":299} +{"a":300} +{"a":301} +{"a":302} +{"a":303} +{"a":304} +{"a":305} +{"a":306} +{"a":307} +{"a":308} +{"a":309} +{"a":310} +{"a":311} +{"a":312} +{"a":313} +{"a":314} +{"a":315} +{"a":316} +{"a":317} +{"a":318} +{"a":319} +{"a":320} +{"a":321} +{"a":322} +{"a":323} +{"a":324} +{"a":325} +{"a":326} +{"a":327} +{"a":328} +{"a":329} +{"a":330} +{"a":331} +{"a":332} +{"a":333} +{"a":334} +{"a":335} +{"a":336} +{"a":337} +{"a":338} +{"a":339} +{"a":340} +{"a":341} +{"a":342} +{"a":343} +{"a":344} +{"a":345} +{"a":346} +{"a":347} +{"a":348} +{"a":349} +{"a":350} +{"a":351} +{"a":352} +{"a":353} +{"a":354} +{"a":355} +{"a":356} +{"a":357} +{"a":358} +{"a":359} +{"a":360} +{"a":361} +{"a":362} +{"a":363} +{"a":364} +{"a":365} +{"a":366} +{"a":367} +{"a":368} +{"a":369} +{"a":370} +{"a":371} +{"a":372} +{"a":373} +{"a":374} +{"a":375} +{"a":376} +{"a":377} +{"a":378} +{"a":379} +{"a":380} +{"a":381} +{"a":382} +{"a":383} +{"a":384} +{"a":385} +{"a":386} +{"a":387} +{"a":388} +{"a":389} +{"a":390} +{"a":391} +{"a":392} +{"a":393} +{"a":394} +{"a":395} +{"a":396} +{"a":397} +{"a":398} +{"a":399} +{"a":400} +{"a":401} +{"a":402} +{"a":403} +{"a":404} +{"a":405} +{"a":406} +{"a":407} +{"a":408} +{"a":409} +{"a":410} +{"a":411} +{"a":412} +{"a":413} +{"a":414} +{"a":415} +{"a":416} +{"a":417} +{"a":418} +{"a":419} +{"a":420} +{"a":421} +{"a":422} +{"a":423} +{"a":424} +{"a":425} +{"a":426} +{"a":427} +{"a":428} +{"a":429} +{"a":430} +{"a":431} +{"a":432} +{"a":433} +{"a":434} +{"a":435} +{"a":436} +{"a":437} +{"a":438} +{"a":439} +{"a":440} +{"a":441} +{"a":442} +{"a":443} +{"a":444} +{"a":445} +{"a":446} +{"a":447} +{"a":448} +{"a":449} +{"a":450} +{"a":451} +{"a":452} +{"a":453} +{"a":454} +{"a":455} +{"a":456} +{"a":457} +{"a":458} +{"a":459} +{"a":460} +{"a":461} +{"a":462} +{"a":463} +{"a":464} +{"a":465} +{"a":466} +{"a":467} +{"a":468} +{"a":469} +{"a":470} +{"a":471} +{"a":472} +{"a":473} +{"a":474} +{"a":475} +{"a":476} +{"a":477} +{"a":478} +{"a":479} +{"a":480} +{"a":481} +{"a":482} +{"a":483} +{"a":484} +{"a":485} +{"a":486} +{"a":487} +{"a":488} +{"a":489} +{"a":490} +{"a":491} +{"a":492} +{"a":493} +{"a":494} +{"a":495} +{"a":496} +{"a":497} +{"a":498} +{"a":499} +{"a":500} +{"a":501} +{"a":502} +{"a":503} +{"a":504} +{"a":505} +{"a":506} +{"a":507} +{"a":508} +{"a":509} +{"a":510} +{"a":511} +{"a":512} +{"a":513} +{"a":514} +{"a":515} +{"a":516} +{"a":517} +{"a":518} +{"a":519} +{"a":520} +{"a":521} +{"a":522} +{"a":523} +{"a":524} +{"a":525} +{"a":526} +{"a":527} +{"a":528} +{"a":529} +{"a":530} +{"a":531} +{"a":532} +{"a":533} +{"a":534} +{"a":535} +{"a":536} +{"a":537} +{"a":538} +{"a":539} +{"a":540} +{"a":541} +{"a":542} +{"a":543} +{"a":544} +{"a":545} +{"a":546} +{"a":547} +{"a":548} +{"a":549} +{"a":550} +{"a":551} +{"a":552} +{"a":553} +{"a":554} +{"a":555} +{"a":556} 
+{"a":557} +{"a":558} +{"a":559} +{"a":560} +{"a":561} +{"a":562} +{"a":563} +{"a":564} +{"a":565} +{"a":566} +{"a":567} +{"a":568} +{"a":569} +{"a":570} +{"a":571} +{"a":572} +{"a":573} +{"a":574} +{"a":575} +{"a":576} +{"a":577} +{"a":578} +{"a":579} +{"a":580} +{"a":581} +{"a":582} +{"a":583} +{"a":584} +{"a":585} +{"a":586} +{"a":587} +{"a":588} +{"a":589} +{"a":590} +{"a":591} +{"a":592} +{"a":593} +{"a":594} +{"a":595} +{"a":596} +{"a":597} +{"a":598} +{"a":599} +{"a":600} +{"a":601} +{"a":602} +{"a":603} +{"a":604} +{"a":605} +{"a":606} +{"a":607} +{"a":608} +{"a":609} +{"a":610} +{"a":611} +{"a":612} +{"a":613} +{"a":614} +{"a":615} +{"a":616} +{"a":617} +{"a":618} +{"a":619} +{"a":620} +{"a":621} +{"a":622} +{"a":623} +{"a":624} +{"a":625} +{"a":626} +{"a":627} +{"a":628} +{"a":629} +{"a":630} +{"a":631} +{"a":632} +{"a":633} +{"a":634} +{"a":635} +{"a":636} +{"a":637} +{"a":638} +{"a":639} +{"a":640} +{"a":641} +{"a":642} +{"a":643} +{"a":644} +{"a":645} +{"a":646} +{"a":647} +{"a":648} +{"a":649} +{"a":650} +{"a":651} +{"a":652} +{"a":653} +{"a":654} +{"a":655} +{"a":656} +{"a":657} +{"a":658} +{"a":659} +{"a":660} +{"a":661} +{"a":662} +{"a":663} +{"a":664} +{"a":665} +{"a":666} +{"a":667} +{"a":668} +{"a":669} +{"a":670} +{"a":671} +{"a":672} +{"a":673} +{"a":674} +{"a":675} +{"a":676} +{"a":677} +{"a":678} +{"a":679} +{"a":680} +{"a":681} +{"a":682} +{"a":683} +{"a":684} +{"a":685} +{"a":686} +{"a":687} +{"a":688} +{"a":689} +{"a":690} +{"a":691} +{"a":692} +{"a":693} +{"a":694} +{"a":695} +{"a":696} +{"a":697} +{"a":698} +{"a":699} +{"a":700} +{"a":701} +{"a":702} +{"a":703} +{"a":704} +{"a":705} +{"a":706} +{"a":707} +{"a":708} +{"a":709} +{"a":710} +{"a":711} +{"a":712} +{"a":713} +{"a":714} +{"a":715} +{"a":716} +{"a":717} +{"a":718} +{"a":719} +{"a":720} +{"a":721} +{"a":722} +{"a":723} +{"a":724} +{"a":725} +{"a":726} +{"a":727} +{"a":728} +{"a":729} +{"a":730} +{"a":731} +{"a":732} +{"a":733} +{"a":734} +{"a":735} +{"a":736} +{"a":737} +{"a":738} +{"a":739} +{"a":740} +{"a":741} +{"a":742} +{"a":743} +{"a":744} +{"a":745} +{"a":746} +{"a":747} +{"a":748} +{"a":749} +{"a":750} +{"a":751} +{"a":752} +{"a":753} +{"a":754} +{"a":755} +{"a":756} +{"a":757} +{"a":758} +{"a":759} +{"a":760} +{"a":761} +{"a":762} +{"a":763} +{"a":764} +{"a":765} +{"a":766} +{"a":767} +{"a":768} +{"a":769} +{"a":770} +{"a":771} +{"a":772} +{"a":773} +{"a":774} +{"a":775} +{"a":776} +{"a":777} +{"a":778} +{"a":779} +{"a":780} +{"a":781} +{"a":782} +{"a":783} +{"a":784} +{"a":785} +{"a":786} +{"a":787} +{"a":788} +{"a":789} +{"a":790} +{"a":791} +{"a":792} +{"a":793} +{"a":794} +{"a":795} +{"a":796} +{"a":797} +{"a":798} +{"a":799} +{"a":800} +{"a":801} +{"a":802} +{"a":803} +{"a":804} +{"a":805} +{"a":806} +{"a":807} +{"a":808} +{"a":809} +{"a":810} +{"a":811} +{"a":812} +{"a":813} +{"a":814} +{"a":815} +{"a":816} +{"a":817} +{"a":818} +{"a":819} +{"a":820} +{"a":821} +{"a":822} +{"a":823} +{"a":824} +{"a":825} +{"a":826} +{"a":827} +{"a":828} +{"a":829} +{"a":830} +{"a":831} +{"a":832} +{"a":833} +{"a":834} +{"a":835} +{"a":836} +{"a":837} +{"a":838} +{"a":839} +{"a":840} +{"a":841} +{"a":842} +{"a":843} +{"a":844} +{"a":845} +{"a":846} +{"a":847} +{"a":848} +{"a":849} +{"a":850} +{"a":851} +{"a":852} +{"a":853} +{"a":854} +{"a":855} +{"a":856} +{"a":857} +{"a":858} +{"a":859} +{"a":860} +{"a":861} +{"a":862} +{"a":863} +{"a":864} +{"a":865} +{"a":866} +{"a":867} +{"a":868} +{"a":869} +{"a":870} +{"a":871} +{"a":872} +{"a":873} +{"a":874} +{"a":875} +{"a":876} +{"a":877} +{"a":878} +{"a":879} 
+{"a":880} +{"a":881} +{"a":882} +{"a":883} +{"a":884} +{"a":885} +{"a":886} +{"a":887} +{"a":888} +{"a":889} +{"a":890} +{"a":891} +{"a":892} +{"a":893} +{"a":894} +{"a":895} +{"a":896} +{"a":897} +{"a":898} +{"a":899} +{"a":900} +{"a":901} +{"a":902} +{"a":903} +{"a":904} +{"a":905} +{"a":906} +{"a":907} +{"a":908} +{"a":909} +{"a":910} +{"a":911} +{"a":912} +{"a":913} +{"a":914} +{"a":915} +{"a":916} +{"a":917} +{"a":918} +{"a":919} +{"a":920} +{"a":921} +{"a":922} +{"a":923} +{"a":924} +{"a":925} +{"a":926} +{"a":927} +{"a":928} +{"a":929} +{"a":930} +{"a":931} +{"a":932} +{"a":933} +{"a":934} +{"a":935} +{"a":936} +{"a":937} +{"a":938} +{"a":939} +{"a":940} +{"a":941} +{"a":942} +{"a":943} +{"a":944} +{"a":945} +{"a":946} +{"a":947} +{"a":948} +{"a":949} +{"a":950} +{"a":951} +{"a":952} +{"a":953} +{"a":954} +{"a":955} +{"a":956} +{"a":957} +{"a":958} +{"a":959} +{"a":960} +{"a":961} +{"a":962} +{"a":963} +{"a":964} +{"a":965} +{"a":966} +{"a":967} +{"a":968} +{"a":969} +{"a":970} +{"a":971} +{"a":972} +{"a":973} +{"a":974} +{"a":975} +{"a":976} +{"a":977} +{"a":978} +{"a":979} +{"a":980} +{"a":981} +{"a":982} +{"a":983} +{"a":984} +{"a":985} +{"a":986} +{"a":987} +{"a":988} +{"a":989} +{"a":990} +{"a":991} +{"a":992} +{"a":993} +{"a":994} +{"a":995} +{"a":996} +{"a":997} +{"a":998} +{"a":999} +{"a":1000} diff --git a/dbms/tests/queries/0_stateless/data_avro/simple.null.avro b/dbms/tests/queries/0_stateless/data_avro/simple.null.avro new file mode 100644 index 00000000000..789ab45101f Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/simple.null.avro differ diff --git a/dbms/tests/queries/0_stateless/data_avro/simple.snappy.avro b/dbms/tests/queries/0_stateless/data_avro/simple.snappy.avro new file mode 100644 index 00000000000..b812ed6c7ea Binary files /dev/null and b/dbms/tests/queries/0_stateless/data_avro/simple.snappy.avro differ diff --git a/dbms/tests/queries/1_stateful/00154_avro.reference b/dbms/tests/queries/1_stateful/00154_avro.reference new file mode 100644 index 00000000000..7e243047e8b --- /dev/null +++ b/dbms/tests/queries/1_stateful/00154_avro.reference @@ -0,0 +1,2 @@ +17300372046749301651 +17300372046749301651 diff --git a/dbms/tests/queries/1_stateful/00154_avro.sql b/dbms/tests/queries/1_stateful/00154_avro.sql new file mode 100644 index 00000000000..3d43a23e516 --- /dev/null +++ b/dbms/tests/queries/1_stateful/00154_avro.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test.avro; + +CREATE TABLE test.avro AS test.hits ENGINE = File(Avro); +INSERT INTO test.avro SELECT * FROM test.hits WHERE intHash64(WatchID) % 100 = 0; + +SELECT sum(cityHash64(*)) FROM test.hits WHERE intHash64(WatchID) % 100 = 0; +SELECT sum(cityHash64(*)) FROM test.avro; + +DROP TABLE test.avro; diff --git a/docker/packager/packager b/docker/packager/packager index 62767cae8f0..a31a387d502 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -177,7 +177,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if unbundled: # TODO: fix build with ENABLE_RDKAFKA - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DENABLE_READLINE=0 -DENABLE_RDKAFKA=0') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile new file mode 
100644 index 00000000000..3ed3b250f0c --- /dev/null +++ b/docker/test/codebrowser/Dockerfile @@ -0,0 +1,47 @@ +# docker build --network=host -t yandex/clickhouse-codebrowser . +# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser +FROM ubuntu:18.04 + +RUN apt-get --allow-unauthenticated update -y \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get --allow-unauthenticated install --yes --no-install-recommends \ + bash \ + sudo \ + wget \ + software-properties-common \ + ca-certificates \ + apt-transport-https \ + build-essential \ + gpg-agent \ + git + +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add - +RUN sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' +RUN sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list + +RUN sudo apt-get --yes --allow-unauthenticated update +# To build woboq +RUN sudo apt-get --yes --allow-unauthenticated install cmake clang-8 libllvm8 libclang-8-dev + +# repo versions doesn't work correctly with C++17 +# also we push reports to s3, so we add index.html to subfolder urls +# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b +RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser +RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release && make -j + +ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator +ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator +ENV STATIC_DATA=/woboq_codebrowser/data + +ENV SOURCE_DIRECTORY=/repo_folder +ENV BUILD_DIRECTORY=/build +ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report +ENV SHA=nosha + +CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ + cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-8 -DCMAKE_C_COMPILER=/usr/bin/clang-8 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \ + mkdir -p $HTML_RESULT_DIRECTORY && \ + $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA && \ + cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ + $CODEINDEX $HTML_RESULT_DIRECTORY -d "data" && \ + mv $HTML_RESULT_DIRECTORY /test_output diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 098800d5573..4b4501892e7 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -146,7 +146,9 @@ run_tests # Analyze results result_structure="left float, right float, diff float, rd Array(float), query text" -right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff < 0.05 and rd[3] > 0.05 order by rd[3] desc" > flap-prone.tsv -right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > bad-perf.tsv -right/clickhouse local --file '*-client-time.tsv' -S "query text, client float, server float" -q "select *, floor(client/server, 3) p from table order by p desc" > client-time.tsv +right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where abs(diff) < 0.05 and rd[3] > 0.05 order by rd[3] desc" > unstable.tsv +right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where abs(diff) > 0.05 and abs(diff) > rd[3] order by diff desc" > changed-perf.tsv +right/clickhouse local 
--file '*-client-time.tsv' -S "query text, client float, server float" -q "select client, server, floor(client/server, 3) p, query from table where p > 1.01 order by p desc" > slow-on-client.tsv grep Exception:[^:] *-err.log > run-errors.log + +./report.py > report.html diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 589bb58fe8b..3a4d33326af 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -29,5 +29,5 @@ set -m time ../compare.sh 0 $ref_sha $PR_TO_TEST $SHA_TO_TEST 2>&1 | ts | tee compare.log set +m -7z a /output/output.7z *.log *.tsv +7z a /output/output.7z *.log *.tsv *.html cp compare.log /output diff --git a/docker/test/performance-comparison/eqmed.sql b/docker/test/performance-comparison/eqmed.sql index 5e8d842b7df..cdc7cbec85f 100644 --- a/docker/test/performance-comparison/eqmed.sql +++ b/docker/test/performance-comparison/eqmed.sql @@ -1,10 +1,10 @@ -- input is table(query text, run UInt32, version int, time float) select -- abs(diff_percent) > rd_quantiles_percent[3] fail, - floor(original_medians_array.time_by_version[1], 4) m1, - floor(original_medians_array.time_by_version[2], 4) m2, - floor((m1 - m2) / m1, 3) diff_percent, - arrayMap(x -> floor(x / m1, 3), rd.rd_quantiles) rd_quantiles_percent, + floor(original_medians_array.time_by_version[1], 4) left, + floor(original_medians_array.time_by_version[2], 4) right, + floor((right - left) / left, 3) diff_percent, + arrayMap(x -> floor(x / left, 3), rd.rd_quantiles) rd_quantiles_percent, query from ( diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py new file mode 100755 index 00000000000..64461ba0587 --- /dev/null +++ b/docker/test/performance-comparison/report.py @@ -0,0 +1,105 @@ +#!/usr/bin/python3 + +import collections +import csv +import os +import sys + +doc_template = """ + + + + {header} + + +
+<h1>{header}</h1>
+{test_part}
+</body>
+</html>
+"""
+
+table_template = """
+<h2>{caption}</h2>
+<table>
+{header}
+{rows}
+""" + +def tr(x): + return '' + str(x) + '' + +def td(x): + return '' + str(x) + '' + +def th(x): + return '' + str(x) + '' + +def table_row(r): + return tr(''.join([td(f) for f in r])) + +def table_header(r): + return tr(''.join([th(f) for f in r])) + +def tsv_rows(n): + result = '' + with open(n) as fd: + for row in csv.reader(fd, delimiter="\t", quotechar='"'): + result += table_row(row) + return result + +params = collections.defaultdict(str) +params['header'] = "ClickHouse Performance Comparison" +params['test_part'] = (table_template.format_map( + collections.defaultdict(str, + caption = 'Changes in performance', + header = table_header(['Left', 'Right', 'Diff', 'RD', 'Query']), + rows = tsv_rows('changed-perf.tsv'))) + + table_template.format( + caption = 'Slow on client', + header = table_header(['Client', 'Server', 'Ratio', 'Query']), + rows = tsv_rows('slow-on-client.tsv')) + + table_template.format( + caption = 'Unstable', + header = table_header(['Left', 'Right', 'Diff', 'RD', 'Query']), + rows = tsv_rows('unstable.tsv')) + + table_template.format( + caption = 'Run errors', + header = table_header(['A', 'B']), + rows = tsv_rows('run-errors.log')) +) +print(doc_template.format_map(params)) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 1c56eaffb82..454cdb9160d 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -31,5 +31,7 @@ For compatibility with all MySQL clients, it is recommended to specify user pass If user password is specified using [SHA256](../operations/settings/settings_users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of command-line tool mysql). Restrictions: + - prepared queries are not supported + - some data types are sent as strings diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index c13d53eabc7..550a84350d9 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -723,7 +723,7 @@ Example 9004 ``` -## tmp_path +## tmp_path {#server-settings-tmp_path} Path to temporary data for processing large queries. @@ -737,6 +737,17 @@ Path to temporary data for processing large queries. ``` +## tmp_policy {#server-settings-tmp_policy} + +Policy from [`storage_configuration`](mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. +If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is ignored. + +!!! note + - `move_factor` is ignored + - `keep_free_space_bytes` is ignored + - `max_data_part_size_bytes` is ignored + - you must have exactly one volume in that policy + ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../operations/table_engines/mergetree.md). diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c49a5f25cf8..f380cd2d4d7 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -968,6 +968,24 @@ Possible values: Default value: 0. +## optimize_skip_unused_shards {#settings-optimize_skip_unused_shards} + +Enables or disables skipping of unused shards for SELECT queries that has sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing). 
+ +Default value: 0. + +## force_optimize_skip_unused_shards {#settings-force_optimize_skip_unused_shards} + +Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) is enabled and skipping of unused shards is not possible. If skipping is not possible and the setting is enabled, an exception is thrown. + +Possible values: + +- 0 - Disabled (does not throw) +- 1 - Disable query execution only if the table has a sharding key +- 2 - Disable query execution regardless of whether a sharding key is defined for the table + +Default value: 0. + ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} Enables or disables throwing an exception if an [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query didn't perform a merge.
diff --git a/docs/en/operations/settings/settings_users.md b/docs/en/operations/settings/settings_users.md index 5289bb19645..04f7900382c 100644 --- a/docs/en/operations/settings/settings_users.md +++ b/docs/en/operations/settings/settings_users.md @@ -62,7 +62,7 @@ Password can be specified in plaintext or in SHA256 (hex format). Example of how to generate a password from shell: ``` - PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | openssl dgst -sha1 -binary | openssl dgst -sha1 + PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-' ``` The first line of the result is the password. The second line is the corresponding double SHA1 hash.
diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 77964c7377f..0f3ca33a551 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -395,6 +395,35 @@ Columns: - `query` (String) – The query text. For `INSERT`, it doesn't include the data to insert. - `query_id` (String) – Query ID, if defined. +## system.text_log {#system_tables-text_log} + +Contains logging entries. The logging level that goes to this table can be limited with the `text_log.level` server setting. + +Columns: + +- `event_date` (`Date`) - Date of the entry. +- `event_time` (`DateTime`) - Time of the entry. +- `microseconds` (`UInt32`) - Microseconds of the entry. +- `thread_name` (String) — Name of the thread from which the logging was done. +- `thread_number` (UInt32) — Internal thread ID. +- `os_thread_id` (Int32) — OS thread ID. +- `level` (`Enum8`) - Entry level. + - `'Fatal' = 1` + - `'Critical' = 2` + - `'Error' = 3` + - `'Warning' = 4` + - `'Notice' = 5` + - `'Information' = 6` + - `'Debug' = 7` + - `'Trace' = 8` +- `query_id` (`String`) - ID of the query. +- `logger_name` (`LowCardinality(String)`) - Name of the logger (e.g. `DDLWorker`). +- `message` (`String`) - The message itself. +- `revision` (`UInt32`) - ClickHouse revision. +- `source_file` (`LowCardinality(String)`) - Source file from which the logging was done. +- `source_line` (`UInt64`) - Source line from which the logging was done. + + ## system.query_log {#system_tables-query_log} Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information.
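As a rough illustration of the two shard-skipping settings documented above, here is a minimal sketch of how they might be exercised. The cluster, table and sharding key (`my_cluster`, `hits_distributed`, `UserID`) are hypothetical and are not part of this patch:

```sql
-- Hypothetical Distributed table sharded by UserID (not part of this change):
-- CREATE TABLE hits_distributed AS hits
--     ENGINE = Distributed(my_cluster, default, hits, intHash64(UserID));

SET optimize_skip_unused_shards = 1;        -- allow pruning shards that cannot match
SET force_optimize_skip_unused_shards = 1;  -- throw instead of silently querying all shards

-- The WHERE clause fixes the sharding key, so shards that cannot contain
-- matching rows can be skipped.
SELECT count()
FROM hits_distributed
WHERE UserID = 12345;
```

Per the documentation above, a query against such a table that does not constrain the sharding key would then fail rather than quietly fan out to every shard.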
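And a small, hedged example of reading the new `system.text_log` table described above; it assumes the `text_log` section is enabled in the server configuration and uses only the columns listed in the documentation:

```sql
-- Last ten error-level entries, assuming text_log is enabled in config.xml.
SELECT event_time, level, logger_name, message
FROM system.text_log
WHERE level = 'Error'
ORDER BY event_time DESC
LIMIT 10;
```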
diff --git a/docs/en/query_language/functions/rounding_functions.md b/docs/en/query_language/functions/rounding_functions.md index 2640472f955..ec365235381 100644 --- a/docs/en/query_language/functions/rounding_functions.md +++ b/docs/en/query_language/functions/rounding_functions.md @@ -161,8 +161,6 @@ roundBankers(10.755, 2) = 11,76 - [round](#rounding_functions-round) - - ## roundToExp2(num) Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to the nearest (whole non-negative) degree of two. @@ -180,9 +178,3 @@ Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rou Accept a number, round it down to an element in the specified array. If the value is less than the lowest bound, the lowest bound is returned. [Original article](https://clickhouse.yandex/docs/en/query_language/functions/rounding_functions/) - -## roundBankers(x\[, N\]) - -Rounds a value to a specified number of decimal places. - -The function returns the nearest number of the specified order. In case when given number has equal distance to surrounding numbers, the function always return the number having the nearest even digit (banker's rounding). diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 8448822282d..145779cfccb 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -145,6 +145,8 @@ Q2. Upd. На данный момент исправляются проблемы с регрессиями производительности в отдельных случаях. Кажется, что все проблемы исправлены. Включение по-умолчанию в Q1, но остаётся вторая часть задачи по корректному выделению async части. +Upd. Включили по-умолчанию. Удаление старого кода не раньше, чем после первого релиза, в котором это включено по-умолчанию и всё ещё можно выключить обратно. + ### 2.2. Инфраструктура событий/метрик/ограничений/квот/трассировки. В очереди. https://gist.github.com/alexey-milovidov/d62d73222d83b9319dc519cbb13aeff6 @@ -214,10 +216,12 @@ Upd. На данный момент исправляются проблемы с Требует 3.1. -### 3.3. Исправить катастрофически отвратительно неприемлемый поиск по документации. +### + 3.3. Исправить катастрофически отвратительно неприемлемый поиск по документации. [Иван Блинков](https://github.com/blinkov/) - очень хороший человек. Сам сайт документации основан на технологиях, не удовлетворяющих требованиям задачи, и эти технологии трудно исправить. Задачу будет делать первый встретившийся нам frontend разработчик, которого мы сможем заставить это сделать. +Upd. Иван Блинков сделал эту задачу путём замены треш-технологий на нормальные. + ### 3.4. + Добавить японский язык в документацию. Эту задачу сделает [Иван Блинков](https://github.com/blinkov/), до конца декабря 2019. Сделано. diff --git a/docs/ru/interfaces/mysql.md b/docs/ru/interfaces/mysql.md index 8f01d5afb44..d0a36174d2b 100644 --- a/docs/ru/interfaces/mysql.md +++ b/docs/ru/interfaces/mysql.md @@ -31,5 +31,7 @@ mysql> В случае указания пароля с помощью [SHA256](../operations/settings/settings_users.md#password_sha256_hex) некоторые клиенты не смогут пройти аутентификацию (mysqljs и старые версии стандартного клиента mysql). 
Ограничения: + - не поддерживаются подготовленные запросы + - некоторые типы данных отправляются как строки diff --git a/docs/ru/operations/settings/settings_users.md b/docs/ru/operations/settings/settings_users.md index 11f7d925671..2f39e8e86de 100644 --- a/docs/ru/operations/settings/settings_users.md +++ b/docs/ru/operations/settings/settings_users.md @@ -62,7 +62,7 @@ Пример создания пароля в командной строке: ``` - PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | openssl dgst -sha1 -binary | openssl dgst -sha1 + PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-' ``` Первая строка результата — пароль. Вторая строка — соответствующий ему двойной хэш SHA1. diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 312fcc48b13..b83c876978e 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -10,7 +10,7 @@ if (DEFINED APPLE_HAVE_CLOCK_GETTIME) target_compile_definitions(apple_rt PUBLIC -DAPPLE_HAVE_CLOCK_GETTIME=${APPLE_HAVE_CLOCK_GETTIME}) endif () -add_library (common +set (COMMON_SRCS src/argsToConfig.cpp src/coverage.cpp src/DateLUT.cpp @@ -65,7 +65,19 @@ add_library (common include/ext/scope_guard.h include/ext/size.h include/ext/unlock_guard.h +) +if (ENABLE_REPLXX) + set (COMMON_SRCS + src/ReplxxLineReader.cpp + include/common/ReplxxLineReader.h + + ${COMMON_SRCS} + ) +endif () + +add_library (common + ${COMMON_SRCS} ${CONFIG_COMMON}) if (USE_INTERNAL_MEMCPY) @@ -92,8 +104,8 @@ if(CCTZ_LIBRARY) target_link_libraries(common PRIVATE ${CCTZ_LIBRARY}) endif() -if (USE_REPLXX) - target_link_libraries(common PRIVATE replxx) +if (ENABLE_REPLXX) + target_link_libraries(common PUBLIC replxx) endif () target_link_libraries (common diff --git a/libs/libcommon/include/common/LineReader.h b/libs/libcommon/include/common/LineReader.h index 120ff76dac6..aa2954db4fc 100644 --- a/libs/libcommon/include/common/LineReader.h +++ b/libs/libcommon/include/common/LineReader.h @@ -22,8 +22,8 @@ public: WordsRange getCompletions(const String & prefix, size_t prefix_length) const; }; - LineReader(const Suggest * suggest, const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode - ~LineReader(); + LineReader(const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode + virtual ~LineReader() {} /// Reads the whole line until delimiter (in multiline mode) or until the last line without extender. /// If resulting line is empty, it means the user interrupted the input. @@ -31,7 +31,7 @@ public: /// Typical delimiter is ';' (semicolon) and typical extender is '\' (backslash). String readLine(const String & first_prompt, const String & second_prompt); -private: +protected: enum InputStatus { ABORT = 0, @@ -39,19 +39,17 @@ private: INPUT_LINE, }; - String input; - String prev_line; const String history_file_path; + static constexpr char word_break_characters[] = " \t\n\r\"\\'`@$><=;|&{(."; + + String input; + +private: const char extender; const char delimiter; - InputStatus readOneLine(const String & prompt); - void addToHistory(const String & line); + String prev_line; - /// Since CMake doesn't impose restrictions on includes between unrelated targets - /// it's possible that we include this file without USE_REPLXX defined. 
-#ifdef __clang__ - [[maybe_unused]] -#endif - void * impl; + virtual InputStatus readOneLine(const String & prompt); + virtual void addToHistory(const String &) {} }; diff --git a/libs/libcommon/include/common/ReplxxLineReader.h b/libs/libcommon/include/common/ReplxxLineReader.h new file mode 100644 index 00000000000..47eabbf9330 --- /dev/null +++ b/libs/libcommon/include/common/ReplxxLineReader.h @@ -0,0 +1,18 @@ +#pragma once + +#include "LineReader.h" + +#include + +class ReplxxLineReader : public LineReader +{ +public: + ReplxxLineReader(const Suggest & suggest, const String & history_file_path, char extender, char delimiter = 0); + ~ReplxxLineReader() override; + +private: + InputStatus readOneLine(const String & prompt) override; + void addToHistory(const String & line) override; + + replxx::Replxx rx; +}; diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 6cee84a5b32..41999bb5cde 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -3,6 +3,5 @@ // .h autogenerated by cmake ! #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_REPLXX #cmakedefine01 UNBUNDLED #cmakedefine01 WITH_COVERAGE diff --git a/libs/libcommon/src/LineReader.cpp b/libs/libcommon/src/LineReader.cpp index 6ac1e856347..4a3a737fe7c 100644 --- a/libs/libcommon/src/LineReader.cpp +++ b/libs/libcommon/src/LineReader.cpp @@ -1,26 +1,20 @@ -#include #include -#if USE_REPLXX -#include -#else - -/// We can detect if code is linked with one or another readline variants or open the library dynamically. -#include -extern "C" -{ - char * readline(const char *) __attribute__((__weak__)); - char * (*readline_ptr)(const char *) = readline; -} - -#endif - #include #include #include #include +#ifdef OS_LINUX +/// We can detect if code is linked with one or another readline variants or open the library dynamically. 
+# include +extern "C" +{ + char * readline(const char *) __attribute__((__weak__)); + char * (*readline_ptr)(const char *) = readline; +} +#endif namespace { @@ -42,8 +36,6 @@ bool hasInputData() return select(1, &fds, nullptr, nullptr, &timeout) == 1; } -constexpr char word_break_characters[] = " \t\n\r\"\\'`@$><=;|&{(."; - } LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const @@ -68,39 +60,12 @@ LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String }); } -LineReader::LineReader(const Suggest * suggest, const String & history_file_path_, char extender_, char delimiter_) +LineReader::LineReader(const String & history_file_path_, char extender_, char delimiter_) : history_file_path(history_file_path_), extender(extender_), delimiter(delimiter_) { -#if USE_REPLXX - impl = new replxx::Replxx; - auto & rx = *(replxx::Replxx*)(impl); - - if (!history_file_path.empty()) - rx.history_load(history_file_path); - - auto callback = [suggest] (const String & context, size_t context_size) - { - auto range = suggest->getCompletions(context, context_size); - return replxx::Replxx::completions_t(range.first, range.second); - }; - - rx.set_completion_callback(callback); - rx.set_complete_on_empty(false); - rx.set_word_break_characters(word_break_characters); -#endif /// FIXME: check extender != delimiter } -LineReader::~LineReader() -{ -#if USE_REPLXX - auto & rx = *(replxx::Replxx*)(impl); - if (!history_file_path.empty()) - rx.history_save(history_file_path); - delete (replxx::Replxx *)impl; -#endif -} - String LineReader::readLine(const String & first_prompt, const String & second_prompt) { String line; @@ -149,14 +114,7 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) { input.clear(); -#if USE_REPLXX - auto & rx = *(replxx::Replxx*)(impl); - const char* cinput = rx.input(prompt); - if (cinput == nullptr) - return (errno != EAGAIN) ? 
ABORT : RESET_LINE; - input = cinput; -#else - +#ifdef OS_LINUX if (!readline_ptr) { for (auto name : {"libreadline.so", "libreadline.so.0", "libeditline.so", "libeditline.so.0"}) @@ -182,22 +140,14 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) input = line_read; } else +#endif { std::cout << prompt; std::getline(std::cin, input); if (!std::cin.good()) return ABORT; } -#endif trim(input); return INPUT_LINE; } - -void LineReader::addToHistory(const String & line) -{ -#if USE_REPLXX - auto & rx = *(replxx::Replxx*)(impl); - rx.history_add(line); -#endif -} diff --git a/libs/libcommon/src/ReplxxLineReader.cpp b/libs/libcommon/src/ReplxxLineReader.cpp new file mode 100644 index 00000000000..044ea05413d --- /dev/null +++ b/libs/libcommon/src/ReplxxLineReader.cpp @@ -0,0 +1,57 @@ +#include + +#include +#include +#include + +namespace +{ + +/// Trim ending whitespace inplace +void trim(String & s) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); +} + +} + +ReplxxLineReader::ReplxxLineReader(const Suggest & suggest, const String & history_file_path_, char extender_, char delimiter_) + : LineReader(history_file_path_, extender_, delimiter_) +{ + if (!history_file_path.empty()) + rx.history_load(history_file_path); + + auto callback = [&suggest] (const String & context, size_t context_size) + { + auto range = suggest.getCompletions(context, context_size); + return replxx::Replxx::completions_t(range.first, range.second); + }; + + rx.set_completion_callback(callback); + rx.set_complete_on_empty(false); + rx.set_word_break_characters(word_break_characters); +} + +ReplxxLineReader::~ReplxxLineReader() +{ + if (!history_file_path.empty()) + rx.history_save(history_file_path); +} + +LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt) +{ + input.clear(); + + const char* cinput = rx.input(prompt); + if (cinput == nullptr) + return (errno != EAGAIN) ? ABORT : RESET_LINE; + input = cinput; + + trim(input); + return INPUT_LINE; +} + +void ReplxxLineReader::addToHistory(const String & line) +{ + rx.history_add(line); +} diff --git a/libs/libloggers/loggers/Loggers.cpp b/libs/libloggers/loggers/Loggers.cpp index cf966f620e2..0fdaa766838 100644 --- a/libs/libloggers/loggers/Loggers.cpp +++ b/libs/libloggers/loggers/Loggers.cpp @@ -27,16 +27,17 @@ static std::string createDirectory(const std::string & file) return path.toString(); }; -void Loggers::setTextLog(std::shared_ptr log) +void Loggers::setTextLog(std::shared_ptr log, int max_priority) { text_log = log; + text_log_max_priority = max_priority; } void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { if (split) if (auto log = text_log.lock()) - split->addTextLog(log); + split->addTextLog(log, text_log_max_priority); auto current_logger = config.getString("logger", ""); if (config_logger == current_logger) diff --git a/libs/libloggers/loggers/Loggers.h b/libs/libloggers/loggers/Loggers.h index 525cab1e649..0095516a738 100644 --- a/libs/libloggers/loggers/Loggers.h +++ b/libs/libloggers/loggers/Loggers.h @@ -24,7 +24,7 @@ public: return layer; /// layer setted in inheritor class BaseDaemonApplication. 
} - void setTextLog(std::shared_ptr log); + void setTextLog(std::shared_ptr log, int max_priority); protected: std::optional layer; @@ -38,5 +38,7 @@ private: std::string config_logger; std::weak_ptr text_log; + int text_log_max_priority = -1; + Poco::AutoPtr split; }; diff --git a/libs/libloggers/loggers/OwnSplitChannel.cpp b/libs/libloggers/loggers/OwnSplitChannel.cpp index 467d7eb4fc2..3b9ded40dc3 100644 --- a/libs/libloggers/loggers/OwnSplitChannel.cpp +++ b/libs/libloggers/loggers/OwnSplitChannel.cpp @@ -70,34 +70,37 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) } - /// Also log to system.text_log table - TextLogElement elem; + /// Also log to system.text_log table, if message is not too noisy + if (text_log_max_priority && msg.getPriority() <= text_log_max_priority) + { + TextLogElement elem; - elem.event_time = msg_ext.time_seconds; - elem.microseconds = msg_ext.time_microseconds; + elem.event_time = msg_ext.time_seconds; + elem.microseconds = msg_ext.time_microseconds; - elem.thread_name = getThreadName(); - elem.thread_number = msg_ext.thread_number; + elem.thread_name = getThreadName(); + elem.thread_number = msg_ext.thread_number; - if (CurrentThread::isInitialized()) - elem.os_thread_id = CurrentThread::get().os_thread_id; - else - elem.os_thread_id = 0; + if (CurrentThread::isInitialized()) + elem.os_thread_id = CurrentThread::get().os_thread_id; + else + elem.os_thread_id = 0; - elem.query_id = msg_ext.query_id; + elem.query_id = msg_ext.query_id; - elem.message = msg.getText(); - elem.logger_name = msg.getSource(); - elem.level = msg.getPriority(); + elem.message = msg.getText(); + elem.logger_name = msg.getSource(); + elem.level = msg.getPriority(); - if (msg.getSourceFile() != nullptr) - elem.source_file = msg.getSourceFile(); + if (msg.getSourceFile() != nullptr) + elem.source_file = msg.getSourceFile(); - elem.source_line = msg.getSourceLine(); + elem.source_line = msg.getSourceLine(); - std::lock_guard lock(text_log_mutex); - if (auto log = text_log.lock()) - log->add(elem); + std::lock_guard lock(text_log_mutex); + if (auto log = text_log.lock()) + log->add(elem); + } } @@ -106,10 +109,11 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr channel) channels.emplace_back(std::move(channel), dynamic_cast(channel.get())); } -void OwnSplitChannel::addTextLog(std::shared_ptr log) +void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) { std::lock_guard lock(text_log_mutex); text_log = log; + text_log_max_priority = max_priority; } } diff --git a/libs/libloggers/loggers/OwnSplitChannel.h b/libs/libloggers/loggers/OwnSplitChannel.h index f475b46a72e..78308e97ab7 100644 --- a/libs/libloggers/loggers/OwnSplitChannel.h +++ b/libs/libloggers/loggers/OwnSplitChannel.h @@ -20,7 +20,7 @@ public: /// Adds a child channel void addChannel(Poco::AutoPtr channel); - void addTextLog(std::shared_ptr log); + void addTextLog(std::shared_ptr log, int max_priority); private: void logSplit(const Poco::Message & msg); @@ -33,6 +33,7 @@ private: std::mutex text_log_mutex; std::weak_ptr text_log; + int text_log_max_priority = -1; }; } diff --git a/utils/ci/build-gcc-from-sources.sh b/utils/ci/build-gcc-from-sources.sh index 0734b22335a..06d9820a022 100755 --- a/utils/ci/build-gcc-from-sources.sh +++ b/utils/ci/build-gcc-from-sources.sh @@ -32,8 +32,8 @@ $SUDO make install popd popd -$SUDO ln -sf /usr/local/bin/gcc /usr/local/bin/gcc-${GCC_GCC_SOURCES_VERSION_SHORT} -$SUDO ln -sf /usr/local/bin/g++ /usr/local/bin/g++-${GCC_GCC_SOURCES_VERSION_SHORT} +$SUDO ln -sf 
/usr/local/bin/gcc /usr/local/bin/gcc-${GCC_VERSION_SHORT} +$SUDO ln -sf /usr/local/bin/g++ /usr/local/bin/g++-${GCC_VERSION_SHORT} $SUDO ln -sf /usr/local/bin/gcc /usr/local/bin/cc $SUDO ln -sf /usr/local/bin/g++ /usr/local/bin/c++ @@ -43,5 +43,5 @@ $SUDO ldconfig hash gcc g++ gcc --version -export CC=gcc -export CXX=g++ +export CC=gcc-${GCC_VERSION_SHORT} +export CXX=g++-${GCC_VERSION_SHORT} diff --git a/utils/test_history/README.md b/utils/test_history/README.md new file mode 100644 index 00000000000..1de9bf0a4ab --- /dev/null +++ b/utils/test_history/README.md @@ -0,0 +1,59 @@ +## Script for ClickHouse tests history + +Allows to view test-suite history for master branch. + +## Usage + +```bash +$ sudo pip install -r requirements.txt +$ test-history --token XXX --since '2020-01-22 00:00:00' --substr Performance' ++---------------------|---------|--------------------+ +| Date | SHA | Performance test | ++=====================+=========+====================+ +| 2020-01-22 12:54:59 | 47ffa40 | succes | ++---------------------|---------|--------------------+ +| 2020-01-22 13:06:16 | 0d484be | failure | ++---------------------|---------|--------------------+ +| 2020-01-22 14:18:34 | 289f169 | succes | ++---------------------|---------|--------------------+ +| 2020-01-22 14:27:27 | e357c6f | not run | ++---------------------|---------|--------------------+ +| 2020-01-22 15:29:30 | 6cd6b4d | not run | ++---------------------|---------|--------------------+ +| 2020-01-22 16:52:26 | 6fc7a82 | not run | ++---------------------|---------|--------------------+ +| 2020-01-22 16:55:52 | c683c77 | failure | ++---------------------|---------|--------------------+ +| 2020-01-22 16:58:36 | d68f8d1 | pending | ++---------------------|---------|--------------------+ +| 2020-01-22 17:59:43 | ba7ab32 | succes | ++---------------------|---------|--------------------+ +| 2020-01-22 18:32:38 | eadb902 | failure | ++---------------------|---------|--------------------+ +| 2020-01-22 19:11:34 | 8f241ea | succes | ++---------------------|---------|--------------------+ +| 2020-01-22 19:56:49 | f0b7422 | failure | ++---------------------|---------|--------------------+ +| 2020-01-22 21:26:16 | 55be790 | not run | ++---------------------|---------|--------------------+ +| 2020-01-22 22:23:59 | c00636b | not run | ++---------------------|---------|--------------------+ +| 2020-01-22 23:09:23 | 8cfe9a4 | failure | ++---------------------|---------|--------------------+ +| 2020-01-23 00:10:33 | a02b59f | succes | ++---------------------|---------|--------------------+ +| 2020-01-23 05:56:11 | 48b3f33 | failure | ++---------------------|---------|--------------------+ +| 2020-01-23 05:56:54 | d807088 | succes | ++---------------------|---------|--------------------+ +| 2020-01-23 06:01:48 | 2e84949 | failure | ++---------------------|---------|--------------------+ +| 2020-01-23 11:18:19 | b80e3dc | pending | ++---------------------|---------|--------------------+ +| 2020-01-23 11:53:30 | 0e906b2 | pending | ++---------------------|---------|--------------------+ +``` + +### Options + +Script allows to specify start date for commits range in `'%Y-%m-%d %H:%M:%S'` format with `--since` option, default is three days. Also there is `--substr` option which allows to filter test suites by substring occurrence. Github token is required for script and can be found at https://github.com/settings/tokens. 
diff --git a/utils/test_history/requirements.txt b/utils/test_history/requirements.txt new file mode 100644 index 00000000000..07970e9fab5 --- /dev/null +++ b/utils/test_history/requirements.txt @@ -0,0 +1,3 @@ +pygithub==1.43.5 +tabulate==0.8.6 +termcolor==1.1.0 diff --git a/utils/test_history/test-history b/utils/test_history/test-history new file mode 100755 index 00000000000..783f25ff822 --- /dev/null +++ b/utils/test_history/test-history @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Note: should work with python 2 and 3 +from __future__ import print_function +from github import Github +import datetime +import tabulate +import argparse +from termcolor import colored +import sys + +COLORMAP = { + "success": colored("succes", 'green'), + "failure": colored("failure", 'red'), + "error": colored("error", 'red'), + "pending": colored("pending", 'yellow'), + "not run": colored("not run", 'white'), +} + +def _filter_statuses(statuses): + """ + Squash statuses to latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", stat="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", stat="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt.values() + + +def get_filtered_statuses(commit): + return _filter_statuses(commit.get_statuses()) + + +def get_commits(repo, since): + return sorted(repo.get_commits(since=since), key=lambda x: x.commit.author.date) + + +def process_one_commit(commit): + commit_statuses = get_filtered_statuses(commit) + + # very dirty, but don't require additional dependencies + commit_modified = commit.commit.author.date + datetime.timedelta(hours=3) + commit_sha = commit.sha + checks_result = {} + for commit_status in commit_statuses: + state = commit_status.state + check_name = commit_status.context + checks_result[check_name] = state + + return commit_sha, commit_modified, checks_result + + +if __name__ == "__main__": + three_days_ago = datetime.datetime.now() - datetime.timedelta(days=3) + parser = argparse.ArgumentParser("ClickHouse commits history parser") + parser.add_argument("--token", required=True) + parser.add_argument("--since", default=three_days_ago.strftime("%Y-%m-%d %H:%M:%S")) + parser.add_argument("--substr", default="Functional stateful") + + args = parser.parse_args() + + date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S") + + gh = Github(args.token) + repo = gh.get_repo('ClickHouse/ClickHouse') + commits = get_commits(repo, date_since) + + longest_header = [] + all_data = [] + for num, commit in enumerate(commits): + commit_sha, commit_modified, check_results = process_one_commit(commit) + mapped_keys = [key for key in check_results.keys() if args.substr in key] + if len(mapped_keys) > len(longest_header): + longest_header = mapped_keys + all_data.append((commit_modified, commit_sha, check_results)) + if (num + 1) % 10 == 0: + print("Processed", num + 1, "commits") + + longest_header = ["Date", "SHA"] + longest_header + + result_data = [] + for row in all_data: + current_result = [row[0].strftime("%Y-%m-%d %H:%M:%S"), row[1][0:7]] + for check_name in longest_header[2:]: + if check_name in row[2]: + check_result = row[2][check_name] + else: + check_result = "not run" + + if sys.stdout.isatty(): + current_result.append(COLORMAP[check_result]) + else: + current_result.append(check_result) + 
result_data.append(current_result) + + if sys.stdout.isatty(): + longest_header = [colored(h, 'white', attrs=['bold']) for h in longest_header] + + print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid")) diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index 7c14ed605fb..7e67f078586 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,3 +1,3 @@ add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp) -target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper ${Poco_Foundation_LIBRARY} ${LINE_EDITING_LIBS}) +target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper ${Poco_Foundation_LIBRARY}) INSTALL(TARGETS clickhouse-zookeeper-cli RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse-utils) diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index 5e36ffecdaa..44140423a15 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -1,12 +1,13 @@ -#include +#include +#include +#include #include +#include +#include +#include + #include #include -#include -#include -#include -#include -#include void printStat(const Coordination::Stat & s) @@ -69,7 +70,7 @@ int main(int argc, char ** argv) Logger::root().setLevel("trace"); zkutil::ZooKeeper zk(argv[1]); - LineReader lr(nullptr, {}, '\\'); + LineReader lr({}, '\\'); do { diff --git a/website/index.html b/website/index.html index ec686bceefb..fa9abdda140 100644 --- a/website/index.html +++ b/website/index.html @@ -501,7 +501,7 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh ClickHouse source code is published under Apache 2.0 License. Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- +