diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.md b/.github/ISSUE_TEMPLATE/documentation-issue.md index a8f31eadc56..557e5ea43c9 100644 --- a/.github/ISSUE_TEMPLATE/documentation-issue.md +++ b/.github/ISSUE_TEMPLATE/documentation-issue.md @@ -2,8 +2,7 @@ name: Documentation issue about: Report something incorrect or missing in documentation title: '' -labels: documentation -assignees: BayoNet +labels: comp-documentation --- diff --git a/.gitmodules b/.gitmodules index 081724c54c8..10a1419125d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -44,6 +44,7 @@ [submodule "contrib/protobuf"] path = contrib/protobuf url = https://github.com/ClickHouse-Extras/protobuf.git + branch = v3.13.0.1 [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse-Extras/boost.git @@ -107,6 +108,7 @@ [submodule "contrib/grpc"] path = contrib/grpc url = https://github.com/ClickHouse-Extras/grpc.git + branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git @@ -200,3 +202,7 @@ [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz +[submodule "contrib/abseil-cpp"] + path = contrib/abseil-cpp + url = https://github.com/ClickHouse-Extras/abseil-cpp.git + branch = lts_2020_02_25 diff --git a/base/common/StringRef.h b/base/common/StringRef.h index b51b95456cb..ac9d7c47b72 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -1,6 +1,7 @@ #pragma once #include +#include // for std::logic_error #include #include #include diff --git a/base/common/logger_useful.h b/base/common/logger_useful.h index f760d59de45..d3b4d38d546 100644 --- a/base/common/logger_useful.h +++ b/base/common/logger_useful.h @@ -3,7 +3,6 @@ /// Macros for convenient usage of Poco logger. 
#include -#include #include #include #include diff --git a/base/glibc-compatibility/musl/accept4.c b/base/glibc-compatibility/musl/accept4.c new file mode 100644 index 00000000000..59ab1726bdc --- /dev/null +++ b/base/glibc-compatibility/musl/accept4.c @@ -0,0 +1,19 @@ +#define _GNU_SOURCE +#include +#include +#include +#include "syscall.h" + +int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg) +{ + if (!flg) return accept(fd, addr, len); + int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0); + if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret; + ret = accept(fd, addr, len); + if (ret<0) return ret; + if (flg & SOCK_CLOEXEC) + __syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC); + if (flg & SOCK_NONBLOCK) + __syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK); + return ret; +} diff --git a/base/glibc-compatibility/musl/epoll.c b/base/glibc-compatibility/musl/epoll.c new file mode 100644 index 00000000000..deff5b101aa --- /dev/null +++ b/base/glibc-compatibility/musl/epoll.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include "syscall.h" + +int epoll_create(int size) +{ + return epoll_create1(0); +} + +int epoll_create1(int flags) +{ + int r = __syscall(SYS_epoll_create1, flags); +#ifdef SYS_epoll_create + if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1); +#endif + return __syscall_ret(r); +} + +int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev) +{ + return syscall(SYS_epoll_ctl, fd, op, fd2, ev); +} + +int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs) +{ + int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8); +#ifdef SYS_epoll_wait + if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to); +#endif + return __syscall_ret(r); +} + +int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to) +{ + return epoll_pwait(fd, ev, cnt, to, 0); +} diff --git a/base/glibc-compatibility/musl/eventfd.c 
b/base/glibc-compatibility/musl/eventfd.c new file mode 100644 index 00000000000..68e489c8364 --- /dev/null +++ b/base/glibc-compatibility/musl/eventfd.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include "syscall.h" + +int eventfd(unsigned int count, int flags) +{ + int r = __syscall(SYS_eventfd2, count, flags); +#ifdef SYS_eventfd + if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count); +#endif + return __syscall_ret(r); +} + +int eventfd_read(int fd, eventfd_t *value) +{ + return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1; +} + +int eventfd_write(int fd, eventfd_t value) +{ + return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1; +} diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c new file mode 100644 index 00000000000..a429273fa1a --- /dev/null +++ b/base/glibc-compatibility/musl/getauxval.c @@ -0,0 +1,45 @@ +#include +#include // __environ +#include + +// We don't have libc struct available here. Compute aux vector manually. 
+static unsigned long * __auxv = NULL; +static unsigned long __auxv_secure = 0; + +static size_t __find_auxv(unsigned long type) +{ + size_t i; + for (i = 0; __auxv[i]; i += 2) + { + if (__auxv[i] == type) + return i + 1; + } + return (size_t) -1; +} + +__attribute__((constructor)) static void __auxv_init() +{ + size_t i; + for (i = 0; __environ[i]; i++); + __auxv = (unsigned long *) (__environ + i + 1); + + size_t secure_idx = __find_auxv(AT_SECURE); + if (secure_idx != ((size_t) -1)) + __auxv_secure = __auxv[secure_idx]; +} + +unsigned long getauxval(unsigned long type) +{ + if (type == AT_SECURE) + return __auxv_secure; + + if (__auxv) + { + size_t index = __find_auxv(type); + if (index != ((size_t) -1)) + return __auxv[index]; + } + + errno = ENOENT; + return 0; +} diff --git a/base/glibc-compatibility/musl/secure_getenv.c b/base/glibc-compatibility/musl/secure_getenv.c new file mode 100644 index 00000000000..fbd9ef3bdcc --- /dev/null +++ b/base/glibc-compatibility/musl/secure_getenv.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include +#include + +char * secure_getenv(const char * name) +{ + return getauxval(AT_SECURE) ? NULL : getenv(name); +} diff --git a/base/glibc-compatibility/musl/syscall.h b/base/glibc-compatibility/musl/syscall.h index 70b4688f642..3160357f252 100644 --- a/base/glibc-compatibility/musl/syscall.h +++ b/base/glibc-compatibility/musl/syscall.h @@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...); __attribute__((visibility("hidden"))) void *__vdsosym(const char *, const char *); + +#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__)) + +#define socketcall(...) 
__syscall_ret(__socketcall(__VA_ARGS__)) + +#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f) + +#define socketcall_cp socketcall diff --git a/base/glibc-compatibility/musl/vdso.c b/base/glibc-compatibility/musl/vdso.c index c0dd0f33e4e..b108c4ef752 100644 --- a/base/glibc-compatibility/musl/vdso.c +++ b/base/glibc-compatibility/musl/vdso.c @@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings) #define OK_TYPES (1<e_phoff); size_t *dynv=0, base=-1; diff --git a/cmake/Modules/FindgRPC.cmake b/cmake/Modules/FindgRPC.cmake index 671d207085b..945d307952b 100644 --- a/cmake/Modules/FindgRPC.cmake +++ b/cmake/Modules/FindgRPC.cmake @@ -6,11 +6,9 @@ Defines the following variables: The include directories of the gRPC framework, including the include directories of the C++ wrapper. ``gRPC_LIBRARIES`` The libraries of the gRPC framework. -``gRPC_UNSECURE_LIBRARIES`` - The libraries of the gRPC framework without SSL. -``_gRPC_CPP_PLUGIN`` +``gRPC_CPP_PLUGIN`` The plugin for generating gRPC client and server C++ stubs from `.proto` files -``_gRPC_PYTHON_PLUGIN`` +``gRPC_PYTHON_PLUGIN`` The plugin for generating gRPC client and server Python stubs from `.proto` files The following :prop_tgt:`IMPORTED` targets are also defined: @@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined: ``grpc_cpp_plugin`` ``grpc_python_plugin`` +Set the following variables to adjust the behaviour of this script: +``gRPC_USE_UNSECURE_LIBRARIES`` + if set, gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL) + instead of the secure ones. +``gRPC_DEBUG`` + if set, debug messages will be printed.
+ Add custom commands to process ``.proto`` files to C++:: protobuf_generate_grpc_cpp( [DESCRIPTORS ] [EXPORT_MACRO ] [...]) @@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc) find_library(gRPC_CPP_LIBRARY NAMES grpc++) find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure) find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure) +find_library(gRPC_CARES_LIBRARY NAMES cares) set(gRPC_LIBRARIES) if(gRPC_USE_UNSECURE_LIBRARIES) @@ -259,6 +265,7 @@ else() set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY}) endif() endif() +set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY}) # Restore the original find library ordering. if(gRPC_USE_STATIC_LIBS) @@ -278,11 +285,11 @@ else() endif() # Get full path to plugin. -find_program(_gRPC_CPP_PLUGIN +find_program(gRPC_CPP_PLUGIN NAMES grpc_cpp_plugin DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files") -find_program(_gRPC_PYTHON_PLUGIN +find_program(gRPC_PYTHON_PLUGIN NAMES grpc_python_plugin DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files") @@ -317,14 +324,14 @@ endif() #include(FindPackageHandleStandardArgs.cmake) FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC - REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY - gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN) + REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY + gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN) if(gRPC_FOUND) if(gRPC_DEBUG) message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}") message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}") - message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}") - message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}") + message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}") + message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}") endif() endif() diff --git 
a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 0e65568f185..87a30c9effc 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54443) +SET(VERSION_REVISION 54444) SET(VERSION_MAJOR 20) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1) -SET(VERSION_DESCRIBE v20.12.1.1-prestable) -SET(VERSION_STRING 20.12.1.1) +SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471) +SET(VERSION_DESCRIBE v20.13.1.1-prestable) +SET(VERSION_STRING 20.13.1.1) # end of autochange diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake index fa283d98225..017a7b094b0 100644 --- a/cmake/find/grpc.cmake +++ b/cmake/find/grpc.cmake @@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY) if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES) message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library") set(EXTERNAL_GRPC_LIBRARY_FOUND 0) - elseif(NOT _gRPC_CPP_PLUGIN) - message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin") + elseif(NOT gRPC_CPP_PLUGIN) + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin") set(EXTERNAL_GRPC_LIBRARY_FOUND 0) else() set(EXTERNAL_GRPC_LIBRARY_FOUND 1) @@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY) else() set(gRPC_LIBRARIES grpc grpc++) endif() - set(_gRPC_CPP_PLUGIN $) - set(_gRPC_PROTOC_EXECUTABLE $) + set(gRPC_CPP_PLUGIN $) + set(gRPC_PYTHON_PLUGIN $) include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake") @@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY) set(USE_GRPC 1) endif() -message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}") +message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : 
${gRPC_CPP_PLUGIN}") diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp new file mode 160000 index 00000000000..4f3b686f86c --- /dev/null +++ b/contrib/abseil-cpp @@ -0,0 +1 @@ +Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54 diff --git a/contrib/cctz b/contrib/cctz index 7a2db4ece6e..260ba195ef6 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841 +Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d diff --git a/contrib/grpc b/contrib/grpc index a6570b863cf..7436366ceb3 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43 +Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93 diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 5ab70d83429..efb0f1c4f43 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc") set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc") +# Use re2 from ClickHouse contrib, not from gRPC third_party. if(NOT RE2_INCLUDE_DIR) message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown") endif() @@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}") set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}") +# Use zlib from ClickHouse contrib, not from gRPC third_party. if(NOT ZLIB_INCLUDE_DIRS) message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown") endif() @@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}") set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}") +# Use protobuf from ClickHouse contrib, not from gRPC third_party. 
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY) message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown") elseif (NOT Protobuf_PROTOC_EXECUTABLE) @@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc") set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}") set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}") +# Use OpenSSL from ClickHouse contrib, not from gRPC third_party. set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES}) +# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party. +set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) +set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") +if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt") + message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive") +endif() +add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp") + +# Choose to build static or shared library for c-ares. +if (MAKE_STATIC_LIBRARIES) + set(CARES_STATIC ON CACHE BOOL "" FORCE) + set(CARES_SHARED OFF CACHE BOOL "" FORCE) +else () + set(CARES_STATIC OFF CACHE BOOL "" FORCE) + set(CARES_SHARED ON CACHE BOOL "" FORCE) +endif () + # We don't want to build C# extensions. set(gRPC_BUILD_CSHARP_EXT OFF) -# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off. -set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING}) -set(BUILD_TESTING OFF) - add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}") -set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING}) - # The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes, # so we need to redefine it back. 
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake") diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 82b3b9c0de5..3afff30eee7 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st set(LIBUNWIND_ASM_SOURCES ${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S ${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S) -set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C) + +# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1] +# Workaround these two issues by compiling as C. +# +# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771 +if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19) + set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C) +else() + enable_language(ASM) +endif() set(LIBUNWIND_SOURCES ${LIBUNWIND_CXX_SOURCES} diff --git a/contrib/protobuf b/contrib/protobuf index 445d1ae73a4..73b12814204 160000 --- a/contrib/protobuf +++ b/contrib/protobuf @@ -1 +1 @@ -Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3 +Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee diff --git a/debian/changelog b/debian/changelog index 3da82efd47e..5ea6b472e46 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (20.12.1.1) unstable; urgency=low +clickhouse (20.13.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Thu, 05 Nov 2020 21:52:47 +0300 + -- clickhouse-release Mon, 23 Nov 2020 10:29:24 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 2223b942429..3ef6b8c8b32 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG version=20.13.1.* RUN apt-get update \ 
&& apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 261edf1a86c..2f501f76e68 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -56,6 +56,7 @@ RUN apt-get update \ libprotoc-dev \ libgrpc++-dev \ protobuf-compiler-grpc \ + libc-ares-dev \ rapidjson-dev \ libsnappy-dev \ libparquet-dev \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 1ce6e427409..f7e107a2fc9 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG version=20.13.1.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index cd2bead5616..8e3b5193874 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG version=20.13.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile index 32020951539..cea1a63cf6f 100644 --- a/docker/test/coverage/Dockerfile +++ b/docker/test/coverage/Dockerfile @@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build ENV OUTPUT_DIR=/output ENV IGNORE='.*contrib.*' -CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \ +RUN apt-get update && apt-get install cmake --yes --no-install-recommends + +CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. 
&& cd /; \ dpkg -i /package_folder/clickhouse-common-static_*.deb; \ - llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ - llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ + llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ + llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR} diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index aef967b6b41..c95344eeca2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -15,6 +15,9 @@ stage=${stage:-} # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" +# Run only matching tests. +FASTTEST_FOCUS=${FASTTEST_FOCUS:-""} + FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}") FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}") FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}") @@ -291,7 +294,7 @@ TESTS_TO_SKIP=( 01563_distributed_query_finish ) -time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" +time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" # substr is to remove semicolon after test name readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 
76cadc3ce11..004bac02918 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -30,7 +30,7 @@ RUN apt-get update \ tzdata \ vim \ wget \ - && pip3 --no-cache-dir install clickhouse_driver scipy \ + && pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 337f13690b6..1c54479aab3 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -14,10 +14,12 @@ import string import sys import time import traceback +import logging import xml.etree.ElementTree as et from threading import Thread from scipy import stats +logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds @@ -46,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') +parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") +parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") args = parser.parse_args() reportStageEnd('start') @@ -146,20 +150,21 @@ for i, s in enumerate(servers): reportStageEnd('connect') -# Run drop queries, ignoring errors. 
Do this before all other activity, because -# clickhouse_driver disconnects on error (this is not configurable), and the new -# connection loses the changes in settings. -drop_query_templates = [q.text for q in root.findall('drop_query')] -drop_queries = substitute_parameters(drop_query_templates) -for conn_index, c in enumerate(all_connections): - for q in drop_queries: - try: - c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') - except: - pass +if not args.use_existing_tables: + # Run drop queries, ignoring errors. Do this before all other activity, + # because clickhouse_driver disconnects on error (this is not configurable), + # and the new connection loses the changes in settings. + drop_query_templates = [q.text for q in root.findall('drop_query')] + drop_queries = substitute_parameters(drop_query_templates) + for conn_index, c in enumerate(all_connections): + for q in drop_queries: + try: + c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + except: + pass -reportStageEnd('drop-1') + reportStageEnd('drop-1') # Apply settings. # If there are errors, report them and continue -- maybe a new test uses a setting @@ -171,12 +176,9 @@ reportStageEnd('drop-1') settings = root.findall('settings/*') for conn_index, c in enumerate(all_connections): for s in settings: - try: - q = f"set {s.tag} = '{s.text}'" - c.execute(q) - print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') - except: - print(traceback.format_exc(), file=sys.stderr) + # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings + # (https://github.com/mymarilyn/clickhouse-driver/pull/142) + c.settings[s.tag] = s.text reportStageEnd('settings') @@ -194,37 +196,40 @@ for t in tables: reportStageEnd('preconditions') -# Run create and fill queries. We will run them simultaneously for both servers, -# to save time. 
-# The weird search is to keep the relative order of elements, which matters, and -# etree doesn't support the appropriate xpath query. -create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')] -create_queries = substitute_parameters(create_query_templates) +if not args.use_existing_tables: + # Run create and fill queries. We will run them simultaneously for both + # servers, to save time. The weird XML search + filter is because we want to + # keep the relative order of elements, and etree doesn't support the + # appropriate xpath query. + create_query_templates = [q.text for q in root.findall('./*') + if q.tag in ('create_query', 'fill_query')] + create_queries = substitute_parameters(create_query_templates) -# Disallow temporary tables, because the clickhouse_driver reconnects on errors, -# and temporary tables are destroyed. We want to be able to continue after some -# errors. -for q in create_queries: - if re.search('create temporary table', q, flags=re.IGNORECASE): - print(f"Temporary tables are not allowed in performance tests: '{q}'", - file = sys.stderr) - sys.exit(1) + # Disallow temporary tables, because the clickhouse_driver reconnects on + # errors, and temporary tables are destroyed. We want to be able to continue + # after some errors. 
+ for q in create_queries: + if re.search('create temporary table', q, flags=re.IGNORECASE): + print(f"Temporary tables are not allowed in performance tests: '{q}'", + file = sys.stderr) + sys.exit(1) -def do_create(connection, index, queries): - for q in queries: - connection.execute(q) - print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') + def do_create(connection, index, queries): + for q in queries: + connection.execute(q) + print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') -threads = [Thread(target = do_create, args = (connection, index, create_queries)) - for index, connection in enumerate(all_connections)] + threads = [ + Thread(target = do_create, args = (connection, index, create_queries)) + for index, connection in enumerate(all_connections)] -for t in threads: - t.start() + for t in threads: + t.start() -for t in threads: - t.join() + for t in threads: + t.join() -reportStageEnd('create') + reportStageEnd('create') # By default, test all queries. 
queries_to_run = range(0, len(test_queries)) @@ -403,10 +408,11 @@ print(f'profile-total\t{profile_total_seconds}') reportStageEnd('run') # Run drop queries -drop_queries = substitute_parameters(drop_query_templates) -for conn_index, c in enumerate(all_connections): - for q in drop_queries: - c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') +if not args.keep_created_tables and not args.use_existing_tables: + drop_queries = substitute_parameters(drop_query_templates) + for conn_index, c in enumerate(all_connections): + for q in drop_queries: + c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') reportStageEnd('drop-2') diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 0aedb67e572..382b486dda3 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -10,6 +10,11 @@ RUN apt-get update --yes \ gpg-agent \ debsig-verify \ strace \ + protobuf-compiler \ + protobuf-compiler-grpc \ + libprotoc-dev \ + libgrpc++-dev \ + libc-ares-dev \ --yes --no-install-recommends #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add - @@ -33,7 +38,8 @@ RUN set -x \ && dpkg -i "${PKG_VERSION}.deb" CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ - && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \ + && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \ + && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log diff --git a/docker/test/stateful_with_coverage/Dockerfile b/docker/test/stateful_with_coverage/Dockerfile index f5d66ed5013..ac6645b9463 100644 --- a/docker/test/stateful_with_coverage/Dockerfile +++ b/docker/test/stateful_with_coverage/Dockerfile @@ -1,12 +1,12 @@ # docker build -t yandex/clickhouse-stateful-test-with-coverage . -FROM yandex/clickhouse-stateless-test +FROM yandex/clickhouse-stateless-test-with-coverage RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests + python3-requests procps psmisc COPY s3downloader /s3downloader COPY run.sh /run.sh diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index aaf7e0a44ac..5fc6350fad8 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -1,40 +1,44 @@ #!/bin/bash kill_clickhouse () { - kill "$(pgrep -u clickhouse)" 2>/dev/null + echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" - break - else - echo "Process $(pgrep -u clickhouse) still alive" - sleep 10 - fi + if ! 
pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done + + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & -} - -wait_llvm_profdata () { - while kill -0 "$(pgrep llvm-profdata-10)" + counter=0 + until clickhouse-client --query "SELECT 1" do - echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive" - sleep 3 + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) done } -merge_client_files_in_background () { - client_files=$(ls /client_*profraw 2>/dev/null) - if [ -n "$client_files" ] - then - llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw" - rm "$client_files" - fi -} chmod 777 / @@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/ # install test configs /usr/share/clickhouse-test/config/install.sh -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt 120 ] - then - echo "Cannot start clickhouse-server" - cat /var/log/clickhouse-server/stdout.log - tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n1000 /var/log/clickhouse-server/clickhouse-server.log - break - fi - timeout 120 service clickhouse-server start - sleep 0.5 - counter=$((counter + 1)) - done -} - -start +start_clickhouse # shellcheck disable=SC2086 # No quotes because I want to split it into words. if ! 
/s3downloader --dataset-names $DATASETS; then @@ -81,25 +66,20 @@ fi chmod 777 -R /var/lib/clickhouse -while /bin/true; do - merge_client_files_in_background - sleep 2 -done & -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test" kill_clickhouse start_clickhouse -sleep 10 +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" @@ -109,15 +89,10 @@ fi # more 
idiologically correct. read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse -wait_llvm_profdata - sleep 3 -wait_llvm_profdata # 100% merged all parts - - cp /*.profraw /profraw ||: diff --git a/docker/test/stateless_with_coverage/Dockerfile b/docker/test/stateless_with_coverage/Dockerfile index 1d6a85adf9e..f7379ba5568 100644 --- a/docker/test/stateless_with_coverage/Dockerfile +++ b/docker/test/stateless_with_coverage/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateless-with-coverage-test . +# docker build -t yandex/clickhouse-stateless-test-with-coverage . 
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs) FROM yandex/clickhouse-test-base @@ -28,7 +28,9 @@ RUN apt-get update -y \ lsof \ unixodbc \ wget \ - qemu-user-static + qemu-user-static \ + procps \ + psmisc RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 758591df618..4e4d9430a11 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -2,27 +2,41 @@ kill_clickhouse () { echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' - kill "$(pgrep -u clickhouse)" 2>/dev/null + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S' - break - else - echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S' - sleep 10 - fi + if ! 
pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done - echo "Will try to send second kill signal for sure" - kill "$(pgrep -u clickhouse)" 2>/dev/null - sleep 5 - echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & + counter=0 + until clickhouse-client --query "SELECT 1" + do + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) + done } chmod 777 / @@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/ start_clickhouse -sleep 10 - - if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 932facc9ddc..16c6a74d94e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -152,7 +152,7 @@ You can specify default arguments for `Replicated` table engine in the server co ```xml /clickhouse/tables/{shard}/{database}/{table} -{replica} +{replica} ``` In this case, you can omit arguments when creating tables: diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 45533d3733f..2afeabc7956 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -44,11 +44,10 @@ stages, such as query planning or distributed 
queries. To be useful, the tracing information has to be exported to a monitoring system that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids -a dependency on a particular monitoring system, instead only -providing the tracing data conforming to the standard. A natural way to do so -in an SQL RDBMS is a system table. OpenTelemetry trace span information +a dependency on a particular monitoring system, instead only providing the +tracing data through a system table. OpenTelemetry trace span information [required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span) -is stored in the system table called `system.opentelemetry_span_log`. +is stored in the `system.opentelemetry_span_log` table. The table must be enabled in the server configuration, see the `opentelemetry_span_log` element in the default config file `config.xml`. It is enabled by default. @@ -67,3 +66,31 @@ The table has the following columns: The tags or attributes are saved as two parallel arrays, containing the keys and values. Use `ARRAY JOIN` to work with them. + +## Integration with monitoring systems + +At the moment, there is no ready-made tool that can export the tracing data from +ClickHouse to a monitoring system. + +For testing, it is possible to set up the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector.
For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format: + +```sql +CREATE MATERIALIZED VIEW default.zipkin_spans +ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow') +SETTINGS output_format_json_named_tuples_as_objects = 1, + output_format_json_array_of_rows = 1 AS +SELECT + lower(hex(reinterpretAsFixedString(trace_id))) AS traceId, + lower(hex(parent_span_id)) AS parentId, + lower(hex(span_id)) AS id, + operation_name AS name, + start_time_us AS timestamp, + finish_time_us - start_time_us AS duration, + cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint, + cast(tuple( + attribute.values[indexOf(attribute.names, 'db.statement')]), + 'Tuple("db.statement" text)') AS tags +FROM system.opentelemetry_span_log +``` + +In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0bd423f4d7a..ba899754b18 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2317,4 +2317,10 @@ Possible values: Default value: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format. + +Default value: `\N`. 
+ [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..bc7e6335c0d --- /dev/null +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Contains information about currently running background fetches. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. + +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since showing currently running background fetches started. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of showing currently running background fetches. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of showing currently running background fetches. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica. 
+ +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. + +**Example** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**See Also** + +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/en/operations/utilities/clickhouse-obfuscator.md b/docs/en/operations/utilities/clickhouse-obfuscator.md index 8a2ea1eecf6..7fd608fcac0 100644 --- a/docs/en/operations/utilities/clickhouse-obfuscator.md +++ b/docs/en/operations/utilities/clickhouse-obfuscator.md @@ -1,42 +1,42 @@ -# ClickHouse obfuscator - -Simple tool for table data obfuscation. 
- -It reads input table and produces output table, that retain some properties of input, but contains different data. -It allows to publish almost real production data for usage in benchmarks. - -It is designed to retain the following properties of data: -- cardinalities of values (number of distinct values) for every column and for every tuple of columns; -- conditional cardinalities: number of distinct values of one column under condition on value of another column; -- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats; -- probability distributions of length of strings; -- probability of zero values of numbers; empty strings and arrays, NULLs; -- data compression ratio when compressed with LZ77 and entropy family of codecs; -- continuity (magnitude of difference) of time values across table; continuity of floating point values. -- date component of DateTime values; -- UTF-8 validity of string values; -- string values continue to look somewhat natural. - -Most of the properties above are viable for performance testing: - -reading data, filtering, aggregation and sorting will work at almost the same speed -as on original data due to saved cardinalities, magnitudes, compression ratios, etc. - -It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed. -Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret. - -It use some cryptographic primitives to transform data, but from the cryptographic point of view, -It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it. - -It may retain some data you don't want to publish. - -It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data. -For example, you have a column IsMobile in your table with values 0 and 1. 
In transformed data, it will have the same value. -So, the user will be able to count exact ratio of mobile traffic. - -Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address. -If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others, -It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them. -And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters. - -This tool works fine only with reasonable amount of data (at least 1000s of rows). +# ClickHouse obfuscator + +A simple tool for table data obfuscation. + +It reads an input table and produces an output table, that retains some properties of input, but contains different data. +It allows publishing almost real production data for usage in benchmarks. + +It is designed to retain the following properties of data: +- cardinalities of values (number of distinct values) for every column and every tuple of columns; +- conditional cardinalities: number of distinct values of one column under the condition on the value of another column; +- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats; +- probability distributions of the length of strings; +- probability of zero values of numbers; empty strings and arrays, `NULL`s; + +- data compression ratio when compressed with LZ77 and entropy family of codecs; +- continuity (magnitude of difference) of time values across the table; continuity of floating-point values; +- date component of `DateTime` values; + +- UTF-8 validity of string values; +- string values look natural. 
+ +Most of the properties above are viable for performance testing: + +reading data, filtering, aggregation, and sorting will work at almost the same speed +as on original data due to saved cardinalities, magnitudes, compression ratios, etc. + +It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed. +Some transformations are one to one and could be reversed, so you need to have a large seed and keep it secret. + +It uses some cryptographic primitives to transform data but from the cryptographic point of view, it doesn't do it properly, that is why you should not consider the result as secure unless you have another reason. The result may retain some data you don't want to publish. + + +It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data. +For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value. + +So, the user will be able to count the exact ratio of mobile traffic. + +Let's give another example. Suppose you have some private data in your table, like a user email, and you don't want to publish any single email address. +If your table is large enough and contains multiple different emails and no email has a much higher frequency than the others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them. +You should look at how this tool works, and fine-tune its command line parameters. + +This tool works fine only with an average amount of data (at least 1000s of rows).
diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 270b7d8db39..543a5d3fed8 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big └────────┘ ``` -The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`. - Now you can use the `groupArray` function to create an array from the `y` column: ``` sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md new file mode 100644 index 00000000000..ea44d5f1ddd --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -0,0 +1,37 @@ +--- +toc_priority: 150 +--- + +## initializeAggregation {#initializeaggregation} + +Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`. +Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`. + +**Syntax** + +``` sql +initializeAggregation (aggregate_function, column_1, column_2); +``` + +**Parameters** + +- `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string). +- `column_n` — The column to pass to the function as its argument. [String](../../../sql-reference/data-types/string.md#string). + +**Returned value(s)** + +Returns the result of the aggregation for your input rows. The return type will be the same as the return type of the function that `initializeAggregation` takes as its first argument. +For example for functions with the suffix `State` the return type will be `AggregateFunction`.
+ +**Example** + +Query: + +```sql +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +``` +Result: + +┌─uniqMerge(state)─┐ +│ 3 │ +└──────────────────┘ diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5f4d31225b8..63b356e27e6 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -535,18 +535,7 @@ dateDiff('unit', startdate, enddate, [timezone]) - `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). - Supported values: - - | unit | - | ---- | - |second | - |minute | - |hour | - |day | - |week | - |month | - |quarter | - |year | + Supported values: second, minute, hour, day, week, month, quarter, year. - `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index ad63a5b72ac..0da74ce1b0e 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above). -For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +For example: + +- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. +- `cutToFirstSignificantSubdomain('tr') = ''`. 
+ +### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww} + +Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www". + +For example: + +- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`. ### port(URL\[, default_port = 0\]) {#port} diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index dbfd5431861..71586e15a31 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../ ### Drawbacks {#drawbacks} -Queries that use `FINAL` are executed not as fast as similar queries that don’t, because: +Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because: -- Query is executed in a single thread and data is merged during query execution. +- Data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).
{## TODO: examples ##} diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 6cb99f285f2..500a09dcbef 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -6,7 +6,7 @@ toc_title: GROUP BY `GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows: -- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”. +- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. - All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. - Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. @@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value. 
+## WITH ROLLUP Modifier {#with-rollup-modifier} + +`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table. + +The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression. + +In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; +``` +As `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left: + +- `GROUP BY year, month, day`; +- `GROUP BY year, month` (and `day` column is filled with zeros); +- `GROUP BY year` (now `month, day` columns are both filled with zeros); +- and totals (and all three key expression columns are zeros). 
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + +## WITH CUBE Modifier {#with-cube-modifier} + +`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table. + +In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; +``` + +As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations: + +- `GROUP BY year, month, day` +- `GROUP BY year, month` +- `GROUP BY year, day` +- `GROUP BY year` +- `GROUP BY month, day` +- `GROUP BY month` +- `GROUP BY day` +- and totals. + +Columns, excluded from `GROUP BY`, are filled with zeros. 
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## WITH TOTALS Modifier {#with-totals-modifier} If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values). @@ -88,8 +236,6 @@ SELECT FROM hits ``` -However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions. - As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’. 
Example: @@ -105,10 +251,6 @@ GROUP BY domain For every different key value encountered, `GROUP BY` calculates a set of aggregate function values. -`GROUP BY` is not supported for array columns. - -A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`. - ## Implementation Details {#implementation-details} Aggregation is one of the most important features of a column-oriented DBMS, and thus it’s implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types. diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 3107f791eb9..60c769c4660 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -20,12 +20,12 @@ SELECT [DISTINCT] expr_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON )|(USING ) [PREWHERE expr] [WHERE expr] -[GROUP BY expr_list] [WITH TOTALS] +[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] [ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [LIMIT [offset_value, ]n BY columns] [LIMIT [n, ]m] [WITH TIES] -[UNION ALL ...] +[UNION ...] 
[INTO OUTFILE filename] [FORMAT format] ``` @@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li - [SELECT clause](#select-clause) - [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) - [LIMIT clause](../../../sql-reference/statements/select/limit.md) -- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md) +- [UNION clause](../../../sql-reference/statements/select/union-all.md) - [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) - [FORMAT clause](../../../sql-reference/statements/select/format.md) @@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation. -{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##} +## SELECT modifiers {#select-modifiers} + +You can use the following modifiers in `SELECT` queries. + +### APPLY {#apply-modifier} + +Allows you to invoke some function for each row returned by an outer table expression of a query. + +**Syntax:** + +``` sql +SELECT APPLY( ) FROM [db.]table_name +``` + +**Example:** + +``` sql +CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i); +INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23); +SELECT * APPLY(sum) FROM columns_transformers; +``` + +``` +┌─sum(i)─┬─sum(j)─┬─sum(k)─┐ +│ 220 │ 18 │ 347 │ +└────────┴────────┴────────┘ +``` + +### EXCEPT {#except-modifier} + +Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output. + +**Syntax:** + +``` sql +SELECT EXCEPT ( col_name1 [, col_name2, col_name3, ...] 
) FROM [db.]table_name +``` + +**Example:** + +``` sql +SELECT * EXCEPT (i) from columns_transformers; +``` + +``` +┌──j─┬───k─┐ +│ 10 │ 324 │ +│ 8 │ 23 │ +└────┴─────┘ +``` + +### REPLACE {#replace-modifier} + +Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`. + +This modifier does not change the names or order of columns. However, it can change the value and the value type. + +**Syntax:** + +``` sql +SELECT REPLACE( AS col_name) from [db.]table_name +``` + +**Example:** + +``` sql +SELECT * REPLACE(i + 1 AS i) from columns_transformers; +``` + +``` +┌───i─┬──j─┬───k─┐ +│ 101 │ 10 │ 324 │ +│ 121 │ 8 │ 23 │ +└─────┴────┴─────┘ +``` + +### Modifier Combinations {#modifier-combinations} + +You can use each modifier separately or combine them. + +**Examples:** + +Using the same modifier multiple times. + +``` sql +SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers; +``` + +``` +┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐ +│ 2 │ 3 │ +└──────────────────────────┴──────────────────────────┘ +``` + +Using multiple modifiers in a single query. 
+ +``` sql +SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; +``` + +``` +┌─sum(plus(i, 1))─┬─sum(k)─┐ +│ 222 │ 347 │ +└─────────────────┴────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) + diff --git a/docs/en/sql-reference/statements/select/union-all.md b/docs/en/sql-reference/statements/select/union-all.md index 5230363609e..f150efbdc80 100644 --- a/docs/en/sql-reference/statements/select/union-all.md +++ b/docs/en/sql-reference/statements/select/union-all.md @@ -1,5 +1,5 @@ --- -toc_title: UNION ALL +toc_title: UNION --- # UNION ALL Clause {#union-all-clause} @@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. -## Limitations {#limitations} +# UNION DISTINCT Clause {#union-distinct-clause} +The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` will do a distinct transform for union result, it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`. + +# UNION Clause {#union-clause} +By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify union mode by setting `union_default_mode`, values can be 'ALL', 'DISTINCT' or empty string. However, if you use `UNION` with setting `union_default_mode` to empty string, it will throw an exception. -Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`. 
## Implementation Details {#implementation-details} -Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together. +Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 4fdae1cd255..af0fc3e6137 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2187,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV. + +Значение по умолчанию: `\N`. + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/replicated_fetches.md b/docs/ru/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..94584f390ee --- /dev/null +++ b/docs/ru/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик. + +Столбцы: + +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. + +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1. 
+ +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока. 
+ +**Пример** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**Смотрите также** + +- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated) + +[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/ru/operations/utilities/clickhouse-obfuscator.md b/docs/ru/operations/utilities/clickhouse-obfuscator.md new file mode 100644 index 00000000000..a52d538965b --- /dev/null +++ b/docs/ru/operations/utilities/clickhouse-obfuscator.md @@ -0,0 +1,43 @@ +# Обфускатор ClickHouse + +Простой инструмент для обфускации табличных данных. + +Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные. + +Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность. 
+ +Обфускатор предназначен для сохранения следующих свойств данных: +- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов; +- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца; +- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой; +- вероятностное распределение длины строк; +- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`; +- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков; + +- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой; +- дату из значений `DateTime`; + +- кодировка UTF-8 значений строки; +- строковые значения выглядят естественным образом. + + +Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д. + +Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором. + +Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете. + + +Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать. + + +Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги. +Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение. + +Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика. 
+ +Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать. +Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них. +В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки. + +Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк). diff --git a/docs/ru/sql-reference/aggregate-functions/index.md b/docs/ru/sql-reference/aggregate-functions/index.md index e7f6acee738..4a7768f587f 100644 --- a/docs/ru/sql-reference/aggregate-functions/index.md +++ b/docs/ru/sql-reference/aggregate-functions/index.md @@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big └────────┘ ``` -Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`. - Теперь с помощью функции `groupArray` сформируем массив из столбца `y`: ``` sql diff --git a/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md new file mode 100644 index 00000000000..a2e3764193e --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -0,0 +1,40 @@ +--- +toc_priority: 150 +--- + +## initializeAggregation {#initializeaggregation} + +Инициализирует агрегацию для введеных строчек. Предназначена для функций с суффиксом `State`. +Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregationgMergeTree`. 
+ +**Синтаксис** + +``` sql +initializeAggregation (aggregate_function, column_1, column_2); +``` + +**Параметры** + +- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string). +- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAgregation`. + +Пример: + +Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`. + +**Пример** + +Запрос: + +```sql +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +``` +Результат: + +┌─uniqMerge(state)─┐ +│ 3 │ +└──────────────────┘ diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index 491bbfe892b..8facf140118 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -27,9 +27,9 @@ toc_title: FROM ### Недостатки {#drawbacks} -Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что: +Запросы, которые используют `FINAL` выполняются немного медленее, чем аналогичные запросы без него, потому что: -- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса. +- Данные мёржатся во время выполнения запроса. - Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе. **В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). 
{## TODO: examples ##} diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index a0454ef1d91..0c8a29d0c26 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -43,6 +43,153 @@ toc_title: GROUP BY Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением. +## Модификатор WITH ROLLUP {#with-rollup-modifier} + +Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения. + +Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка. + +!!! note "Примечание" + Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. + +**Пример** + +Рассмотрим таблицу t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Запрос: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; +``` + +Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево: + +- `GROUP BY year, month, day`; +- `GROUP BY year, month` (а колонка `day` заполнена нулями); +- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями); +- и общий итог (все три колонки с ключевыми выражениями заполнены нулями). 
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + +## Модификатор WITH CUBE {#with-cube-modifier} + +Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`. + +Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка. + +!!! note "Примечание" + Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. + +**Пример** + +Рассмотрим таблицу t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; +``` + +Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений: + +- `GROUP BY year, month, day` +- `GROUP BY year, month` +- `GROUP BY year, day` +- `GROUP BY year` +- `GROUP BY month, day` +- `GROUP BY month` +- `GROUP BY day` +- и общий итог. + +Колонки, которые не участвуют в `GROUP BY`, заполнены нулями. 
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## Модификатор WITH TOTALS {#with-totals-modifier} Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения). @@ -86,8 +233,6 @@ SELECT FROM hits ``` -Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций. - В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). 
Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max. Пример: @@ -103,10 +248,6 @@ GROUP BY domain GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций. -Не поддерживается GROUP BY по столбцам-массивам. - -Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`. - ## Детали реализации {#implementation-details} Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки. diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index f5fe2788370..c2e05f05079 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON )|(USING ) [PREWHERE expr] [WHERE expr] -[GROUP BY expr_list] [WITH TOTALS] +[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] [ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [LIMIT [offset_value, ]n BY columns] diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 192203c6fe6..3eac3905f23 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A" ClickHouse体验还有如下: [ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) -实例托管 [Yandex云](https://cloud.yandex.com/). -更多信息 [云提供商](../commercial/cloud.md). 
+实例托管 [Yandex云](https://cloud.yandex.com/)。 +更多信息 [云提供商](../commercial/cloud.md)。 ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的. 体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。 体验平台也同样提供了ClickHouse HTTPS服务端口。 -您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机 -有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md). +您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。 +有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。 | 参数 | 值 | |:---------|:--------------------------------------| diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index a5c30e46f4c..73896d3f8c1 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -33,10 +33,10 @@ ClickHouse 收集的指标项: - 服务用于计算的资源占用的各种指标。 - 关于查询处理的常见统计信息。 -可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 +可以在[系统指标](system-tables/metrics.md#system_tables-metrics),[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。 -可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 +可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。 
-此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。 +此外,您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。 -要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 +要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 diff --git a/docs/zh/sql-reference/functions/bitmap-functions.md b/docs/zh/sql-reference/functions/bitmap-functions.md index d2018f5d9c1..5a6baf2f217 100644 --- a/docs/zh/sql-reference/functions/bitmap-functions.md +++ b/docs/zh/sql-reference/functions/bitmap-functions.md @@ -6,7 +6,7 @@ 我们使用RoaringBitmap实际存储位图对象,当基数小于或等于32时,它使用Set保存。当基数大于32时,它使用RoaringBitmap保存。这也是为什么低基数集的存储更快的原因。 -有关RoaringBitmap的更多信息,请参阅:[呻吟声](https://github.com/RoaringBitmap/CRoaring)。 +有关RoaringBitmap的更多信息,请参阅:[RoaringBitmap](https://github.com/RoaringBitmap/CRoaring)。 ## bitmapBuild {#bitmapbuild} diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 3817bc62bcb..d9c5dc78fe4 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -112,6 +112,8 @@ add_subdirectory (obfuscator) add_subdirectory (install) add_subdirectory (git-import) +#add_subdirectory (grpc-client) + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 5348a9e36c5..e4858eeda8b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -2515,7 +2515,7 @@ public: { std::string traceparent = options["opentelemetry-traceparent"].as(); std::string error; - if 
(!context.getClientInfo().parseTraceparentHeader( + if (!context.getClientInfo().client_trace_context.parseTraceparentHeader( traceparent, error)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -2526,7 +2526,7 @@ public: if (options.count("opentelemetry-tracestate")) { - context.getClientInfo().opentelemetry_tracestate = + context.getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as(); } diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index a129dc7efcc..2f19fc47fd2 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -62,6 +62,9 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) { std::exception_ptr exception; + if (max_tries == 0) + throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR); + for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { try @@ -605,7 +608,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t settings_push.replication_alter_partitions_sync = 2; query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) + - " ATTACH PARTITION " + partition_name + + ((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name + " FROM " + getQuotedTable(helping_table); LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string); @@ -636,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t if (!task_table.isReplicatedTable()) { query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) + - " PARTITION " + partition_name + " DEDUPLICATE;"; + ((partition_name == "'all'") ? 
" PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;"; LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string); @@ -807,7 +810,7 @@ bool ClusterCopier::tryDropPartitionPiece( DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); String query = "ALTER TABLE " + getQuotedTable(helping_table); - query += " DROP PARTITION " + task_partition.name + ""; + query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + ""; /// TODO: use this statement after servers will be updated up to 1.1.54310 // query += " DROP PARTITION ID '" + task_partition.name + "'"; @@ -1567,7 +1570,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT DatabaseAndTableName original_table = task_table.table_push; DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - String query = "ALTER TABLE " + getQuotedTable(helping_table) + " DROP PARTITION " + partition_name; + String query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") + partition_name; const ClusterPtr & cluster_push = task_table.cluster_push; Settings settings_push = task_cluster->settings_push; @@ -1670,14 +1673,24 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) { + std::set res; + createShardInternalTables(timeouts, task_shard, false); TaskTable & task_table = task_shard.task_table; + const String & partition_name = queryToString(task_table.engine_push_partition_key_ast); + + if (partition_name == "'all'") + { + res.emplace("'all'"); + return res; + } + String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM" + wb << "SELECT DISTINCT " << partition_name << " AS partition FROM" << " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; query = wb.str(); } @@ -1692,7 +1705,6 @@ std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti local_context.setSettings(task_cluster->settings_pull); Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()); - std::set res; if (block) { ColumnWithTypeAndName & column = block.getByPosition(0); @@ -1803,7 +1815,7 @@ UInt64 ClusterCopier::executeQueryOnCluster( if (execution_mode == ClusterExecutionMode::ON_EACH_NODE) max_successful_executions_per_shard = 0; - std::atomic origin_replicas_number; + std::atomic origin_replicas_number = 0; /// We need to execute query on one replica at least auto do_for_shard = [&] (UInt64 shard_index, Settings shard_settings) diff --git a/programs/grpc-client/CMakeLists.txt b/programs/grpc-client/CMakeLists.txt new file mode 100644 index 00000000000..d848434e918 --- /dev/null +++ b/programs/grpc-client/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) 
+get_filename_component(rpc_proto "${CMAKE_CURRENT_SOURCE_DIR}/../server/grpc_protos/GrpcConnection.proto" ABSOLUTE) +protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${rpc_proto}) +PROTOBUF_GENERATE_GRPC_CPP(GRPC_SRCS GRPC_HDRS ${rpc_proto}) + +add_executable(grpc-client grpc_client.cpp ${PROTO_SRCS} ${PROTO_HDRS} ${GRPC_SRCS} ${GRPC_HDRS}) +target_link_libraries(grpc-client PUBLIC grpc++ PUBLIC libprotobuf PUBLIC daemon) \ No newline at end of file diff --git a/programs/grpc-client/grpc_client.cpp b/programs/grpc-client/grpc_client.cpp new file mode 100644 index 00000000000..5345b3e7d33 --- /dev/null +++ b/programs/grpc-client/grpc_client.cpp @@ -0,0 +1,173 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "GrpcConnection.grpc.pb.h" + +class GRPCClient +{ + public: + explicit GRPCClient(std::shared_ptr channel) + : stub_(GRPCConnection::GRPC::NewStub(channel)) + {} + std::string Query(const GRPCConnection::User& userInfo, + const std::string& query, + std::vector insert_data = {}) + { + GRPCConnection::QueryRequest request; + grpc::Status status; + GRPCConnection::QueryResponse reply; + grpc::ClientContext context; + auto deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000); + context.set_deadline(deadline); + + auto user = std::make_unique(userInfo); + auto querySettigs = std::make_unique(); + int id = rand(); + request.set_allocated_user_info(user.release()); + // interactive_delay in miliseconds + request.set_interactive_delay(1000); + + querySettigs->set_query(query); + querySettigs->set_format("Values"); + querySettigs->set_query_id(std::to_string(id)); + querySettigs->set_data_stream((insert_data.size() != 0)); + (*querySettigs->mutable_settings())["max_query_size"] ="100"; + + + request.set_allocated_query_info(querySettigs.release()); + + void* got_tag = (void*)1; + bool ok = false; + + std::unique_ptr > reader(stub_->Query(&context)); + reader->Write(request); + + auto 
write = [&reply, &reader, &insert_data]() + { + GRPCConnection::QueryRequest request_insert; + for (const auto& data : insert_data) + { + request_insert.set_insert_data(data); + if (reply.exception_occured().empty()) + { + reader->Write(request_insert); + } + else + { + break; + } + } + request_insert.set_insert_data(""); + if (reply.exception_occured().empty()) + { + reader->Write(request_insert); + } + // reader->WritesDone(); + }; + std::thread write_thread(write); + write_thread.detach(); + + while (reader->Read(&reply)) + { + + if (!reply.output().empty()) + { + std::cout << "Query Part:\n " << id<< reply.output()<<'\n'; + } + else if (reply.progress().read_rows() + || reply.progress().read_bytes() + || reply.progress().total_rows_to_read() + || reply.progress().written_rows() + || reply.progress().written_bytes()) + { + std::cout << "Progress " << id<< ":{\n" << "read_rows: " << reply.progress().read_rows() << '\n' + << "read_bytes: " << reply.progress().read_bytes() << '\n' + << "total_rows_to_read: " << reply.progress().total_rows_to_read() << '\n' + << "written_rows: " << reply.progress().written_rows() << '\n' + << "written_bytes: " << reply.progress().written_bytes() << '\n'; + + + } + else if (!reply.totals().empty()) + { + std::cout << "Totals:\n " << id << " " << reply.totals() <<'\n'; + } + else if (!reply.extremes().empty()) + { + std::cout << "Extremes:\n " << id << " " << reply.extremes() <<'\n'; + } + } + + if (status.ok() && reply.exception_occured().empty()) + { + return ""; + } + else if (status.ok() && !reply.exception_occured().empty()) + { + return reply.exception_occured(); + } + else + { + return "RPC failed"; + } + } + + private: + std::unique_ptr stub_; +}; + +int main(int argc, char** argv) +{ + GRPCConnection::User userInfo1; + userInfo1.set_user("default"); + userInfo1.set_password(""); + userInfo1.set_quota("default"); + + std::cout << "Try: " << argv[1] << std::endl; + grpc::ChannelArguments ch_args; + 
ch_args.SetMaxReceiveMessageSize(-1); + GRPCClient client( + grpc::CreateCustomChannel(argv[1], grpc::InsecureChannelCredentials(), ch_args)); + { + std::cout << client.Query(userInfo1, "CREATE TABLE t (a UInt8) ENGINE = Memory") << std::endl; + std::cout << client.Query(userInfo1, "CREATE TABLE t (a UInt8) ENGINE = Memory") << std::endl; + std::cout << client.Query(userInfo1, "INSERT INTO t VALUES", {"(1),(2),(3)", "(4),(6),(5)"}) << std::endl; + std::cout << client.Query(userInfo1, "INSERT INTO t_not_defined VALUES", {"(1),(2),(3)", "(4),(6),(5)"}) << std::endl; + std::cout << client.Query(userInfo1, "SELECT a FROM t ORDER BY a") << std::endl; + std::cout << client.Query(userInfo1, "DROP TABLE t") << std::endl; + } + { + std::cout << client.Query(userInfo1, "SELECT count() FROM numbers(1)") << std::endl; + std::cout << client.Query(userInfo1, "SELECT 100") << std::endl; + std::cout << client.Query(userInfo1, "SELECT count() FROM numbers(10000000000)") << std::endl; + std::cout << client.Query(userInfo1, "SELECT count() FROM numbers(100)") << std::endl; + } + { + std::cout << client.Query(userInfo1, "CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = Memory;") << std::endl; + std::cout << client.Query(userInfo1, "INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []);") << std::endl; + std::cout << client.Query(userInfo1, "SELECT s FROM arrays_test") << std::endl; + std::cout << client.Query(userInfo1, "DROP TABLE arrays_test") << std::endl; + std::cout << client.Query(userInfo1, "") << std::endl; + } + + {//Check null return from pipe + std::cout << client.Query(userInfo1, "CREATE TABLE table2 (x UInt8, y UInt8) ENGINE = Memory;") << std::endl; + std::cout << client.Query(userInfo1, "SELECT x FROM table2") << std::endl; + std::cout << client.Query(userInfo1, "DROP TABLE table2") << std::endl; + } + {//Check Totals + std::cout << client.Query(userInfo1, "CREATE TABLE tabl (x UInt8, y UInt8) ENGINE = Memory;") << std::endl; 
+ std::cout << client.Query(userInfo1, "INSERT INTO tabl VALUES (1, 2), (2, 4), (3, 2), (3, 3), (3, 4);") << std::endl; + std::cout << client.Query(userInfo1, "SELECT sum(x), y FROM tabl GROUP BY y WITH TOTALS") << std::endl; + std::cout << client.Query(userInfo1, "DROP TABLE tabl") << std::endl; + } + + return 0; +} diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 24aa8e32ddb..3b26e192a07 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -109,6 +109,14 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options) .argument("err-log-path") .binding("logger.errorlog")); + options.addOption(Poco::Util::Option("stdout-path", "", "stdout log path, default console") + .argument("stdout-path") + .binding("logger.stdout")); + + options.addOption(Poco::Util::Option("stderr-path", "", "stderr log path, default console") + .argument("stderr-path") + .binding("logger.stderr")); + using Me = std::decay_t; options.addOption(Poco::Util::Option("help", "", "produce this help message") .binding("help") @@ -127,6 +135,27 @@ void ODBCBridge::initialize(Application & self) config().setString("logger", "ODBCBridge"); + /// Redirect stdout, stderr to specified files. + /// Some libraries and sanitizers write to stderr in case of errors. + const auto stdout_path = config().getString("logger.stdout", ""); + if (!stdout_path.empty()) + { + if (!freopen(stdout_path.c_str(), "a+", stdout)) + throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path); + + /// Disable buffering for stdout. + setbuf(stdout, nullptr); + } + const auto stderr_path = config().getString("logger.stderr", ""); + if (!stderr_path.empty()) + { + if (!freopen(stderr_path.c_str(), "a+", stderr)) + throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path); + + /// Disable buffering for stderr. 
+ setbuf(stderr, nullptr); + } + buildLoggers(config(), logger(), self.commandName()); BaseDaemon::logRevision(); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 951ece89929..26339c5ad3f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -64,6 +64,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -84,6 +85,11 @@ # include #endif +#if USE_GRPC +# include +#endif + + namespace CurrentMetrics { extern const Metric Revision; @@ -806,7 +812,7 @@ int Server::main(const std::vector & /*args*/) http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); - std::vector> servers; + std::vector servers; std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); @@ -1035,6 +1041,15 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString()); }); +#if USE_GRPC + create_server("grpc_port", [&](UInt16 port) + { + Poco::Net::SocketAddress server_address(listen_host, port); + servers.emplace_back(std::make_unique(*this, make_socket_address(listen_host, port))); + LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString()); + }); +#endif + /// Prometheus (if defined and not setup yet with http_port) create_server("prometheus.port", [&](UInt16 port) { @@ -1056,7 +1071,7 @@ int Server::main(const std::vector & /*args*/) global_context->enableNamedSessions(); for (auto & server : servers) - server->start(); + server.start(); { String level_str = config().getString("text_log.level", ""); @@ -1088,8 +1103,8 @@ int Server::main(const std::vector & /*args*/) int current_connections = 0; for (auto & server : servers) { - server->stop(); - current_connections += server->currentConnections(); + server.stop(); + current_connections += server.currentConnections(); } if (current_connections) @@ -1109,7 +1124,7 @@ int Server::main(const std::vector & 
/*args*/) { current_connections = 0; for (auto & server : servers) - current_connections += server->currentConnections(); + current_connections += server.currentConnections(); if (!current_connections) break; sleep_current_ms += sleep_one_ms; diff --git a/programs/server/config.d/logging_no_rotate.xml b/programs/server/config.d/logging_no_rotate.xml new file mode 120000 index 00000000000..cd66c69b3ed --- /dev/null +++ b/programs/server/config.d/logging_no_rotate.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/logging_no_rotate.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index a03270aa7b9..bfd1cc2395b 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -11,6 +11,9 @@ trace /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log + 1000M 10 @@ -131,6 +134,34 @@ 4096 3 + + + + + + + + + + + + + + 100 diff --git a/programs/server/play.html b/programs/server/play.html index 22eea0002ca..37869228c04 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -1,6 +1,7 @@ + ClickHouse Query @@ -286,6 +288,8 @@
 (Ctrl+Enter) + + 🌑🌞
@@ -299,50 +303,117 @@