From a055e3308713e4c13f997b4a771015021b5a5ab7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 07:28:55 +0300 Subject: [PATCH 001/330] Add libevent & AMQP-CPP libraries --- .gitmodules | 6 + CMakeLists.txt | 2 + cmake/find/amqpcpp.cmake | 20 + cmake/find/libevent.cmake | 22 + contrib/AMQP-CPP | 1 + contrib/CMakeLists.txt | 8 + contrib/amqpcpp-cmake/CMakeLists.txt | 44 ++ contrib/libevent | 1 + contrib/libevent-cmake/CMakeLists.txt | 42 ++ contrib/libevent-cmake/evconfig-private.h | 39 ++ contrib/libevent-cmake/event-config.h | 516 ++++++++++++++++++++++ src/CMakeLists.txt | 7 + 12 files changed, 708 insertions(+) create mode 100644 cmake/find/amqpcpp.cmake create mode 100644 cmake/find/libevent.cmake create mode 160000 contrib/AMQP-CPP create mode 100644 contrib/amqpcpp-cmake/CMakeLists.txt create mode 160000 contrib/libevent create mode 100644 contrib/libevent-cmake/CMakeLists.txt create mode 100644 contrib/libevent-cmake/evconfig-private.h create mode 100644 contrib/libevent-cmake/event-config.h diff --git a/.gitmodules b/.gitmodules index f7a16b84d37..bc4654e3b61 100644 --- a/.gitmodules +++ b/.gitmodules @@ -157,3 +157,9 @@ [submodule "contrib/openldap"] path = contrib/openldap url = https://github.com/openldap/openldap.git +[submodule "contrib/AMQP-CPP"] + path = contrib/AMQP-CPP + url = https://github.com/CopernicaMarketingSoftware/AMQP-CPP.git +[submodule "contrib/libevent"] + path = contrib/libevent + url = https://github.com/libevent/libevent.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 53dfd1df1cb..5e9a642c903 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,6 +345,8 @@ include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/rdkafka.cmake) +include (cmake/find/libevent.cmake) +include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) include (cmake/find/opencl.cmake) diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake new file mode 100644 index 00000000000..147824ff395 --- /dev/null +++ b/cmake/find/amqpcpp.cmake @@ -0,0 +1,20 @@ +SET(ENABLE_AMQPCPP 1) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") + message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive") + set (ENABLE_AMQPCPP 0) +endif () + +if (ENABLE_AMQPCPP) + + set (USE_AMQPCPP 1) + set (AMQPCPP_LIBRARY AMQP-CPP) + + set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") + + list (APPEND AMQPCPP_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include" + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") + +endif() + +message (STATUS "Using AMQP-CPP=${USE_AMQPCPP}: ${AMQPCPP_INCLUDE_DIR} : ${AMQPCPP_LIBRARY}") diff --git a/cmake/find/libevent.cmake b/cmake/find/libevent.cmake new file mode 100644 index 00000000000..2f714b43475 --- /dev/null +++ b/cmake/find/libevent.cmake @@ -0,0 +1,22 @@ +SET(ENABLE_LIBEVENT 1) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libevent/CMakeLists.txt") + message (WARNING "submodule contrib/libevent is missing. 
to fix try run: + \n git submodule update --init --recursive") + + set (ENABLE_LIBEVENT 0) +endif () + +if (ENABLE_LIBEVENT) + + set (USE_LIBEVENT 1) + set (LIBEVENT_LIBRARY LIBEVENT) + + set (LIBEVENT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libevent") + + list (APPEND LIBEVENT_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/libevent/include/event2" + "${ClickHouse_SOURCE_DIR}/contrib/libevent/include") + +endif() + +message (STATUS "Using libevent=${USE_LIBEVENT}: ${LIBEVENT_INCLUDE_DIR} : ${LIBEVENT_LIBRARY}") diff --git a/contrib/AMQP-CPP b/contrib/AMQP-CPP new file mode 160000 index 00000000000..1c08399ab0a --- /dev/null +++ b/contrib/AMQP-CPP @@ -0,0 +1 @@ +Subproject commit 1c08399ab0ab9e4042ef8e2bbe9e208e5dcbc13b diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1031285eac7..ea90f7129f2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -317,3 +317,11 @@ endif() if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() + +if (USE_AMQPCPP) + add_subdirectory (amqpcpp-cmake) +endif() + +if (USE_LIBEVENT) + add_subdirectory(libevent-cmake) +endif() diff --git a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt new file mode 100644 index 00000000000..eae3122e216 --- /dev/null +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -0,0 +1,44 @@ +set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP) + +set (SRCS + ${LIBRARY_DIR}/src/array.cpp + ${LIBRARY_DIR}/src/channel.cpp + ${LIBRARY_DIR}/src/channelimpl.cpp + ${LIBRARY_DIR}/src/connectionimpl.cpp + ${LIBRARY_DIR}/src/deferredcancel.cpp + ${LIBRARY_DIR}/src/deferredconfirm.cpp + ${LIBRARY_DIR}/src/deferredconsumer.cpp + ${LIBRARY_DIR}/src/deferredextreceiver.cpp + ${LIBRARY_DIR}/src/deferredget.cpp + ${LIBRARY_DIR}/src/deferredpublisher.cpp + ${LIBRARY_DIR}/src/deferredreceiver.cpp + ${LIBRARY_DIR}/src/field.cpp + ${LIBRARY_DIR}/src/flags.cpp + ${LIBRARY_DIR}/src/linux_tcp/openssl.cpp + ${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp + ${LIBRARY_DIR}/src/receivedframe.cpp + ${LIBRARY_DIR}/src/table.cpp + ${LIBRARY_DIR}/src/watchable.cpp +) + +add_library(amqp-cpp ${SRCS}) + +target_compile_options (amqp-cpp + PUBLIC + -Wno-old-style-cast + -Wno-inconsistent-missing-destructor-override + -Wno-deprecated + -Wno-unused-parameter + -Wno-shadow + -Wno-tautological-type-limit-compare + -Wno-extra-semi +# NOTE: disable all warnings at last because the warning: + # "conversion function converting 'XXX' to itself will never be used" + # doesn't have it's own diagnostic flag yet. 
+ -w +) + +target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include) + +target_link_libraries (amqp-cpp PUBLIC libevent ssl) + diff --git a/contrib/libevent b/contrib/libevent new file mode 160000 index 00000000000..eee26deed38 --- /dev/null +++ b/contrib/libevent @@ -0,0 +1 @@ +Subproject commit eee26deed38fc7a6b6780b54628b007a2810efcd diff --git a/contrib/libevent-cmake/CMakeLists.txt b/contrib/libevent-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f99bc221482 --- /dev/null +++ b/contrib/libevent-cmake/CMakeLists.txt @@ -0,0 +1,42 @@ +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libevent) + +set(SRCS + ${LIBRARY_DIR}/buffer.c + ${LIBRARY_DIR}/bufferevent_filter.c + ${LIBRARY_DIR}/bufferevent_pair.c + ${LIBRARY_DIR}/bufferevent_ratelim.c + ${LIBRARY_DIR}/bufferevent_sock.c + ${LIBRARY_DIR}/bufferevent.c + ${LIBRARY_DIR}/event.c + ${LIBRARY_DIR}/evmap.c + ${LIBRARY_DIR}/evthread.c + ${LIBRARY_DIR}/evutil_rand.c + ${LIBRARY_DIR}/evutil_time.c + ${LIBRARY_DIR}/evutil.c + ${LIBRARY_DIR}/listener.c + ${LIBRARY_DIR}/log.c + ${LIBRARY_DIR}/signal.c + ${LIBRARY_DIR}/strlcpy.c + ${LIBRARY_DIR}/watch.c +) + +if (OS_LINUX) + list (APPEND SRCS + ${LIBRARY_DIR}/epoll.c + ${LIBRARY_DIR}/poll.c + ${LIBRARY_DIR}/select.c + ) + +elseif (OS_DARWIN) + list (APPEND SRCS ${LIBRARY_DIR}/kqueue.c) +endif () + +add_library(libevent ${SRCS}) + +target_compile_options (libevent PUBLIC -Wno-reserved-id-macro) + +if (OS_LINUX) + target_include_directories (libevent PUBLIC linux) +endif () + +target_include_directories (libevent PUBLIC ${LIBRARY_DIR}/include) diff --git a/contrib/libevent-cmake/evconfig-private.h b/contrib/libevent-cmake/evconfig-private.h new file mode 100644 index 00000000000..a39d2b71fbc --- /dev/null +++ b/contrib/libevent-cmake/evconfig-private.h @@ -0,0 +1,39 @@ +#ifndef EVCONFIG_PRIVATE_H_INCLUDED_ +#define EVCONFIG_PRIVATE_H_INCLUDED_ + +/* Enable extensions on AIX 3, Interix. */ +/* #undef _ALL_SOURCE */ + +/* Enable GNU extensions on systems that have them. */ +#define _GNU_SOURCE 1 + +/* Enable threading extensions on Solaris. */ +/* #undef _POSIX_PTHREAD_SEMANTICS */ + +/* Enable extensions on HP NonStop. */ +/* #undef _TANDEM_SOURCE */ + +/* Enable general extensions on Solaris. */ +/* #undef __EXTENSIONS__ */ + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Enable POSIX.2 extensions on QNX for getopt */ +#ifdef __QNX__ +/* #undef __EXT_POSIX2 */ +#endif + +#endif diff --git a/contrib/libevent-cmake/event-config.h b/contrib/libevent-cmake/event-config.h new file mode 100644 index 00000000000..09067412490 --- /dev/null +++ b/contrib/libevent-cmake/event-config.h @@ -0,0 +1,516 @@ +/* event-config.h + * + * This file was generated by cmake when the makefiles were generated. + * + * DO NOT EDIT THIS FILE. + * + * Do not rely on macros in this file existing in later versions. 
+ */ +#ifndef EVENT2_EVENT_CONFIG_H_INCLUDED_ +#define EVENT2_EVENT_CONFIG_H_INCLUDED_ + +/* Numeric representation of the version */ +#define EVENT__NUMERIC_VERSION 0x02020001 +#define EVENT__PACKAGE_VERSION "2.2.0" + +#define EVENT__VERSION_MAJOR 2 +#define EVENT__VERSION_MINOR 2 +#define EVENT__VERSION_PATCH 0 + +/* Version number of package */ +#define EVENT__VERSION "2.2.0-alpha-dev" + +/* Name of package */ +#define EVENT__PACKAGE "libevent" + +/* Define to the address where bug reports for this package should be sent. */ +#define EVENT__PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define EVENT__PACKAGE_NAME "" + +/* Define to the full name and version of this package. */ +#define EVENT__PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define EVENT__PACKAGE_TARNAME "" + +/* Define if libevent should build without support for a debug mode */ +/* #undef EVENT__DISABLE_DEBUG_MODE */ + +/* Define if libevent should not allow replacing the mm functions */ +/* #undef EVENT__DISABLE_MM_REPLACEMENT */ + +/* Define if libevent should not be compiled with thread support */ +/* #undef EVENT__DISABLE_THREAD_SUPPORT */ + +/* Define to 1 if you have the `accept4' function. */ +#define EVENT__HAVE_ACCEPT4 1 + +/* Define to 1 if you have the `arc4random' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the `arc4random_addrandom' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM_ADDRANDOM */ + +/* Define if clock_gettime is available in libc */ +#define EVENT__DNS_USE_CPU_CLOCK_FOR_ID 1 + +/* Define is no secure id variant is available */ +/* #undef EVENT__DNS_USE_GETTIMEOFDAY_FOR_ID */ +/* #undef EVENT__DNS_USE_FTIME_FOR_ID */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +#define EVENT__HAVE_CLOCK_GETTIME 1 + +/* Define to 1 if you have the declaration of `CTL_KERN'. */ +#define EVENT__HAVE_DECL_CTL_KERN 1 + +/* Define to 1 if you have the declaration of `KERN_ARND'. */ +#define EVENT__HAVE_DECL_KERN_ARND 0 + +/* Define to 1 if you have `getrandom' function. */ +#define EVENT__HAVE_GETRANDOM 1 + +/* Define if /dev/poll is available */ +/* #undef EVENT__HAVE_DEVPOLL */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETDB_H 1 + +/* Define to 1 if fd_mask type is defined */ +#define EVENT__HAVE_FD_MASK 1 + +/* Define to 1 if the header file defines TAILQ_FOREACH. */ +#define EVENT__HAVE_TAILQFOREACH 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_DLFCN_H 1 + +/* Define if your system supports the epoll system calls */ +#define EVENT__HAVE_EPOLL 1 + +/* Define to 1 if you have the `epoll_create1' function. */ +#define EVENT__HAVE_EPOLL_CREATE1 1 + +/* Define to 1 if you have the `epoll_ctl' function. */ +#define EVENT__HAVE_EPOLL_CTL 1 + +/* Define to 1 if you have the `eventfd' function. */ +#define EVENT__HAVE_EVENTFD 1 + +/* Define if your system supports event ports */ +/* #undef EVENT__HAVE_EVENT_PORTS */ + +/* Define to 1 if you have the `fcntl' function. */ +#define EVENT__HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define EVENT__HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `getegid' function. 
*/ +#define EVENT__HAVE_GETEGID 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define EVENT__HAVE_GETEUID 1 + +/* TODO: Check for different gethostname argument counts. CheckPrototypeDefinition.cmake can be used. */ +/* Define this if you have any gethostbyname_r() */ +#define EVENT__HAVE_GETHOSTBYNAME_R 1 + +/* Define this if gethostbyname_r takes 3 arguments */ +/* #undef EVENT__HAVE_GETHOSTBYNAME_R_3_ARG */ + +/* Define this if gethostbyname_r takes 5 arguments */ +/* #undef EVENT__HAVE_GETHOSTBYNAME_R_5_ARG */ + +/* Define this if gethostbyname_r takes 6 arguments */ +#define EVENT__HAVE_GETHOSTBYNAME_R_6_ARG 1 + +/* Define to 1 if you have the `getifaddrs' function. */ +#define EVENT__HAVE_GETIFADDRS 1 + +/* Define to 1 if you have the `getnameinfo' function. */ +#define EVENT__HAVE_GETNAMEINFO 1 + +/* Define to 1 if you have the `getprotobynumber' function. */ +#define EVENT__HAVE_GETPROTOBYNUMBER 1 + +/* Define to 1 if you have the `getservbyname' function. */ +#define EVENT__HAVE_GETSERVBYNAME 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#define EVENT__HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_IFADDRS_H 1 + +/* Define to 1 if you have the `inet_ntop' function. */ +#define EVENT__HAVE_INET_NTOP 1 + +/* Define to 1 if you have the `inet_pton' function. */ +#define EVENT__HAVE_INET_PTON 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `issetugid' function. */ +/* #undef EVENT__HAVE_ISSETUGID */ + +/* Define to 1 if you have the `kqueue' function. */ +/* #undef EVENT__HAVE_KQUEUE */ + +/* Define if the system has zlib */ +#define EVENT__HAVE_LIBZ 1 + +/* Define to 1 if you have the `mach_absolute_time' function. */ +/* #undef EVENT__HAVE_MACH_ABSOLUTE_TIME */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_MACH_MACH_TIME_H */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_MACH_MACH_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mmap' function. */ +#define EVENT__HAVE_MMAP 1 + +/* Define to 1 if you have the `nanosleep' function. */ +#define EVENT__HAVE_NANOSLEEP 1 + +/* Define to 1 if you have the `usleep' function. */ +#define EVENT__HAVE_USLEEP 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_NETINET_IN6_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETINET_TCP_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_UN_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_AFUNIX_H */ + +/* Define if the system has openssl */ +#define EVENT__HAVE_OPENSSL 1 + +/* Define to 1 if you have the `pipe' function. */ +#define EVENT__HAVE_PIPE 1 + +/* Define to 1 if you have the `pipe2' function. */ +#define EVENT__HAVE_PIPE2 1 + +/* Define to 1 if you have the `poll' function. */ +#define EVENT__HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_POLL_H 1 + +/* Define to 1 if you have the `port_create' function. */ +/* #undef EVENT__HAVE_PORT_CREATE */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_PORT_H */ + +/* Define if we have pthreads on this system */ +#define EVENT__HAVE_PTHREADS 1 + +/* Define to 1 if you have the `putenv' function. 
*/ +#define EVENT__HAVE_PUTENV 1 + +/* Define to 1 if the system has the type `sa_family_t'. */ +#define EVENT__HAVE_SA_FAMILY_T 1 + +/* Define to 1 if you have the `select' function. */ +#define EVENT__HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define EVENT__HAVE_SETENV 1 + +/* Define if F_SETFD is defined in */ +#define EVENT__HAVE_SETFD 1 + +/* Define to 1 if you have the `setrlimit' function. */ +#define EVENT__HAVE_SETRLIMIT 1 + +/* Define to 1 if you have the `sendfile' function. */ +#define EVENT__HAVE_SENDFILE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define EVENT__HAVE_SIGACTION 1 + +/* Define to 1 if you have the `signal' function. */ +#define EVENT__HAVE_SIGNAL 1 + +/* Define to 1 if you have the `strsignal' function. */ +#define EVENT__HAVE_STRSIGNAL 1 + +/* Define to 1 if you have the `splice' function. */ +#define EVENT__HAVE_SPLICE 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDDEF_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STRING_H 1 + +/* Define to 1 if you have the `strlcpy' function. */ +/* #undef EVENT__HAVE_STRLCPY */ + +/* Define to 1 if you have the `strsep' function. */ +#define EVENT__HAVE_STRSEP 1 + +/* Define to 1 if you have the `strtok_r' function. */ +#define EVENT__HAVE_STRTOK_R 1 + +/* Define to 1 if you have the `strtoll' function. */ +#define EVENT__HAVE_STRTOLL 1 + +/* Define to 1 if you have the `_gmtime64_s' function. */ +/* #undef EVENT__HAVE__GMTIME64_S */ + +/* Define to 1 if you have the `_gmtime64' function. */ +/* #undef EVENT__HAVE__GMTIME64 */ + +/* Define to 1 if the system has the type `struct addrinfo'. */ +#define EVENT__HAVE_STRUCT_ADDRINFO 1 + +/* Define to 1 if the system has the type `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR 1 + +/* Define to 1 if `s6_addr16' is member of `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR16 1 + +/* Define to 1 if `s6_addr32' is member of `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR32 1 + +/* Define to 1 if the system has the type `struct sockaddr_in6'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_IN6 1 + +/* Define to 1 if `sin6_len' is member of `struct sockaddr_in6'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN6_SIN6_LEN */ + +/* Define to 1 if `sin_len' is member of `struct sockaddr_in'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ + +/* Define to 1 if the system has the type `struct sockaddr_un'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_UN 1 + +/* Define to 1 if the system has the type `struct sockaddr_storage'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE 1 + +/* Define to 1 if `ss_family' is a member of `struct sockaddr_storage'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY 1 + +/* Define to 1 if `__ss_family' is a member of `struct sockaddr_storage'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_STORAGE___SS_FAMILY */ + +/* Define to 1 if the system has the type `struct linger'. */ +#define EVENT__HAVE_STRUCT_LINGER 1 + +/* Define to 1 if you have the `sysctl' function. */ +/* #undef EVENT__HAVE_SYSCTL */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_EPOLL_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define EVENT__HAVE_SYS_EVENTFD_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_SYS_EVENT_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_MMAN_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SENDFILE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_RANDOM_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_SYS_SYSCTL_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TIMERFD_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_UIO_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_ERRNO_H 1 + +/* Define if TAILQ_FOREACH is defined in */ +#define EVENT__HAVE_TAILQFOREACH 1 + +/* Define if timeradd is defined in */ +#define EVENT__HAVE_TIMERADD 1 + +/* Define if timerclear is defined in */ +#define EVENT__HAVE_TIMERCLEAR 1 + +/* Define if timercmp is defined in */ +#define EVENT__HAVE_TIMERCMP 1 + + +/* Define to 1 if you have the `timerfd_create' function. */ +#define EVENT__HAVE_TIMERFD_CREATE 1 + +/* Define if timerisset is defined in */ +#define EVENT__HAVE_TIMERISSET 1 + +/* Define to 1 if the system has the type `uint8_t'. */ +#define EVENT__HAVE_UINT8_T 1 + +/* Define to 1 if the system has the type `uint16_t'. */ +#define EVENT__HAVE_UINT16_T 1 + +/* Define to 1 if the system has the type `uint32_t'. */ +#define EVENT__HAVE_UINT32_T 1 + +/* Define to 1 if the system has the type `uint64_t'. */ +#define EVENT__HAVE_UINT64_T 1 + +/* Define to 1 if the system has the type `uintptr_t'. */ +#define EVENT__HAVE_UINTPTR_T 1 + +/* Define to 1 if you have the `umask' function. */ +#define EVENT__HAVE_UMASK 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define EVENT__HAVE_UNSETENV 1 + +/* Define to 1 if you have the `vasprintf' function. */ +#define EVENT__HAVE_VASPRINTF 1 + +/* Define if kqueue works correctly with pipes */ +/* #undef EVENT__HAVE_WORKING_KQUEUE */ + +#ifdef __USE_UNUSED_DEFINITIONS__ +/* Define to necessary symbol if this constant uses a non-standard name on your system. */ +/* XXX: Hello, this isn't even used, nor is it defined anywhere... - Ellzey */ +#define EVENT__PTHREAD_CREATE_JOINABLE +#endif + +/* The size of `pthread_t', as computed by sizeof. */ +#define EVENT__SIZEOF_PTHREAD_T 8 + +/* The size of a `int', as computed by sizeof. */ +#define EVENT__SIZEOF_INT 4 + +/* The size of a `long', as computed by sizeof. */ +#define EVENT__SIZEOF_LONG 8 + +/* The size of a `long long', as computed by sizeof. 
*/ +#define EVENT__SIZEOF_LONG_LONG 8 + +/* The size of `off_t', as computed by sizeof. */ +#define EVENT__SIZEOF_OFF_T 8 + +#define EVENT__SIZEOF_SSIZE_T 8 + + +/* The size of a `short', as computed by sizeof. */ +#define EVENT__SIZEOF_SHORT 2 + +/* The size of `size_t', as computed by sizeof. */ +#define EVENT__SIZEOF_SIZE_T 8 + +/* Define to 1 if you can safely include both and . */ +/* #undef EVENT__TIME_WITH_SYS_TIME */ + +/* The size of `socklen_t', as computed by sizeof. */ +#define EVENT__SIZEOF_SOCKLEN_T 4 + +/* The size of 'void *', as computer by sizeof */ +#define EVENT__SIZEOF_VOID_P 8 + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* why not c++? + * + * and are we really expected to use EVENT__inline everywhere, + * shouldn't we just do: + * ifdef EVENT__inline + * define inline EVENT__inline + * + * - Ellzey + */ + +#define EVENT__inline inline +#endif + +#define EVENT__HAVE___func__ 1 +#define EVENT__HAVE___FUNCTION__ 1 + +/* Define to `unsigned' if does not define. */ +#define EVENT__size_t size_t + +/* Define to unsigned int if you dont have it */ +#define EVENT__socklen_t socklen_t + +/* Define to `int' if does not define. */ +#define EVENT__ssize_t ssize_t + +#endif /* \EVENT2_EVENT_CONFIG_H_INCLUDED_ */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 222a3e486f9..f720b5c1c85 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,6 +72,10 @@ if(USE_RDKAFKA) add_headers_and_sources(dbms Storages/Kafka) endif() +if (USE_AMQPCPP) + add_headers_and_sources(dbms Storages/RabbitMQ) +endif() + if (USE_AWS_S3) add_headers_and_sources(dbms Disks/S3) endif() @@ -253,6 +257,9 @@ if (USE_RDKAFKA) endif() endif() +if (USE_AMQPCPP) + dbms_target_link_libraries(PUBLIC amqp-cpp) +endif() if(RE2_INCLUDE_DIR) target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) From 3b75f214c59d1a674f283a9f4675005ef5f04f61 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 08:30:38 +0300 Subject: [PATCH 002/330] Register RabbitMQ storage --- src/Core/Settings.h | 1 + src/Core/config_core.h.in | 1 + src/Storages/RabbitMQ/RabbitMQSettings.cpp | 44 ++++ src/Storages/RabbitMQ/RabbitMQSettings.h | 26 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 249 +++++++++++++++++++++ src/Storages/RabbitMQ/StorageRabbitMQ.h | 62 +++++ src/Storages/registerStorages.cpp | 4 + src/Storages/registerStorages.h | 4 + 8 files changed, 391 insertions(+) create mode 100644 src/Storages/RabbitMQ/RabbitMQSettings.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQSettings.h create mode 100644 src/Storages/RabbitMQ/StorageRabbitMQ.cpp create mode 100644 src/Storages/RabbitMQ/StorageRabbitMQ.h diff --git a/src/Core/Settings.h b/src/Core/Settings.h index eda76584f0b..9cd6287a75d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -71,6 +71,7 @@ struct Settings : public SettingsCollection M(SettingMilliseconds, connection_pool_max_wait_ms, 0, "The wait time when the connection pool is full.", 0) \ M(SettingMilliseconds, replace_running_query_max_wait_ms, 5000, "The wait time for running query with the same query_id to finish when setting 'replace_running_query' is active.", 0) \ M(SettingMilliseconds, kafka_max_wait_ms, 5000, "The wait time for reading from Kafka before retry.", 0) \ + M(SettingMilliseconds, rabbitmq_max_wait_ms, 5000, "The wait time for reading from RabbitMQ before retry.", 0) \ M(SettingUInt64, poll_interval, 
DBMS_DEFAULT_POLL_INTERVAL, "Block at the query wait loop on the server for the specified number of seconds.", 0) \ M(SettingUInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(SettingUInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \ diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 620c23c21cc..5991c12a1f2 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -5,6 +5,7 @@ #cmakedefine01 USE_ICU #cmakedefine01 USE_MYSQL #cmakedefine01 USE_RDKAFKA +#cmakedefine01 USE_AMQPCPP #cmakedefine01 USE_EMBEDDED_COMPILER #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp new file mode 100644 index 00000000000..ed8d4ad801a --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_COLLECTION(RabbitMQSettings, LIST_OF_RABBITMQ_SETTINGS) + +void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); + else + e.rethrow(); + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} +} + diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h new file mode 100644 index 00000000000..0b0f58169fa --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + class ASTStorage; + + struct RabbitMQSettings : public SettingsCollection + { + +#define LIST_OF_RABBITMQ_SETTINGS(M) \ + M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ + M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exhange name, to which messages are sent. 
Needed to bind queues to it.", 0) \ + M(SettingString, rabbitmq_format, "", "The message format.", 0) \ + M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ + M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers).", 0) \ + M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ + M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ + M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ + + DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) + + void loadFromQuery(ASTStorage & storage_def); + }; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp new file mode 100644 index 00000000000..98e7e97e2e1 --- /dev/null +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +StorageRabbitMQ::StorageRabbitMQ( + const StorageID & table_id_, + Context & context_, + const ColumnsDescription & columns_, + const String & host_port_, + const String & routing_key_, + const String & exchange_name_, + const String & format_name_, + char row_delimiter_, + size_t num_consumers_, + bool bind_by_id_, + size_t num_queues_, + bool hash_exchange_) + : IStorage(table_id_) + , global_context(context_.getGlobalContext()) + , rabbitmq_context(Context(global_context)) + , routing_key(global_context.getMacros()->expand(routing_key_)) + , exchange_name(exchange_name_) + , format_name(global_context.getMacros()->expand(format_name_)) + , row_delimiter(row_delimiter_) + , num_consumers(num_consumers_) + , bind_by_id(bind_by_id_) + , num_queues(num_queues_) + , hash_exchange(hash_exchange_) + , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , semaphore(0, num_consumers_) + , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) +{ +} + + +void registerStorageRabbitMQ(StorageFactory & factory) +{ + auto creator_fn = [](const StorageFactory::Arguments & args) + { + ASTs & engine_args = args.engine_args; + size_t args_count = engine_args.size(); + bool has_settings = args.storage_def->settings; + + RabbitMQSettings rabbitmq_settings; + if (has_settings) + { + rabbitmq_settings.loadFromQuery(*args.storage_def); + } + + String host_port = rabbitmq_settings.rabbitmq_host_port; + if (args_count >= 1) + { + const auto * ast = engine_args[0]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + host_port = safeGet(ast->value); + } + else + { + throw Exception(String("RabbitMQ host:port must be a string"), ErrorCodes::BAD_ARGUMENTS); + } + } + + String routing_key = rabbitmq_settings.rabbitmq_routing_key.value; + if (args_count >= 2) + { + const auto * ast = engine_args[1]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + routing_key = safeGet(ast->value); + } + else + { + throw Exception(String("RabbitMQ routing key 
must be a string"), ErrorCodes::BAD_ARGUMENTS); + } + } + + String exchange = rabbitmq_settings.rabbitmq_exchange_name.value; + if (args_count >= 3) + { + engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context); + + const auto * ast = engine_args[2]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + exchange = safeGet(ast->value); + } + } + + String format = rabbitmq_settings.rabbitmq_format.value; + if (args_count >= 4) + { + engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context); + + const auto * ast = engine_args[3]->as(); + if (ast && ast->value.getType() == Field::Types::String) + { + format = safeGet(ast->value); + } + else + { + throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS); + } + } + + char row_delimiter = rabbitmq_settings.rabbitmq_row_delimiter; + if (args_count >= 5) + { + engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); + + const auto * ast = engine_args[4]->as(); + String arg; + if (ast && ast->value.getType() == Field::Types::String) + { + arg = safeGet(ast->value); + } + else + { + throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } + if (arg.size() > 1) + { + throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } + else if (arg.empty()) + { + row_delimiter = '\0'; + } + else + { + row_delimiter = arg[0]; + } + } + + size_t bind_by_id = static_cast(rabbitmq_settings.rabbitmq_bind_by_id); + if (args_count >= 6) + { + const auto * ast = engine_args[5]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + bind_by_id = static_cast(safeGet(ast->value)); + } + else + { + throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); + } + } + + UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; + if (args_count >= 7) + { + const auto * ast = engine_args[6]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + num_consumers = safeGet(ast->value); + } + else + { + throw Exception("Number of consumers must be a positive integer", ErrorCodes::BAD_ARGUMENTS); + } + } + + UInt64 num_queues = rabbitmq_settings.rabbitmq_num_queues; + if (args_count >= 8) + { + const auto * ast = engine_args[7]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + num_consumers = safeGet(ast->value); + } + else + { + throw Exception("Number of queues must be a positive integer", ErrorCodes::BAD_ARGUMENTS); + } + } + + size_t hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); + if (args_count >= 9) + { + const auto * ast = engine_args[8]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + hash_exchange = static_cast(safeGet(ast->value)); + } + else + { + throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); + } + } + + return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange, + format, row_delimiter, num_consumers, bind_by_id, num_queues, hash_exchange); + }; + + factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); + +} + + +NamesAndTypesList StorageRabbitMQ::getVirtuals() const +{ + return NamesAndTypesList{ + {"_exchange", std::make_shared()}, + {"_routingKey", std::make_shared()} + }; +} + +} + diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h new file mode 
100644 index 00000000000..37b8c2b1078 --- /dev/null +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -0,0 +1,62 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +using ChannelPtr = std::shared_ptr; + +class StorageRabbitMQ final: public ext::shared_ptr_helper, public IStorage +{ + friend struct ext::shared_ptr_helper; + +public: + std::string getName() const override { return "RabbitMQ"; } + bool supportsSettings() const override { return true; } + +protected: + StorageRabbitMQ( + const StorageID & table_id_, + Context & context_, + const ColumnsDescription & columns_, + const String & host_port_, + const String & routing_key_, const String & exchange_name_, + const String & format_name_, char row_delimiter_, + size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange); + +private: + Context global_context; + Context rabbitmq_context; + + String routing_key; + const String exchange_name; + + const String format_name; + char row_delimiter; + size_t num_consumers; + size_t num_created_consumers = 0; + + bool bind_by_id; + size_t num_queues; + const bool hash_exchange; + + Poco::Logger * log; + std::pair parsed_address; + + Poco::Semaphore semaphore; + std::mutex mutex; + + size_t consumer_id = 0; + + BackgroundSchedulePool::TaskHolder task; + std::atomic stream_cancelled{false}; +}; + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 5ad26b70803..c349a4e5c8f 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -46,6 +46,10 @@ void registerStorages() #if USE_RDKAFKA registerStorageKafka(factory); #endif + + #if USE_AMQPCPP + registerStorageRabbitMQ(factory); + #endif } } diff --git a/src/Storages/registerStorages.h b/src/Storages/registerStorages.h index c9874551073..2823f5c2d2c 100644 --- a/src/Storages/registerStorages.h +++ b/src/Storages/registerStorages.h @@ -47,6 +47,10 @@ void registerStorageMySQL(StorageFactory & factory); void registerStorageKafka(StorageFactory & factory); #endif +#if USE_AMQPCPP +void registerStorageRabbitMQ(StorageFactory & factory); +#endif + void registerStorages(); } From 41b99edc044ae0fc820d99a9e8e69a59a27ca76c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 09:22:12 +0300 Subject: [PATCH 003/330] Add base for RabbitMQ integration tests --- .../compose/docker_compose_rabbitmq.yml | 12 ++ tests/integration/helpers/cluster.py | 24 +++- .../test_storage_rabbitmq/__init__.py | 0 .../configs/log_conf.xml | 11 ++ .../configs/rabbitmq.xml | 5 + .../test_storage_rabbitmq/configs/users.xml | 25 ++++ .../integration/test_storage_rabbitmq/test.py | 123 ++++++++++++++++++ .../test_rabbitmq_json.reference | 50 +++++++ 8 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 docker/test/integration/compose/docker_compose_rabbitmq.yml create mode 100644 tests/integration/test_storage_rabbitmq/__init__.py create mode 100644 tests/integration/test_storage_rabbitmq/configs/log_conf.xml create mode 100644 tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml create mode 100644 tests/integration/test_storage_rabbitmq/configs/users.xml create mode 100644 tests/integration/test_storage_rabbitmq/test.py create mode 100644 tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference diff --git a/docker/test/integration/compose/docker_compose_rabbitmq.yml b/docker/test/integration/compose/docker_compose_rabbitmq.yml new file mode 100644 index 00000000000..7ebee3c0ea5 --- 
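
The next hunk adds a docker-compose service for the RabbitMQ broker used by the integration tests. Once that container is up, a quick connectivity check from the test host could look like the sketch below; host, port and the root/clickhouse credentials match the compose file that follows, and the helper name is made up.

```python
import pika

def rabbitmq_is_reachable(host='localhost', port=5672):
    # Returns True if an AMQP connection can be opened with the test credentials.
    try:
        params = pika.ConnectionParameters(
            host=host, port=port,
            credentials=pika.PlainCredentials('root', 'clickhouse'))
        pika.BlockingConnection(params).close()
        return True
    except Exception:
        return False

print(rabbitmq_is_reachable())
```
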
/dev/null +++ b/docker/test/integration/compose/docker_compose_rabbitmq.yml @@ -0,0 +1,12 @@ +version: '2.2' + +services: + rabbitmq1: + image: rabbitmq:3-management + hostname: rabbitmq1 + ports: + - "5672:5672" + - "15672:15672" + environment: + RABBITMQ_DEFAULT_USER: "root" + RABBITMQ_DEFAULT_PASS: "clickhouse" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 53c36ff8924..6d9ca1b7861 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -108,12 +108,14 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_rabbitmq_cmd = [] self.pre_zookeeper_commands = [] self.instances = {} self.with_zookeeper = False self.with_mysql = False self.with_postgres = False self.with_kafka = False + self.with_rabbitmq = False self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False @@ -143,7 +145,7 @@ class ClickHouseCluster: return cmd def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None, - with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, + with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -167,7 +169,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {}, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, + self.zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables or {}, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, @@ -231,6 +233,13 @@ class ClickHouseCluster: self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')] cmds.append(self.base_kafka_cmd) + if with_rabbitmq and not self.with_rabbitmq: + self.with_rabbitmq = True + self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')]) + self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')] + cmds.append(self.base_rabbitmq_cmd) + if with_hdfs and not self.with_hdfs: self.with_hdfs = True self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')]) @@ -482,6 +491,10 @@ class ClickHouseCluster: self.kafka_docker_id = self.get_instance_docker_id('kafka1') self.wait_schema_registry_to_start(120) + if self.with_rabbitmq and self.base_rabbitmq_cmd: + subprocess_check_call(self.base_rabbitmq_cmd + common_opts + ['--renew-anon-volumes']) + self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1') + if self.with_hdfs and self.base_hdfs_cmd: subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) @@ -621,7 +634,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, 
with_minio, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -648,6 +661,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka + self.with_rabbitmq = with_rabbitmq self.with_mongo = with_mongo self.with_redis = with_redis self.with_minio = with_minio @@ -993,6 +1007,9 @@ class ClickHouseInstance: depends_on.append("kafka1") depends_on.append("schema-registry") + if self.with_rabbitmq: + depends_on.append("rabbitmq1") + if self.with_zookeeper: depends_on.append("zoo1") depends_on.append("zoo2") @@ -1072,3 +1089,4 @@ class ClickHouseKiller(object): def __exit__(self, exc_type, exc_val, exc_tb): self.clickhouse_node.restore_clickhouse() + diff --git a/tests/integration/test_storage_rabbitmq/__init__.py b/tests/integration/test_storage_rabbitmq/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_rabbitmq/configs/log_conf.xml b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml new file mode 100644 index 00000000000..f9d15e572aa --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml @@ -0,0 +1,11 @@ + + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml new file mode 100644 index 00000000000..33a8a43fb1a --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml @@ -0,0 +1,5 @@ + + + earliest + + diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml new file mode 100644 index 00000000000..246e6b069ef --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/users.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py new file mode 100644 index 00000000000..475b89f6c60 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -0,0 +1,123 @@ +import os.path as p +import random +import threading +import time +import pytest + +from random import randrange +import pika +from sys import getdefaultencoding + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException +from helpers.network import PartitionManager + +import json +import subprocess + +from google.protobuf.internal.encoder import _VarintBytes + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + config_dir='configs', + main_configs=['configs/rabbitmq.xml','configs/log_conf.xml'], + with_rabbitmq=True) +rabbitmq_id = '' + + +# Helpers + +def check_rabbitmq_is_available(): + p = subprocess.Popen(('docker', + 'exec', + '-i', + rabbitmq_id, + 'rabbitmqctl', + 'await_startup'), + stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def enable_consistent_hash_plugin(): + p = subprocess.Popen(('docker', + 'exec', + '-i', + rabbitmq_id, + "rabbitmq-plugins", "enable", "rabbitmq_consistent_hash_exchange"), + 
stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def wait_rabbitmq_is_available(max_retries=50): + retries = 0 + while True: + if check_rabbitmq_is_available(): + break + else: + retries += 1 + if retries > max_retries: + raise "RabbitMQ is not available" + print("Waiting for RabbitMQ to start up") + time.sleep(1) + + +def wait_rabbitmq_plugin_enabled(max_retries=50): + retries = 0 + while True: + if enable_consistent_hash_plugin(): + break + else: + retries += 1 + if retries > max_retries: + raise "RabbitMQ plugin is not available" + print("Waiting for plugin") + time.sleep(1) + + +def rabbitmq_check_result(result, check=False, ref_file='test_rabbitmq_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + + +# Fixtures + +@pytest.fixture(scope="module") +def rabbitmq_cluster(): + try: + global rabbitmq_id + cluster.start() + rabbitmq_id = instance.cluster.rabbitmq_docker_id + print("rabbitmq_id is {}".format(rabbitmq_id)) + instance.query('CREATE DATABASE test') + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def rabbitmq_setup_teardown(): + wait_rabbitmq_is_available() + wait_rabbitmq_plugin_enabled() + print("RabbitMQ is available - running test") + yield # run test + instance.query('DROP TABLE IF EXISTS test.rabbitmq') + + +# Tests + + + +if __name__ == '__main__': + cluster.start() + raw_input("Cluster created, press any key to destroy...") + cluster.shutdown() + diff --git a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference new file mode 100644 index 00000000000..959bb2aad74 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference @@ -0,0 +1,50 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 From aeffab3fdb07f7d7ca60f3e9791181a8657c15d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 09:40:49 +0000 Subject: [PATCH 004/330] Enable SELECT and CREATE MV queries with engine RabbitMQ --- src/Storages/RabbitMQ/Buffer_fwd.h | 11 + .../RabbitMQ/RabbitMQBlockInputStream.cpp | 156 ++++++++++ .../RabbitMQ/RabbitMQBlockInputStream.h | 41 +++ src/Storages/RabbitMQ/RabbitMQHandler.cpp | 32 +++ src/Storages/RabbitMQ/RabbitMQHandler.h | 28 ++ src/Storages/RabbitMQ/RabbitMQSettings.cpp | 2 - src/Storages/RabbitMQ/RabbitMQSettings.h | 3 +- .../ReadBufferFromRabbitMQConsumer.cpp | 268 ++++++++++++++++++ .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 81 ++++++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 232 ++++++++++++++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 38 ++- 11 files changed, 883 insertions(+), 9 deletions(-) create mode 100644 src/Storages/RabbitMQ/Buffer_fwd.h create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockInputStream.h create mode 100644 src/Storages/RabbitMQ/RabbitMQHandler.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQHandler.h create mode 100644 src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp create mode 100644 src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h diff 
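
The test module added above only wires up fixtures so far; with the read path that the following patch enables, a first end-to-end test could look roughly like the sketch below. The DDL settings follow the names declared in RabbitMQSettings.h, the key/value schema and the 50 rows follow test_rabbitmq_json.reference, and instance / rabbitmq_check_result are the helpers from test.py; the exact test body is an assumption, not part of the patch.

```python
import json
import pika

def test_rabbitmq_select_json(rabbitmq_cluster):
    # Hypothetical first test: create a table reading JSONEachRow messages.
    instance.query('''
        CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
            ENGINE = RabbitMQ
            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
                     rabbitmq_routing_key = 'json',
                     rabbitmq_format = 'JSONEachRow',
                     rabbitmq_row_delimiter = '\\n'
        ''')

    # Publish the 50 rows from the reference file.
    credentials = pika.PlainCredentials('root', 'clickhouse')
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost', credentials=credentials))
    channel = connection.channel()
    for i in range(50):
        channel.basic_publish(exchange='clickhouse-exchange', routing_key='json',
                              body=json.dumps({'key': i, 'value': i}))
    connection.close()

    # Each SELECT consumes what has arrived so far, so accumulate until the
    # result matches the reference.
    result = ''
    while True:
        result += instance.query('SELECT * FROM test.rabbitmq')
        if rabbitmq_check_result(result):
            break

    rabbitmq_check_result(result, True)
```
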
--git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h new file mode 100644 index 00000000000..f0ef010c518 --- /dev/null +++ b/src/Storages/RabbitMQ/Buffer_fwd.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferFromRabbitMQConsumer; +using ConsumerBufferPtr = std::shared_ptr; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp new file mode 100644 index 00000000000..89ea490e842 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +RabbitMQBlockInputStream::RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, const Context & context_, const Names & columns, Poco::Logger * log_) + : storage(storage_) + , context(context_) + , column_names(columns) + , log(log_) + , non_virtual_header(storage.getSampleBlockNonMaterialized()) + , virtual_header(storage.getSampleBlockForColumns({"_exchange", "_routingKey"})) +{ +} + + +RabbitMQBlockInputStream::~RabbitMQBlockInputStream() +{ + if (!claimed) + return; + + storage.pushReadBuffer(buffer); +} + + +Block RabbitMQBlockInputStream::getHeader() const +{ + return storage.getSampleBlockForColumns(column_names); +} + + +void RabbitMQBlockInputStream::readPrefixImpl() +{ + auto timeout = std::chrono::milliseconds(context.getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); + + buffer = storage.popReadBuffer(timeout); + claimed = !!buffer; + + if (!buffer || finished) + return; + + buffer->subscribeConsumer(); +} + + +Block RabbitMQBlockInputStream::readImpl() +{ + if (!buffer || finished) + return Block(); + + finished = true; + + MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + + auto input_format = FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, 1); + + InputPort port(input_format->getPort().getHeader(), input_format.get()); + connect(input_format->getPort(), port); + port.setNeeded(); + + auto read_rabbitmq_message = [&] + { + size_t new_rows = 0; + + while (true) + { + auto status = input_format->prepare(); + + switch (status) + { + case IProcessor::Status::Ready: + input_format->work(); + break; + + case IProcessor::Status::Finished: + input_format->resetParser(); + return new_rows; + + case IProcessor::Status::PortFull: + { + auto chunk = port.pull(); + + auto chunk_rows = chunk.getNumRows(); + new_rows += chunk_rows; + + auto columns = chunk.detachColumns(); + + for (size_t i = 0, s = columns.size(); i < s; ++i) + { + result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size()); + } + break; + } + case IProcessor::Status::NeedData: + case IProcessor::Status::Async: + case IProcessor::Status::Wait: + case IProcessor::Status::ExpandPipeline: + throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); + } + } + }; + + size_t total_rows = 0; + + while (true) + { + if (buffer->eof()) + break; + + auto new_rows = read_rabbitmq_message(); + + auto _exchange = storage.getExchangeName(); + auto _routingKey = storage.getRoutingKey(); + + for (size_t i = 0; i < new_rows; ++i) + { + virtual_columns[0]->insert(_exchange); + virtual_columns[1]->insert(_routingKey); + } + + total_rows = total_rows + new_rows; + buffer->allowNext(); + + if (!new_rows || !checkTimeLimit()) + break; + } + + if 
(total_rows == 0) + return Block(); + + auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + + LOG_DEBUG(log, "Total amount of rows is " + std::to_string(result_block.rows())); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + { + result_block.insert(column); + } + + return ConvertingBlockInputStream( + std::make_shared(result_block), + getHeader(), + ConvertingBlockInputStream::MatchColumnsMode::Name) + .read(); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h new file mode 100644 index 00000000000..c82fd68a680 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +class RabbitMQBlockInputStream : public IBlockInputStream +{ + +public: + RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, + const Context & context_, + const Names & columns, + Poco::Logger * log_); + + ~RabbitMQBlockInputStream() override; + + String getName() const override { return storage.getName(); } + Block getHeader() const override; + + void readPrefixImpl() override; + Block readImpl() override; + //void readSuffixImpl() override; + +private: + StorageRabbitMQ & storage; + Context context; + Names column_names; + Poco::Logger * log; + bool finished = false, claimed = false; + const Block non_virtual_header, virtual_header; + + ConsumerBufferPtr buffer; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp new file mode 100644 index 00000000000..b18d6bf2cfb --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -0,0 +1,32 @@ +#include +#include + +namespace DB +{ + +RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : + LibEventHandler(evbase_), + evbase(evbase_), + log(log_) +{ +} + + +void RabbitMQHandler::onError(AMQP::TcpConnection * /*connection*/, const char * message) +{ + LOG_ERROR(log, "Library error report: " << message); + stop(); +} + + +void RabbitMQHandler::startNonBlock() +{ + event_base_loop(evbase, EVLOOP_NONBLOCK); +} + +void RabbitMQHandler::stop() +{ + event_base_loopbreak(evbase); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h new file mode 100644 index 00000000000..94a559cad38 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class RabbitMQHandler : public AMQP::LibEventHandler +{ + +public: + RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); + + void onError(AMQP::TcpConnection * connection, const char * message) override; + void startNonBlock(); + void stop(); + +private: + event_base * evbase; + Poco::Logger * log; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp index ed8d4ad801a..efb73396515 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -41,4 +40,3 @@ void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def) } } } - diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 0b0f58169fa..f4c62756703 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ 
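
For orientation before the consumer changes below: ReadBufferFromRabbitMQConsumer builds a small topology. Producers publish to a fanout exchange with the configured name; that exchange is bound to a helper exchange ("<name>_direct", or "<name>_hash" of type x-consistent-hash when the consistent-hash plugin is enabled, as the test harness does), and each consumer binds its own exclusive queues to the helper exchange, suffixing the routing key with the channel/queue id when rabbitmq_bind_by_id is set. The same topology expressed with pika, as a sketch only (names mirror the C++ code that follows; the direct variant is shown):

```python
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(
    host='localhost', credentials=pika.PlainCredentials('root', 'clickhouse')))
channel = connection.channel()

exchange, routing_key, channel_id = 'clickhouse-exchange', 'clickhouse', 0

# Producers always publish to a fanout exchange with the configured name ...
channel.exchange_declare(exchange=exchange, exchange_type='fanout')
# ... which is bound to a direct (or x-consistent-hash) helper exchange that
# actually routes messages towards the per-consumer queues.
channel.exchange_declare(exchange=exchange + '_direct', exchange_type='direct')
channel.exchange_bind(destination=exchange + '_direct', source=exchange,
                      routing_key=routing_key)

# Each consumer declares exclusive queues and binds them with an id-suffixed key.
queue = channel.queue_declare(queue='', exclusive=True).method.queue
channel.queue_bind(exchange=exchange + '_direct', queue=queue,
                   routing_key=routing_key + '_' + str(channel_id))
```
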
b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -1,4 +1,5 @@ #pragma once + #include namespace DB @@ -14,7 +15,7 @@ namespace DB M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exhange name, to which messages are sent. Needed to bind queues to it.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ - M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers).", 0) \ + M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers * num_queues).", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp new file mode 100644 index 00000000000..a9f804aaa02 --- /dev/null +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( + std::pair & parsed_address, + const String & exchange_name_, + const String & routing_key_, + const size_t channel_id_, + Poco::Logger * log_, + char row_delimiter_, + const bool bind_by_id_, + const bool hash_exchange_, + const size_t num_queues_, + const std::atomic & stopped_) + : ReadBuffer(nullptr, 0) + , evbase(event_base_new()) + , eventHandler(evbase, log) + , connection(&eventHandler, + AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , exchange_name(exchange_name_) + , routing_key(routing_key_) + , channel_id(channel_id_) + , log(log_) + , row_delimiter(row_delimiter_) + , bind_by_id(bind_by_id_) + , hash_exchange(hash_exchange_) + , num_queues(num_queues_) + , stopped(stopped_) +{ + /* It turned out to be very important to make a different connection each time the object of this class is created, + * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, + * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, + * then it will run all callbacks on the connection - including other stream's consumer's callbacks - + * it result in asynchronous run of the same code and lead to occasional seg faults. + */ + while (!connection.ready()) + { + event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + } + + consumer_channel = std::make_shared(&connection); + + messages.clear(); + current = messages.begin(); + + /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. + * By default there is one queue per consumer. 
+ */ + for (size_t queue_id = 0; queue_id < num_queues; ++queue_id) + { + initQueueBindings(queue_id); + } +} + + +ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() +{ + connection.close(); + + messages.clear(); + current = messages.begin(); + BufferBase::set(nullptr, 0, 0); +} + + +void ReadBufferFromRabbitMQConsumer::initExchange() +{ + /* As there are 5 different types of exchanges and the type should be set as a parameter while publishing the message, + * then for uniformity this parameter should always be set as fanout-exchange type. In current implementation, the exchange, + * to which messages a published, will be bound to the exchange of the needed type, which will distribute messages according to its type. + */ + consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) + { + exchange_declared = false; + LOG_ERROR(log, "Failed to declare fanout exchange: " << message); + }); + + if (hash_exchange) + { + current_exchange_name = exchange_name + "_hash"; + consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * message) + { + exchange_declared = false; + }); + + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + { + exchange_declared = false; + }); + } + else + { + current_exchange_name = exchange_name + "_direct"; + consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * message) + { + exchange_declared = false; + }); + + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + { + exchange_declared = false; + }); + } +} + + +void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) +{ + if (!exchange_declared) + { + initExchange(); + exchange_declared = true; + } + + bool bindings_ok = false, bindings_error = false; + + consumer_channel->declareQueue(AMQP::exclusive) + .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) + { + queues.emplace_back(queue_name_); + + String binding_key = routing_key; + + if (bind_by_id && !hash_exchange) + { + if (queues.size() == 1) + { + binding_key = routing_key + "_" + std::to_string(channel_id); + } + else + { + binding_key = routing_key + "_" + std::to_string(channel_id + queue_id); + } + } + + LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); + + consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) + .onSuccess([&] + { + bindings_ok = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: " << message); + }); + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to declare queue on the channel: " << message); + }); + + while (!bindings_ok && !bindings_error) + { + startNonBlockEventLoop(); + } +} + + +void ReadBufferFromRabbitMQConsumer::subscribeConsumer() +{ + if (subscribed) + return; + + LOG_TRACE(log, "Subscribing to " + std::to_string(queues.size()) + " queues"); + + for (auto & queue : queues) + { + subscribe(queue); + } + + subscribed = true; +} + + +void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) +{ + bool consumer_ok = false, consumer_error = false; + + consumer_channel->consume(queue_name, AMQP::noack) + .onSuccess([&](const std::string & consumer) + { + if (consumerTag == "") + consumerTag = consumer; + + consumer_ok = 
true; + + LOG_TRACE(log, "Consumer " + consumerTag + " is subscribed to queue " + queue_name); + }) + .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) + { + size_t message_size = message.bodySize(); + + if (message_size && message.body() != nullptr) + { + String message_received = std::string(message.body(), message.body() + message_size); + + if (row_delimiter != '\0') + message_received += row_delimiter; + + //LOG_TRACE(log, "Consumer " + consumerTag + " received the message " + message_received); + + received.push_back(message_received); + } + }) + .onError([&](const char * message) + { + consumer_error = true; + LOG_ERROR(log, "Consumer failed: " << message); + }); + + while (!consumer_ok && !consumer_error) + { + startNonBlockEventLoop(); + } +} + + +void ReadBufferFromRabbitMQConsumer::startNonBlockEventLoop() +{ + eventHandler.startNonBlock(); +} + + +bool ReadBufferFromRabbitMQConsumer::nextImpl() +{ + if (stopped || !allowed) + return false; + + if (current == messages.end()) + { + if (received.empty()) + { + /* Run the onReceived callbacks to save the messages that have been received by now + */ + startNonBlockEventLoop(); + } + + if (received.empty()) + { + LOG_TRACE(log, "Stalled"); + return false; + } + + messages.clear(); + messages.swap(received); + current = messages.begin(); + } + + auto new_position = const_cast(current->data()); + BufferBase::set(new_position, current->size(), 0); + + ++current; + allowed = false; + + return true; +} + +} diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h new file mode 100644 index 00000000000..7592fb53bfc --- /dev/null +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace Poco +{ + class Logger; +} + +namespace DB +{ + +using ChannelPtr = std::shared_ptr; + +class ReadBufferFromRabbitMQConsumer : public ReadBuffer +{ + +public: + ReadBufferFromRabbitMQConsumer( + std::pair & parsed_address, + const String & exchange_name_, + const String & routing_key_, + const size_t channel_id_, + Poco::Logger * log_, + char row_delimiter_, + const bool bind_by_id_, + const bool hash_exchange_, + const size_t num_queues_, + const std::atomic & stopped_); + + ~ReadBufferFromRabbitMQConsumer() override; + + void allowNext() { allowed = true; } // Allow to read next message. 
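// Illustrative usage sketch (not part of the patch): RabbitMQBlockInputStream, shown
// earlier in this diff, drives a buffer of this class roughly as follows; the stream
// object owning `buffer` and its format parsing are assumed here.
//
//     buffer->subscribeConsumer();     // start consuming from every bound queue
//     while (!buffer->eof())           // eof()/next() fetch one message via nextImpl()
//     {
//         // ...feed *buffer to the row input format and collect the parsed rows...
//         buffer->allowNext();         // permit nextImpl() to hand out the next message
//     }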
+ void subscribeConsumer(); + +private: + using Messages = std::vector; + using Queues = std::vector; + + event_base * evbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + ChannelPtr consumer_channel; + + const String & exchange_name; + const String & routing_key; + const size_t channel_id; + const bool bind_by_id; + const bool hash_exchange; + + Poco::Logger * log; + char row_delimiter; + bool stalled = false; + bool allowed = true; + const std::atomic & stopped; + + std::atomic exchange_declared = false; + const size_t num_queues; + String consumerTag; // ID for the consumer + Queues queues; + bool subscribed = false; + String current_exchange_name; + + Messages received; + Messages messages; + Messages::iterator current; + + bool nextImpl() override; + + void initExchange(); + void initQueueBindings(const size_t queue_id); + void subscribe(const String & queue_name); + void startNonBlockEventLoop(); + +}; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 98e7e97e2e1..7e7da953d80 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -12,6 +13,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -26,11 +30,9 @@ #include #include #include - -#include -#include #include + namespace DB { @@ -42,6 +44,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } + StorageRabbitMQ::StorageRabbitMQ( const StorageID & table_id_, Context & context_, @@ -70,6 +73,228 @@ StorageRabbitMQ::StorageRabbitMQ( , semaphore(0, num_consumers_) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) { + rabbitmq_context.makeQueryContext(); + + setColumns(columns_); + task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); + task->deactivate(); + + /// Enable a different routing algorithm. 
+ bind_by_id = num_consumers > 1 || num_queues > 1 || bind_by_id; +} + + +Pipes StorageRabbitMQ::read( + const Names & column_names, + const SelectQueryInfo & /* query_info */, + const Context & context, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + unsigned /* num_streams */) +{ + if (num_created_consumers == 0) + return {}; + + Pipes pipes; + pipes.reserve(num_created_consumers); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + pipes.emplace_back(std::make_shared(std::make_shared( + *this, context, column_names, log))); + } + + LOG_DEBUG(log, "Starting reading " << pipes.size() << " streams"); + return pipes; +} + + +void StorageRabbitMQ::startup() +{ + for (size_t i = 0; i < num_consumers; ++i) + { + try + { + pushReadBuffer(createReadBuffer()); + ++num_created_consumers; + } + catch (const AMQP::Exception &) + { + tryLogCurrentException(log); + } + } + + task->activateAndSchedule(); +} + + +void StorageRabbitMQ::shutdown() +{ + stream_cancelled = true; + + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto buffer = popReadBuffer(); + } + + task->deactivate(); +} + + +void StorageRabbitMQ::pushReadBuffer(ConsumerBufferPtr buffer) +{ + std::lock_guard lock(mutex); + buffers.push_back(buffer); + semaphore.set(); +} + + +ConsumerBufferPtr StorageRabbitMQ::popReadBuffer() +{ + return popReadBuffer(std::chrono::milliseconds::zero()); +} + + +ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeout) +{ + // Wait for the first free buffer + if (timeout == std::chrono::milliseconds::zero()) + semaphore.wait(); + else + { + if (!semaphore.tryWait(timeout.count())) + return nullptr; + } + + // Take the first available buffer from the list + std::lock_guard lock(mutex); + auto buffer = buffers.back(); + buffers.pop_back(); + + return buffer; +} + + +ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() +{ + if (update_channel_id) + next_channel_id += num_queues; + update_channel_id = true; + + return std::make_shared(parsed_address, exchange_name, routing_key, next_channel_id, + log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); +} + + +bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); + if (dependencies.empty()) + return true; + + // Check the dependencies are ready? + for (const auto & db_tab : dependencies) + { + auto table = DatabaseCatalog::instance().tryGetTable(db_tab); + if (!table) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(table.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + + // Check all its dependencies + if (!checkDependencies(db_tab)) + return false; + } + + return true; +} + + +void StorageRabbitMQ::threadFunc() +{ + try + { + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + + if (dependencies_count) + { + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!stream_cancelled && num_created_consumers > 0) + { + if (!checkDependencies(table_id)) + break; + + LOG_DEBUG(log, "Started streaming to " << dependencies_count << " attached views"); + + if (!streamToViews()) + break; + } + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + /// Wait for attached views + if (!stream_cancelled) + task->scheduleAfter(500); +} + + +bool StorageRabbitMQ::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id); + if (!table) + throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); + auto block_io = interpreter.execute(); + + // Create a stream for each consumer and join them in a union stream + BlockInputStreams streams; + streams.reserve(num_created_consumers); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto stream = std::make_shared(*this, rabbitmq_context, block_io.out->getHeader().getNames(), log); + streams.emplace_back(stream); + + // Limit read batch to maximum block size to allow DDL + IBlockInputStream::LocalLimits limits; + const Settings & settings = global_context.getSettingsRef(); + limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; + limits.timeout_overflow_mode = OverflowMode::BREAK; + stream->setLimits(limits); + } + + // Join multiple streams if necessary + BlockInputStreamPtr in; + if (streams.size() > 1) + in = std::make_shared(streams, nullptr, streams.size()); + else + in = streams[0]; + + std::atomic stub = {false}; + copyData(*in, *block_io.out, &stub); + + // Check whether the limits were applied during query execution + bool limits_applied = false; + const BlockStreamProfileInfo & info = in->getProfileInfo(); + limits_applied = info.hasAppliedLimit(); + + return limits_applied; } @@ -246,4 +471,3 @@ NamesAndTypesList StorageRabbitMQ::getVirtuals() const } } - diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 37b8c2b1078..8a3a48135b8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include @@ -6,8 +7,11 @@ #include #include #include +#include +#include #include + namespace DB { @@ -19,8 +23,30 @@ class StorageRabbitMQ final: public ext::shared_ptr_helper, pub public: std::string getName() const override { return "RabbitMQ"; } + bool supportsSettings() const override { return true; } + void startup() override; + void shutdown() override; + + Pipes read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + void pushReadBuffer(ConsumerBufferPtr buf); + ConsumerBufferPtr popReadBuffer(); + ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + + const String & getExchangeName() const { return exchange_name; } + const String & getRoutingKey() const { return routing_key; } + + const String & getFormatName() const { return format_name; } + NamesAndTypesList getVirtuals() const override; + protected: StorageRabbitMQ( const StorageID & table_id_, @@ -31,6 +57,7 @@ protected: const String & format_name_, char row_delimiter_, size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange); + private: Context global_context; Context rabbitmq_context; @@ -42,7 +69,6 @@ private: char row_delimiter; size_t num_consumers; size_t num_created_consumers = 0; - bool bind_by_id; size_t 
num_queues; const bool hash_exchange; @@ -52,11 +78,19 @@ private: Poco::Semaphore semaphore; std::mutex mutex; + std::vector buffers; /// available buffers for RabbitMQ consumers - size_t consumer_id = 0; + size_t next_channel_id = 0; + bool update_channel_id = false; BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; + + ConsumerBufferPtr createReadBuffer(); + + void threadFunc(); + bool streamToViews(); + bool checkDependencies(const StorageID & table_id); }; } From 1760f01f74ee8d7fdc7ac39526ce1d041ff2fa2c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 09:42:56 +0000 Subject: [PATCH 005/330] Add tests for RabbitMQ read-only part --- .../integration/test_storage_rabbitmq/test.py | 726 ++++++++++++++++++ 1 file changed, 726 insertions(+) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 475b89f6c60..815a84c1999 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -114,6 +114,732 @@ def rabbitmq_setup_teardown(): # Tests +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'new', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(25): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + messages = [] + for i in range(25, 50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=False) + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_old_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ('rabbitmq1:5672', 'old', 'clickhouse-exchange', 'JSONEachRow', '\\n'); + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='old', body=message) + + result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_empty(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + 
rabbitmq_routing_key = 'empty', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + assert int(instance.query('SELECT count() FROM test.rabbitmq')) == 0 + + +@pytest.mark.timeout(180) +def test_rabbitmq_json_without_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'json', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow' + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + messages = '' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'csv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'CSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}, {i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='csv', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'tsv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}\t{i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='tsv', body=message) + + 
result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_materialized_view(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'mv', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mv', body=message) + + while True: + result = instance.query('SELECT * FROM test.view') + if (rabbitmq_check_result(result)): + break; + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'mvsq', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM (SELECT * FROM test.rabbitmq); + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mvsq', body=message) + + while True: + result = instance.query('SELECT * FROM test.view') + if rabbitmq_check_result(result): + break + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + connection.close(); + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_many_materialized_views(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view1; + DROP TABLE IF EXISTS test.view2; + DROP TABLE IF EXISTS test.consumer1; + DROP TABLE IF EXISTS test.consumer2; + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'mmv', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view1 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS + SELECT * FROM test.rabbitmq; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 
AS + SELECT * FROM test.rabbitmq; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='mmv', body=message) + + while True: + result1 = instance.query('SELECT * FROM test.view1') + result2 = instance.query('SELECT * FROM test.view2') + if rabbitmq_check_result(result1) and rabbitmq_check_result(result2): + break + + instance.query(''' + DROP TABLE test.consumer1; + DROP TABLE test.consumer2; + DROP TABLE test.view1; + DROP TABLE test.view2; + ''') + + rabbitmq_check_result(result1, True) + rabbitmq_check_result(result2, True) + + +@pytest.mark.timeout(240) +def test_rabbitmq_big_message(rabbitmq_cluster): + # Create batchs of messages of size ~100Kb + rabbitmq_messages = 1000 + batch_messages = 1000 + messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(rabbitmq_messages)] + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.rabbitmq (key UInt64, value String) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'big', + rabbitmq_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='big', body=message) + + while True: + result = instance.query('SELECT count() FROM test.view') + if int(result) == batch_messages * rabbitmq_messages: + break + + connection.close() + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_tables(rabbitmq_cluster): + + NUMBER_OF_CONCURRENT_CONSUMERS = 10 + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + print("Setting up {}".format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_hash_exchange = 1, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.{0}; + '''.format(table_name)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise 
python rabbitmq client fails sometimes + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'topic_' + str(randrange(0, NUMBER_OF_CONCURRENT_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + time.sleep(1) + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): + print("dropping rabbitmq_consumer{}".format(consumer_id)) + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + '''.format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): + + NUM_CHANNELS = 5 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'clickhouse', + rabbitmq_num_consumers = 5, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_CHANNELS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): + + NUM_QUEUES = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 
4, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_QUEUES)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster): + + NUM_CONSUMERS = 10 + NUM_QUEUES = 2 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 2, + rabbitmq_num_consumers = 10, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_QUEUES * NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_read_only_combo(rabbitmq_cluster): + 
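    # Note added for clarity (not part of the original test): every message routed to
    # test.rabbitmq is delivered to each of the NUM_MV materialized views, so the check
    # at the end expects messages_num * threads_num * NUM_MV rows in total across the views.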
+ NUM_MV = 5; + NUM_CONSUMERS = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + print("Setting up {}".format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.{0} AS + SELECT * FROM test.rabbitmq; + '''.format(table_name)) + + time.sleep(2) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = 0 + for view in range(NUM_MV): + result += int(instance.query('SELECT count() FROM test.view{0}'.format(view))) + if int(result) == messages_num * threads_num * NUM_MV: + break + time.sleep(1) + + for thread in threads: + thread.join() + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + '''.format(table_name)) + + + assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) if __name__ == '__main__': From fe8d285e11061efa82e7f92aabd2b5d976fc36fe Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 12:52:16 +0000 Subject: [PATCH 006/330] Fix libevent build --- contrib/libevent-cmake/{ => linux}/evconfig-private.h | 0 contrib/libevent-cmake/{ => linux/event2}/event-config.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename contrib/libevent-cmake/{ => linux}/evconfig-private.h (100%) rename contrib/libevent-cmake/{ => linux/event2}/event-config.h (100%) diff --git a/contrib/libevent-cmake/evconfig-private.h b/contrib/libevent-cmake/linux/evconfig-private.h similarity index 100% rename from contrib/libevent-cmake/evconfig-private.h rename to contrib/libevent-cmake/linux/evconfig-private.h diff --git a/contrib/libevent-cmake/event-config.h b/contrib/libevent-cmake/linux/event2/event-config.h similarity index 100% rename from contrib/libevent-cmake/event-config.h rename to contrib/libevent-cmake/linux/event2/event-config.h From c3569882bbcc33c047d9d9b9424bf06b9a50a3bf Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 18:24:40 +0000 Subject: [PATCH 007/330] Update version of docker_compose_rabbitmq.yml --- docker/test/integration/compose/docker_compose_rabbitmq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/compose/docker_compose_rabbitmq.yml 
b/docker/test/integration/compose/docker_compose_rabbitmq.yml index 7ebee3c0ea5..1e9c3777505 100644 --- a/docker/test/integration/compose/docker_compose_rabbitmq.yml +++ b/docker/test/integration/compose/docker_compose_rabbitmq.yml @@ -1,4 +1,4 @@ -version: '2.2' +version: '2.3' services: rabbitmq1: From 14c67c6ae63500eb54a9644844e9c42d087324bc Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 26 May 2020 17:34:57 +0000 Subject: [PATCH 008/330] Fixes --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 13 ++- .../RabbitMQ/RabbitMQBlockInputStream.h | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 4 +- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 1 - .../ReadBufferFromRabbitMQConsumer.cpp | 82 ++++++++++++------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 5 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 35 +++----- src/Storages/RabbitMQ/StorageRabbitMQ.h | 13 +-- .../integration/test_storage_rabbitmq/test.py | 14 ++-- 10 files changed, 94 insertions(+), 77 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 89ea490e842..d498a36f95b 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -5,6 +5,11 @@ #include #include +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -117,13 +122,13 @@ Block RabbitMQBlockInputStream::readImpl() auto new_rows = read_rabbitmq_message(); - auto _exchange = storage.getExchangeName(); - auto _routingKey = storage.getRoutingKey(); + auto exchange_name = storage.getExchangeName(); + auto routing_key = storage.getRoutingKey(); for (size_t i = 0; i < new_rows; ++i) { - virtual_columns[0]->insert(_exchange); - virtual_columns[1]->insert(_routingKey); + virtual_columns[0]->insert(exchange_name); + virtual_columns[1]->insert(routing_key); } total_rows = total_rows + new_rows; diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h index c82fd68a680..fbdb40bded8 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -25,7 +25,7 @@ public: void readPrefixImpl() override; Block readImpl() override; - //void readSuffixImpl() override; + ///void readSuffixImpl() override; private: StorageRabbitMQ & storage; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index b18d6bf2cfb..aa72ab51878 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,14 +12,14 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * /*connection*/, const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) { LOG_ERROR(log, "Library error report: " << message); stop(); } -void RabbitMQHandler::startNonBlock() +void RabbitMQHandler::start() { event_base_loop(evbase, EVLOOP_NONBLOCK); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 94a559cad38..5b8a08be548 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -17,7 +17,7 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void startNonBlock(); + void start(); void stop(); private: diff 
--git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index f4c62756703..509ed68b8d3 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -15,7 +15,6 @@ namespace DB M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exhange name, to which messages are sent. Needed to bind queues to it.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ - M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers * num_queues).", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index a9f804aaa02..5cdcbccadce 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -1,10 +1,18 @@ #include +#include +#include #include #include #include #include +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 +}; + namespace DB { @@ -38,11 +46,21 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - * it result in asynchronous run of the same code and lead to occasional seg faults. + * it result in asynchronous run of the same code (because local variables can be updated both by the current thread + * and in callbacks by another thread during event loop, which is blocking only to the thread that has started the loop). + * So sharing the connection (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. 
*/ - while (!connection.ready()) + + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) { event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for consumer"); } consumer_channel = std::make_shared(&connection); @@ -85,12 +103,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() if (hash_exchange) { current_exchange_name = exchange_name + "_hash"; - consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * message) + consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * /* message */) { exchange_declared = false; }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) { exchange_declared = false; }); @@ -98,12 +116,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else { current_exchange_name = exchange_name + "_direct"; - consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * message) + consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * /* message */) { exchange_declared = false; }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) { exchange_declared = false; }); @@ -113,30 +131,36 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { + /* This varibale can be updated from a different thread in case of some error so its better to always check + * whether exchange is in a working state and if not - declare it once again. + */ if (!exchange_declared) { initExchange(); exchange_declared = true; } - bool bindings_ok = false, bindings_error = false; + bool bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); - String binding_key = routing_key; - if (bind_by_id && !hash_exchange) + /* Every consumer has at least one unique queue. Bind the queues to exchange based on the consumer_channel_id + * in case there is one queue per consumer and bind by queue_id in case there is more than 1 queue per consumer. 
+ * (queue_id is based on channel_id) + */ + if (bind_by_id || hash_exchange) { if (queues.size() == 1) { - binding_key = routing_key + "_" + std::to_string(channel_id); + binding_key = std::to_string(channel_id); } else { - binding_key = routing_key + "_" + std::to_string(channel_id + queue_id); + binding_key = std::to_string(channel_id + queue_id); } } @@ -145,7 +169,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) .onSuccess([&] { - bindings_ok = true; + bindings_created = true; }) .onError([&](const char * message) { @@ -159,9 +183,14 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to declare queue on the channel: " << message); }); - while (!bindings_ok && !bindings_error) + /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. + * It is important at this moment to make sure that queue bindings are created before any publishing can happen because + * otherwise messages will be routed nowhere. + */ + while (!bindings_created && !bindings_error) { - startNonBlockEventLoop(); + /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events + startEventLoop(); } } @@ -184,17 +213,14 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - bool consumer_ok = false, consumer_error = false; + bool consumer_created = false, consumer_error = false; consumer_channel->consume(queue_name, AMQP::noack) - .onSuccess([&](const std::string & consumer) + .onSuccess([&](const std::string & /* consumer */) { - if (consumerTag == "") - consumerTag = consumer; + consumer_created = true; - consumer_ok = true; - - LOG_TRACE(log, "Consumer " + consumerTag + " is subscribed to queue " + queue_name); + LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name); }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -218,16 +244,16 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) LOG_ERROR(log, "Consumer failed: " << message); }); - while (!consumer_ok && !consumer_error) + while (!consumer_created && !consumer_error) { - startNonBlockEventLoop(); + startEventLoop(); } } -void ReadBufferFromRabbitMQConsumer::startNonBlockEventLoop() +void ReadBufferFromRabbitMQConsumer::startEventLoop() { - eventHandler.startNonBlock(); + eventHandler.start(); } @@ -242,12 +268,12 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { /* Run the onReceived callbacks to save the messages that have been received by now */ - startNonBlockEventLoop(); + startEventLoop(); } if (received.empty()) { - LOG_TRACE(log, "Stalled"); + LOG_TRACE(log, "No more messages to be fetched"); return false; } @@ -256,7 +282,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() current = messages.begin(); } - auto new_position = const_cast(current->data()); + auto * new_position = const_cast(current->data()); BufferBase::set(new_position, current->size(), 0); ++current; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 7592fb53bfc..5e4318246a6 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -59,9 +59,8 
@@ private: bool allowed = true; const std::atomic & stopped; - std::atomic exchange_declared = false; + bool exchange_declared = false; const size_t num_queues; - String consumerTag; // ID for the consumer Queues queues; bool subscribed = false; String current_exchange_name; @@ -75,7 +74,7 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startNonBlockEventLoop(); + void startEventLoop(); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7e7da953d80..cfabb5412ba 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -33,6 +33,11 @@ #include +enum + { + RESCHEDULE_WAIT = 500 + }; + namespace DB { @@ -55,7 +60,6 @@ StorageRabbitMQ::StorageRabbitMQ( const String & format_name_, char row_delimiter_, size_t num_consumers_, - bool bind_by_id_, size_t num_queues_, bool hash_exchange_) : IStorage(table_id_) @@ -66,7 +70,6 @@ StorageRabbitMQ::StorageRabbitMQ( , format_name(global_context.getMacros()->expand(format_name_)) , row_delimiter(row_delimiter_) , num_consumers(num_consumers_) - , bind_by_id(bind_by_id_) , num_queues(num_queues_) , hash_exchange(hash_exchange_) , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) @@ -79,8 +82,7 @@ StorageRabbitMQ::StorageRabbitMQ( task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); - /// Enable a different routing algorithm. - bind_by_id = num_consumers > 1 || num_queues > 1 || bind_by_id; + bind_by_id = num_consumers > 1 || num_queues > 1; } @@ -181,7 +183,8 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() next_channel_id += num_queues; update_channel_id = true; - return std::make_shared(parsed_address, exchange_name, routing_key, next_channel_id, + return std::make_shared( + parsed_address, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -244,7 +247,7 @@ void StorageRabbitMQ::threadFunc() /// Wait for attached views if (!stream_cancelled) - task->scheduleAfter(500); + task->scheduleAfter(RESCHEDULE_WAIT); } @@ -397,13 +400,13 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - size_t bind_by_id = static_cast(rabbitmq_settings.rabbitmq_bind_by_id); + bool hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); if (args_count >= 6) { const auto * ast = engine_args[5]->as(); if (ast && ast->value.getType() == Field::Types::UInt64) { - bind_by_id = static_cast(safeGet(ast->value)); + hash_exchange = static_cast(safeGet(ast->value)); } else { @@ -439,22 +442,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - size_t hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); - if (args_count >= 9) - { - const auto * ast = engine_args[8]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - hash_exchange = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); - } - } - return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange, - format, row_delimiter, num_consumers, bind_by_id, num_queues, hash_exchange); + format, row_delimiter, num_consumers, num_queues, hash_exchange); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git 
a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 8a3a48135b8..b334b48a301 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -53,10 +53,13 @@ protected: Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, const String & exchange_name_, - const String & format_name_, char row_delimiter_, - size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange); - + const String & routing_key_, + const String & exchange_name_, + const String & format_name_, + char row_delimiter_, + size_t num_consumers_, + size_t num_queues_, + bool hash_exchange); private: Context global_context; @@ -80,7 +83,7 @@ private: std::mutex mutex; std::vector buffers; /// available buffers for RabbitMQ consumers - size_t next_channel_id = 0; + size_t next_channel_id = 1; /// Must >= 1 because it is used as a binding key, which has to be > 0 bool update_channel_id = false; BackgroundSchedulePool::TaskHolder task; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 815a84c1999..821c5a19e68 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -528,7 +528,7 @@ def test_rabbitmq_sharding_between_tables(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'topic_' + str(randrange(0, NUMBER_OF_CONCURRENT_CONSUMERS)) + key = str(randrange(1, NUMBER_OF_CONCURRENT_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -576,7 +576,6 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'clickhouse', rabbitmq_num_consumers = 5, rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; @@ -605,7 +604,7 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_CHANNELS)) + key = str(randrange(1, NUM_CHANNELS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -641,7 +640,6 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_queues = 4, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; DROP TABLE IF EXISTS test.view; @@ -669,7 +667,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_QUEUES)) + key = str(randrange(1, NUM_QUEUES)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -707,7 +705,6 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_queues = 2, rabbitmq_num_consumers = 10, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; DROP 
TABLE IF EXISTS test.view; @@ -735,7 +732,7 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_QUEUES * NUM_CONSUMERS)) + key = str(randrange(1, NUM_QUEUES * NUM_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -772,7 +769,6 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 4, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; ''') @@ -807,7 +803,7 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_CONSUMERS)) + key = str(randrange(1, NUM_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() From 5e472af425476c7e145d952d9d853b985d7e6e24 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 26 May 2020 20:43:20 +0000 Subject: [PATCH 009/330] Fix merge & small fix --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 15 +++++++-------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 6 +++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index aa72ab51878..cde43862ede 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -14,7 +14,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) { - LOG_ERROR(log, "Library error report: " << message); + LOG_ERROR(log, "Library error report: {}", message); stop(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5cdcbccadce..945de989b57 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -46,11 +46,10 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - * it result in asynchronous run of the same code (because local variables can be updated both by the current thread - * and in callbacks by another thread during event loop, which is blocking only to the thread that has started the loop). - * So sharing the connection (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. + * as a result local variables can be updated both by the current thread and in callbacks by another thread during + * event loop, which is blocking only to the thread that has started the loop. Therefore sharing the connection + * (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. 
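The connection bring-up that this rewritten comment refers to (and that appears as context in the next hunk) is just a bounded retry over the non-blocking event loop. A condensed sketch, with the retry constants and function name chosen for illustration:

#include <amqpcpp.h>
#include <amqpcpp/libevent.h>
#include <event2/event.h>
#include <chrono>
#include <thread>

// Spin the libevent loop until the TCP/AMQP handshake completes or the retry budget runs out.
static bool wait_until_ready(AMQP::TcpConnection & connection, event_base * evbase,
                             size_t max_retries = 1000)
{
    for (size_t i = 0; i < max_retries && !connection.ready(); ++i)
    {
        event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE);
        std::this_thread::sleep_for(std::chrono::milliseconds(200));
    }
    return connection.ready();
}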
*/ - size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) { @@ -97,7 +96,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) { exchange_declared = false; - LOG_ERROR(log, "Failed to declare fanout exchange: " << message); + LOG_ERROR(log, "Failed to declare fanout exchange: {}", message); }); if (hash_exchange) @@ -174,13 +173,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: " << message); + LOG_ERROR(log, "Failed to create queue binding: {}", message); }); }) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to declare queue on the channel: " << message); + LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. @@ -241,7 +240,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer failed: " << message); + LOG_ERROR(log, "Consumer failed: {}", message); }); while (!consumer_created && !consumer_error) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index cfabb5412ba..fb20569200d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -106,7 +106,7 @@ Pipes StorageRabbitMQ::read( *this, context, column_names, log))); } - LOG_DEBUG(log, "Starting reading " << pipes.size() << " streams"); + LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); return pipes; } @@ -136,7 +136,7 @@ void StorageRabbitMQ::shutdown() for (size_t i = 0; i < num_created_consumers; ++i) { - auto buffer = popReadBuffer(); + popReadBuffer(); } task->deactivate(); @@ -233,7 +233,7 @@ void StorageRabbitMQ::threadFunc() if (!checkDependencies(table_id)) break; - LOG_DEBUG(log, "Started streaming to " << dependencies_count << " attached views"); + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); if (!streamToViews()) break; From 0362bb2d2f54ec90c2a71a9f446d2aec41bf920a Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 29 May 2020 16:04:44 +0000 Subject: [PATCH 010/330] Make connection between concurrent consumers shared - not private --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 15 +++- src/Storages/RabbitMQ/RabbitMQHandler.h | 4 +- .../ReadBufferFromRabbitMQConsumer.cpp | 72 +++++++------------ .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 14 ++-- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 24 ++++++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 ++ 6 files changed, 76 insertions(+), 57 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cde43862ede..1f6e9ce1bb1 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -19,13 +19,26 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) } -void RabbitMQHandler::start() +void RabbitMQHandler::start(std::atomic & check_param) { + /* The object of this class is shared between concurrent consumers, who call this method repeatedly at the same time. + * But the loop should not be attempted to start if it is already running. 
Also note that the loop is blocking to + * the thread that has started it. + */ + std::lock_guard lock(mutex); + + /* The callback, which changes this variable, could have already been activated by another thread while we waited for the + * mutex to unlock (as it runs all active events on the connection). This means that there is no need to start event loop again. + */ + if (check_param) + return; + event_base_loop(evbase, EVLOOP_NONBLOCK); } void RabbitMQHandler::stop() { + std::lock_guard lock(mutex); event_base_loopbreak(evbase); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 5b8a08be548..a70b08aba55 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -17,12 +17,14 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void start(); + void start(std::atomic & check_param); void stop(); private: event_base * evbase; Poco::Logger * log; + + std::mutex mutex; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 945de989b57..d6da5850472 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -7,17 +7,12 @@ #include -enum -{ - Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000 -}; - namespace DB { ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( - std::pair & parsed_address, + ChannelPtr consumer_channel_, + RabbitMQHandler & eventHandler_, const String & exchange_name_, const String & routing_key_, const size_t channel_id_, @@ -28,10 +23,8 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const size_t num_queues_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) - , evbase(event_base_new()) - , eventHandler(evbase, log) - , connection(&eventHandler, - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , consumer_channel(std::move(consumer_channel_)) + , eventHandler(eventHandler_) , exchange_name(exchange_name_) , routing_key(routing_key_) , channel_id(channel_id_) @@ -41,29 +34,9 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , hash_exchange(hash_exchange_) , num_queues(num_queues_) , stopped(stopped_) + , exchange_declared(false) + , false_param(false) { - /* It turned out to be very important to make a different connection each time the object of this class is created, - * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, - * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, - * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - * as a result local variables can be updated both by the current thread and in callbacks by another thread during - * event loop, which is blocking only to the thread that has started the loop. Therefore sharing the connection - * (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. 
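The guarded start() above is the core of sharing one handler between consumers: only one thread runs the loop at a time, and a thread that was blocked on the mutex re-checks its completion flag before looping, because the previous holder may already have executed its callbacks. A schematic version of that protocol, with the class and member names being illustrative rather than the patch's exact code:

#include <event2/event.h>
#include <atomic>
#include <mutex>

class LoopRunner
{
public:
    explicit LoopRunner(event_base * evbase_) : evbase(evbase_) {}

    // check_param is the caller's "my operation completed" flag, normally set from an AMQP callback.
    void start(std::atomic<bool> & check_param)
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (check_param)            // another thread's loop already ran our callback
            return;
        event_base_loop(evbase, EVLOOP_NONBLOCK);
    }

    void stop() { event_base_loopbreak(evbase); }

private:
    event_base * evbase;
    std::mutex mutex;
};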
- */ - size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) - { - event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); - std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); - } - - if (!connection.ready()) - { - LOG_ERROR(log, "Cannot set up connection for consumer"); - } - - consumer_channel = std::make_shared(&connection); - messages.clear(); current = messages.begin(); @@ -79,7 +52,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() { - connection.close(); + consumer_channel->close(); messages.clear(); current = messages.begin(); @@ -139,7 +112,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) exchange_declared = true; } - bool bindings_created = false, bindings_error = false; + std::atomic bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -189,7 +162,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) while (!bindings_created && !bindings_error) { /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events - startEventLoop(); + startEventLoop(bindings_created); } } @@ -212,7 +185,7 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - bool consumer_created = false, consumer_error = false; + std::atomic consumer_created = false, consumer_error = false; consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) @@ -224,7 +197,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { size_t message_size = message.bodySize(); - if (message_size && message.body() != nullptr) { String message_received = std::string(message.body(), message.body() + message_size); @@ -232,8 +204,10 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (row_delimiter != '\0') message_received += row_delimiter; - //LOG_TRACE(log, "Consumer " + consumerTag + " received the message " + message_received); - + /* Needed because this vector can be used at the same time by another thread in nextImpl() (below). + * So we lock mutex here and there so that they do not use it asynchronosly. 
+ */ + std::lock_guard lock(mutex); received.push_back(message_received); } }) @@ -245,14 +219,15 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) while (!consumer_created && !consumer_error) { - startEventLoop(); + /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events + startEventLoop(consumer_created); } } -void ReadBufferFromRabbitMQConsumer::startEventLoop() +void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param) { - eventHandler.start(); + eventHandler.start(check_param); } @@ -265,9 +240,8 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { if (received.empty()) { - /* Run the onReceived callbacks to save the messages that have been received by now - */ - startEventLoop(); + /// Run the onReceived callbacks to save the messages that have been received by now + startEventLoop(false_param); } if (received.empty()) @@ -277,6 +251,12 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() } messages.clear(); + + /* Needed because this vector can be used at the same time by another thread in onReceived callback (above). + * So we lock mutex here and there so that they do not use it asynchronosly. + */ + std::lock_guard lock(mutex); + messages.swap(received); current = messages.begin(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 5e4318246a6..31babc5033f 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -22,7 +22,8 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( - std::pair & parsed_address, + ChannelPtr consumer_channel_, + RabbitMQHandler & eventHandler_, const String & exchange_name_, const String & routing_key_, const size_t channel_id_, @@ -42,10 +43,8 @@ private: using Messages = std::vector; using Queues = std::vector; - event_base * evbase; - RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; ChannelPtr consumer_channel; + RabbitMQHandler & eventHandler; const String & exchange_name; const String & routing_key; @@ -59,7 +58,8 @@ private: bool allowed = true; const std::atomic & stopped; - bool exchange_declared = false; + std::atomic exchange_declared; + std::atomic false_param; const size_t num_queues; Queues queues; bool subscribed = false; @@ -69,12 +69,14 @@ private: Messages messages; Messages::iterator current; + std::mutex mutex; + bool nextImpl() override; void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(); + void startEventLoop(std::atomic & check_param); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fb20569200d..ed486e8e709 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -35,7 +35,9 @@ enum { - RESCHEDULE_WAIT = 500 + RESCHEDULE_WAIT = 500, + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 }; namespace DB @@ -75,10 +77,26 @@ StorageRabbitMQ::StorageRabbitMQ( , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) + , evbase(event_base_new()) + , eventHandler(evbase, log) + , connection(&eventHandler, + AMQP::Address(parsed_address.first, parsed_address.second, 
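The same mutex shows up on both sides of the message hand-off: the consume callback appends to the received vector, and nextImpl() swaps the whole vector out under the lock so parsing can continue without holding it. A reduced sketch of that hand-off, where the MessageQueue type and its method names are illustrative:

#include <mutex>
#include <string>
#include <vector>

// Messages arrive on whichever thread is running the event loop; the reader drains them in batches.
class MessageQueue
{
public:
    void push(std::string message)               // called from the onReceived callback
    {
        std::lock_guard<std::mutex> lock(mutex);
        received.push_back(std::move(message));
    }

    std::vector<std::string> drain()             // called from nextImpl()
    {
        std::vector<std::string> batch;
        std::lock_guard<std::mutex> lock(mutex);
        batch.swap(received);                    // take everything accumulated so far
        return batch;
    }

private:
    std::mutex mutex;
    std::vector<std::string> received;
};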
AMQP::Login("root", "clickhouse"), "/")) { - rabbitmq_context.makeQueryContext(); + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + { + event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for consumer"); + } + + rabbitmq_context.makeQueryContext(); setColumns(columns_); + task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); @@ -184,7 +202,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() update_channel_id = true; return std::make_shared( - parsed_address, exchange_name, routing_key, next_channel_id, + std::make_shared(&connection), eventHandler, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b334b48a301..fc098b168f1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -79,6 +79,10 @@ private: Poco::Logger * log; std::pair parsed_address; + event_base * evbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + Poco::Semaphore semaphore; std::mutex mutex; std::vector buffers; /// available buffers for RabbitMQ consumers From 8266715c492749e035f4bd764f5c75d3620af73e Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 31 May 2020 08:39:22 +0000 Subject: [PATCH 011/330] Fix build & fix style --- src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp | 3 +-- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 3 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 ++++++---- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index d498a36f95b..86d760be54a 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -64,8 +64,7 @@ Block RabbitMQBlockInputStream::readImpl() MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); - auto input_format = FormatFactory::instance().getInputFormat( - storage.getFormatName(), *buffer, non_virtual_header, context, 1); + auto input_format = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, 1); InputPort port(input_format->getPort().getHeader(), input_format.get()); connect(input_format->getPort(), port); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 1f6e9ce1bb1..cebe8ee3c3a 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,7 +12,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * /* connection */, const char * message) { LOG_ERROR(log, "Library error report: {}", message); stop(); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index d6da5850472..d6372dfe4d3 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp 
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ed486e8e709..5f7570dd8c1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -45,10 +45,8 @@ namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -157,6 +155,7 @@ void StorageRabbitMQ::shutdown() popReadBuffer(); } + connection.close(); task->deactivate(); } @@ -201,8 +200,10 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() next_channel_id += num_queues; update_channel_id = true; + ChannelPtr consumer_channel = std::make_shared(&connection); + return std::make_shared( - std::make_shared(&connection), eventHandler, exchange_name, routing_key, next_channel_id, + consumer_channel, eventHandler, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -460,7 +461,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange, + return StorageRabbitMQ::create( + args.table_id, args.context, args.columns, host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); }; From 037ed3a02ce5bc0ed3c49b9dd24b13359286828b Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 31 May 2020 09:34:57 +0000 Subject: [PATCH 012/330] Code fix & style fix & merge fix --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 5 +-- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 8 ++++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +++++++++---------- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 86d760be54a..1c6eaf6f2e9 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -64,8 +64,9 @@ Block RabbitMQBlockInputStream::readImpl() MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); - auto input_format = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, 1); - + auto input_format = FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, 1); + InputPort port(input_format->getPort().getHeader(), input_format.get()); connect(input_format->getPort(), port); port.setNeeded(); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cebe8ee3c3a..09398da73c7 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,9 +12,15 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * /* connection */, const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); + if (!connection->ready()) + { + std::cerr << "Connection lost, no recovery is possible"; + throw; + } + stop(); } diff --git 
a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5f7570dd8c1..b1060a59e00 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -32,17 +32,15 @@ #include #include - -enum - { - RESCHEDULE_WAIT = 500, - Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000 - }; - namespace DB { +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 +}; + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -77,8 +75,7 @@ StorageRabbitMQ::StorageRabbitMQ( , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) , eventHandler(evbase, log) - , connection(&eventHandler, - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) { size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -136,9 +133,10 @@ void StorageRabbitMQ::startup() pushReadBuffer(createReadBuffer()); ++num_created_consumers; } - catch (const AMQP::Exception &) + catch (const AMQP::Exception & e) { - tryLogCurrentException(log); + std::cerr << e.what(); + throw; } } @@ -202,9 +200,8 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - return std::make_shared( - consumer_channel, eventHandler, exchange_name, routing_key, next_channel_id, - log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + return std::make_shared(consumer_channel, eventHandler, exchange_name, + routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -266,7 +263,7 @@ void StorageRabbitMQ::threadFunc() /// Wait for attached views if (!stream_cancelled) - task->scheduleAfter(RESCHEDULE_WAIT); + task->activateAndSchedule(); } @@ -462,8 +459,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } return StorageRabbitMQ::create( - args.table_id, args.context, args.columns, host_port, routing_key, exchange, - format, row_delimiter, num_consumers, num_queues, hash_exchange); + args.table_id, args.context, args.columns, + host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); From 5757dd1d57c68a8f03ddc5b9ba41e85d584f909c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 15:37:23 +0000 Subject: [PATCH 013/330] Add insert part --- src/Storages/RabbitMQ/Buffer_fwd.h | 3 + .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 57 ++++++ .../RabbitMQ/RabbitMQBlockOutputStream.h | 29 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 16 ++ src/Storages/RabbitMQ/StorageRabbitMQ.h | 7 + .../WriteBufferToRabbitMQProducer.cpp | 169 ++++++++++++++++++ .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 67 +++++++ 7 files changed, 348 insertions(+) create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h create mode 100644 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp create mode 100644 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h diff --git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h index f0ef010c518..5be2c6fdf6a 100644 --- 
a/src/Storages/RabbitMQ/Buffer_fwd.h +++ b/src/Storages/RabbitMQ/Buffer_fwd.h @@ -8,4 +8,7 @@ namespace DB class ReadBufferFromRabbitMQConsumer; using ConsumerBufferPtr = std::shared_ptr; +class WriteBufferToRabbitMQProducer; +using ProducerBufferPtr = std::shared_ptr; + } diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp new file mode 100644 index 00000000000..3f940891c23 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern int CANNOT_CREATE_IO_BUFFER; +} + + +RabbitMQBlockOutputStream::RabbitMQBlockOutputStream( + StorageRabbitMQ & storage_, const Context & context_) : storage(storage_), context(context_) +{ +} + + +Block RabbitMQBlockOutputStream::getHeader() const +{ + return storage.getSampleBlockNonMaterialized(); +} + + +void RabbitMQBlockOutputStream::writePrefix() +{ + buffer = storage.createWriteBuffer(); + if (!buffer) + throw Exception("Failed to create RabbitMQ producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + + child = FormatFactory::instance().getOutput( + storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) + { + buffer->count_row(); + }); +} + + +void RabbitMQBlockOutputStream::write(const Block & block) +{ + child->write(block); + + if (buffer) + buffer->flush(); +} + + +void RabbitMQBlockOutputStream::writeSuffix() +{ + child->writeSuffix(); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h new file mode 100644 index 00000000000..2f7b89a2a30 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class RabbitMQBlockOutputStream : public IBlockOutputStream +{ + +public: + explicit RabbitMQBlockOutputStream(StorageRabbitMQ & storage_, const Context & context_); + + Block getHeader() const override; + + void writePrefix() override; + void write(const Block & block) override; + void writeSuffix() override; + +private: + StorageRabbitMQ & storage; + Context context; + ProducerBufferPtr buffer; + BlockOutputStreamPtr child; +}; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fb705e4d1bc..ee5dede5261 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -124,6 +126,12 @@ Pipes StorageRabbitMQ::read( } +BlockOutputStreamPtr StorageRabbitMQ::write(const ASTPtr &, const Context & context) +{ + return std::make_shared(*this, context); +} + + void StorageRabbitMQ::startup() { for (size_t i = 0; i < num_consumers; ++i) @@ -205,6 +213,14 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() } +ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() +{ + return std::make_shared(parsed_address, routing_key, exchange_name, + log, num_consumers, bind_by_id, hash_exchange, + row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); +} + + bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index fc098b168f1..5aa77a9a732 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -25,6 +25,7 @@ public: std::string getName() const override { return "RabbitMQ"; } bool supportsSettings() const override { return true; } + bool noPushingToViews() const override { return true; } void startup() override; void shutdown() override; @@ -37,10 +38,16 @@ public: size_t max_block_size, unsigned num_streams) override; + BlockOutputStreamPtr write( + const ASTPtr & query, + const Context & context) override; + void pushReadBuffer(ConsumerBufferPtr buf); ConsumerBufferPtr popReadBuffer(); ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + ProducerBufferPtr createWriteBuffer(); + const String & getExchangeName() const { return exchange_name; } const String & getRoutingKey() const { return routing_key; } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp new file mode 100644 index 00000000000..529cc5bd93b --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -0,0 +1,169 @@ +#include +#include "Core/Block.h" +#include "Columns/ColumnString.h" +#include "Columns/ColumnsNumber.h" +#include +#include +#include +#include + + +namespace DB +{ + +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000, + Buffer_limit_to_flush = 50000 +}; + +WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + const size_t num_queues_, + const bool bind_by_id_, + const bool hash_exchange_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_) + : WriteBuffer(nullptr, 0) + , routing_key(routing_key_) + , exchange_name(exchange_) + , log(log_) + , num_queues(num_queues_) + , bind_by_id(bind_by_id_) + , hash_exchange(hash_exchange_) + , delim(delimiter) + , max_rows(rows_per_message) + , chunk_size(chunk_size_) + , producerEvbase(event_base_new()) + , eventHandler(producerEvbase, log) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) +{ + /* The reason behind making a separate connection for each concurrent producer is explained here: + * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 + * - publishing from different threads (as outputStreams are asynchronous) leads to internal libary errors. 
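The INSERT path added here hinges on one detail: the row-oriented format writes raw bytes into the producer buffer, and the per-row callback handed to the output format tells the buffer where row boundaries fall, so it can cut a message every rows_per_message rows. A simplified model of that contract, with types and names that are illustrative rather than ClickHouse's actual interfaces:

#include <string>
#include <vector>

// Minimal stand-in for a producer buffer: bytes are appended freely,
// and markRow() decides when the accumulated bytes become one message.
class RowMessageBuffer
{
public:
    explicit RowMessageBuffer(size_t rows_per_message_) : rows_per_message(rows_per_message_) {}

    void write(const char * data, size_t size) { pending.append(data, size); }

    void markRow()                       // the format layer calls this after serialising each row
    {
        if (++rows % rows_per_message == 0)
        {
            messages.push_back(pending); // one outgoing RabbitMQ message
            pending.clear();
        }
    }

    std::vector<std::string> messages;

private:
    const size_t rows_per_message;
    size_t rows = 0;
    std::string pending;
};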
+ */ + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + { + event_base_loop(producerEvbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for producer!"); + } + + producer_channel = std::make_shared(&connection); +} + + +WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() +{ + flush(); + connection.close(); + + assert(rows == 0 && chunks.empty()); +} + + +void WriteBufferToRabbitMQProducer::count_row() +{ + if (++rows % max_rows == 0) + { + const std::string & last_chunk = chunks.back(); + size_t last_chunk_size = offset(); + + if (delim && last_chunk[last_chunk_size - 1] == delim) + --last_chunk_size; + + std::string payload; + payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); + + for (auto i = chunks.begin(), e = --chunks.end(); i != e; ++i) + payload.append(*i); + + payload.append(last_chunk, 0, last_chunk_size); + + rows = 0; + chunks.clear(); + set(nullptr, 0); + + messages.emplace_back(payload); + ++message_counter; + + if (messages.size() >= Buffer_limit_to_flush) + { + flush(); + } + } +} + + +void WriteBufferToRabbitMQProducer::flush() +{ + /* Why accumulating payloads and not publishing each of them at once in count_row()? Because publishing needs to + * be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it each time + * we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to + * exchange becoming inactive at some point and part of messages is lost as a result. + */ + std::atomic exchange_declared = false, exchange_error = false; + + producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) + .onSuccess([&]() + { + for (auto & payload : messages) + { + if (!message_counter) + return; + + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + + --message_counter; + } + + exchange_declared = true; + messages.clear(); + }) + .onError([&](const char * message) + { + exchange_error = true; + exchange_declared = false; + LOG_ERROR(log, "Exchange was not declared: {}", message); + }); + + while (!exchange_declared && !exchange_error) + { + startEventLoop(exchange_declared); + } +} + + +void WriteBufferToRabbitMQProducer::nextImpl() +{ + chunks.push_back(std::string()); + chunks.back().resize(chunk_size); + set(chunks.back().data(), chunk_size); +} + + +void WriteBufferToRabbitMQProducer::startEventLoop(std::atomic & check_param) +{ + eventHandler.start(check_param); +} + +} diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h new file mode 100644 index 00000000000..d7a1715d491 --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using ProducerPtr = std::shared_ptr; +using Messages = std::vector; + +class WriteBufferToRabbitMQProducer : public WriteBuffer +{ +public: + WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + const size_t num_queues_, + const bool 
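count_row() reassembles the WriteBuffer's fixed-size chunks back into one contiguous payload, trimming the trailing row delimiter from the last, partially filled chunk. The stitching logic in isolation looks roughly like the sketch below; the function name and the way last_chunk_size is passed in are illustrative, and a non-empty chunk list is assumed:

#include <iterator>
#include <list>
#include <optional>
#include <string>

// Rebuild one message payload from a list of fixed-size chunks, where only
// `last_chunk_size` bytes of the final chunk are meaningful.
static std::string assemble_payload(const std::list<std::string> & chunks, size_t chunk_size,
                                    size_t last_chunk_size, std::optional<char> delimiter)
{
    const std::string & last_chunk = chunks.back();

    if (delimiter && last_chunk_size > 0 && last_chunk[last_chunk_size - 1] == *delimiter)
        --last_chunk_size;                               // do not ship the trailing row delimiter

    std::string payload;
    payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size);

    for (auto it = chunks.begin(); it != std::prev(chunks.end()); ++it)
        payload.append(*it);                             // full intermediate chunks

    payload.append(last_chunk, 0, last_chunk_size);      // partial final chunk
    return payload;
}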
bind_by_id_, + const bool hash_exchange_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_ + ); + + ~WriteBufferToRabbitMQProducer() override; + + void count_row(); + void flush(); + +private: + void nextImpl() override; + void checkExchange(); + void startEventLoop(std::atomic & check_param); + + const String routing_key; + const String exchange_name; + const bool bind_by_id; + const bool hash_exchange; + const size_t num_queues; + + event_base * producerEvbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + ProducerPtr producer_channel; + + size_t next_queue = 0; + UInt64 message_counter = 0; + String channel_id; + + Messages messages; + + Poco::Logger * log; + const std::optional delim; + const size_t max_rows; + const size_t chunk_size; + size_t count_mes = 0; + size_t rows = 0; + std::list chunks; +}; + +} From 5939422b85c44038f2345b99810d95ed5bb090a3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 16:19:59 +0000 Subject: [PATCH 014/330] Add tests for insert part --- .../integration/test_storage_rabbitmq/test.py | 234 ++++++++++++++++++ 1 file changed, 234 insertions(+) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 821c5a19e68..0533dd7e2f4 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -838,6 +838,240 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(180) +def test_rabbitmq_insert(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'insert1', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + consumer.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + result = consumer.queue_declare(queue='') + queue_name = result.method.queue + consumer.queue_bind(exchange='clickhouse-exchange', queue=queue_name, routing_key='insert1') + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + insert_messages = [] + def onReceived(channel, method, properties, body): + i = 0 + insert_messages.append(body.decode()) + if (len(insert_messages) == 50): + channel.stop_consuming() + + consumer.basic_qos(prefetch_count=50) + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + result = '\n'.join(insert_messages) + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(240) +def test_rabbitmq_many_inserts(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.view_many; + DROP TABLE IF EXISTS test.consumer_many; + CREATE TABLE test.rabbitmq_many (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'insert2', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_many (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS + SELECT * FROM test.rabbitmq_many; + ''') + + messages_num = 1000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_many VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_many') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_many; + DROP TABLE test.view_many; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(240) +def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + CREATE TABLE test.rabbitmq_sharding (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_sharding (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_sharding TO test.view_sharding AS + SELECT * FROM test.rabbitmq_sharding; + ''') + + messages_num = 10000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_sharding VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_sharding') + time.sleep(1) + print result + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_sharding; + DROP TABLE test.view_sharding; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_overloaded_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_overload; + DROP TABLE IF EXISTS test.consumer_overload; + CREATE TABLE test.rabbitmq_overload (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_overload (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS + SELECT * FROM test.rabbitmq_overload; + ''') + + messages_num = 100000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_overload VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 5 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_overload') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_overload; + DROP TABLE test.view_overload; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From 386dc4d95ee269c289fa338bedc4f9cb1d0b9149 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 16:56:16 +0000 Subject: [PATCH 015/330] Fixes --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 1 - src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 8 ++------ src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 09398da73c7..547851f349a 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -44,7 +44,6 @@ void RabbitMQHandler::start(std::atomic & check_param) void RabbitMQHandler::stop() { - std::lock_guard lock(mutex); event_base_loopbreak(evbase); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index d6372dfe4d3..27c5ab800f0 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -207,9 +207,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if 
(row_delimiter != '\0') message_received += row_delimiter; - /* Needed because this vector can be used at the same time by another thread in nextImpl() (below). - * So we lock mutex here and there so that they do not use it asynchronosly. - */ + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). std::lock_guard lock(mutex); received.push_back(message_received); } @@ -255,9 +253,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() messages.clear(); - /* Needed because this vector can be used at the same time by another thread in onReceived callback (above). - * So we lock mutex here and there so that they do not use it asynchronosly. - */ + /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback (above). std::lock_guard lock(mutex); messages.swap(received); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 529cc5bd93b..12d6c2b0fb8 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -15,7 +15,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 50000 + Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( From 806fd2739567562f62fae565fea980bdcaea051b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 20:48:24 +0000 Subject: [PATCH 016/330] Fix build & fix style & fix --- src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 3 ++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 13 +++++++------ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 3f940891c23..0858e2101df 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -10,7 +10,7 @@ namespace DB namespace ErrorCodes { - extern int CANNOT_CREATE_IO_BUFFER; + extern const int CANNOT_CREATE_IO_BUFFER; } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 547851f349a..6308e2e0089 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -15,7 +15,8 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); - if (!connection->ready()) + + if (!connection->usable() || !connection->ready()) { std::cerr << "Connection lost, no recovery is possible"; throw; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ee5dede5261..147d3ba2115 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -72,7 +72,7 @@ StorageRabbitMQ::StorageRabbitMQ( , num_consumers(num_consumers_) , num_queues(num_queues_) , hash_exchange(hash_exchange_) - , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , 
parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 12d6c2b0fb8..73434bc0ea6 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,7 +16,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers + Buffer_limit_to_flush = 5000 /// It is important to keep it low in order not to kill consumers }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( @@ -44,8 +45,8 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) { /* The reason behind making a separate connection for each concurrent producer is explained here: - * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - * - publishing from different threads (as outputStreams are asynchronous) leads to internal libary errors. + * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from + * different threads (as outputStreams are asynchronous) with the same connection leads to internal libary errors. */ size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -107,9 +108,9 @@ void WriteBufferToRabbitMQProducer::count_row() void WriteBufferToRabbitMQProducer::flush() { - /* Why accumulating payloads and not publishing each of them at once in count_row()? Because publishing needs to - * be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it each time - * we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to + /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing + * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it + * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to * exchange becoming inactive at some point and part of messages is lost as a result. 
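In other words, flush() treats the passive declareExchange() as a cheap "exchange is usable on this channel" check and performs the whole batch of publishes inside its onSuccess callback. Schematically it behaves like the sketch below; the flag variables and function name are illustrative, and the caller is expected to drive the event loop until one of the flags flips:

#include <amqpcpp.h>
#include <atomic>
#include <string>
#include <vector>

// Publish a batch of payloads, sharded over num_queues numeric routing keys,
// only after the (passively declared) exchange is confirmed to exist.
static void publish_batch(AMQP::TcpChannel & channel, const std::string & exchange,
                          std::vector<std::string> & payloads, size_t num_queues,
                          std::atomic<bool> & done, std::atomic<bool> & error)
{
    channel.declareExchange(exchange, AMQP::direct, AMQP::passive)
        .onSuccess([&]()
        {
            size_t next_queue = 0;
            for (const auto & payload : payloads)
            {
                next_queue = next_queue % num_queues + 1;          // routing keys 1..num_queues
                channel.publish(exchange, std::to_string(next_queue), payload);
            }
            payloads.clear();
            done = true;
        })
        .onError([&](const char *) { error = true; });
}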
*/ std::atomic exchange_declared = false, exchange_error = false; From 786874e86754a60f711ed2fc93399edf51eb4f35 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jun 2020 13:15:53 +0000 Subject: [PATCH 017/330] Better publish & some fixes --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../WriteBufferToRabbitMQProducer.cpp | 70 +++++++++++++++---- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 7 +- 6 files changed, 69 insertions(+), 16 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 0858e2101df..17e4db3fb89 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -35,7 +35,7 @@ void RabbitMQBlockOutputStream::writePrefix() child = FormatFactory::instance().getOutput( storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) { - buffer->count_row(); + buffer->countRow(); }); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 6308e2e0089..775db87a1f8 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -16,7 +16,7 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes { LOG_ERROR(log, "Library error report: {}", message); - if (!connection->usable() || !connection->ready()) + if (connection->closed()) { std::cerr << "Connection lost, no recovery is possible"; throw; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index a70b08aba55..117f80d26f8 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 147d3ba2115..7cbfb164a2d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -216,7 +216,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { return std::make_shared(parsed_address, routing_key, exchange_name, - log, num_consumers, bind_by_id, hash_exchange, + log, num_consumers * num_queues, bind_by_id, hash_exchange, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 73434bc0ea6..86d3b32925a 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -46,7 +46,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( { /* The reason behind making a separate connection for each concurrent producer is explained here: * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from - * different threads (as outputStreams are asynchronous) with the same connection leads to internal libary errors. + * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. 
*/ size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -73,7 +73,7 @@ WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() } -void WriteBufferToRabbitMQProducer::count_row() +void WriteBufferToRabbitMQProducer::countRow() { if (++rows % max_rows == 0) { @@ -108,42 +108,88 @@ void WriteBufferToRabbitMQProducer::count_row() void WriteBufferToRabbitMQProducer::flush() { - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing - * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it - * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to - * exchange becoming inactive at some point and part of messages is lost as a result. - */ std::atomic exchange_declared = false, exchange_error = false; + /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name + * and makes it visible from current producer_channel. + */ + producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) .onSuccess([&]() { + exchange_declared = true; + + /// The case that should not normally happen: message was not delivered to queue (queue ttl exceeded) / not forwareded to consumer + if (flush_returned) + { + /// Needed to avoid data race because two different threads may access this vector + std::lock_guard lock(mutex); + + LOG_TRACE(log, "Redelivering returned messages"); + for (auto & payload : returned) + { + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + + --message_counter; + } + + returned.clear(); + } + + /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing + * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it + * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to + * exchange becoming inactive at some point and part of messages is lost as a result. 
+ */ for (auto & payload : messages) { if (!message_counter) - return; + break; next_queue = next_queue % num_queues + 1; if (bind_by_id || hash_exchange) { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + producer_channel->publish(exchange_name, std::to_string(next_queue), payload, AMQP::mandatory || AMQP::immediate) + .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) + { + flush_returned = true; + + /// Needed to avoid data race because two different threads may access this variable + std::lock_guard lock(mutex); + returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); + }); } else { - producer_channel->publish(exchange_name, routing_key, payload); + producer_channel->publish(exchange_name, routing_key, payload, AMQP::mandatory || AMQP::immediate) + .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) + { + flush_returned = true; + + /// Needed to avoid data race because two different threads may access this vector + std::lock_guard lock(mutex); + returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); + }); } --message_counter; } - exchange_declared = true; messages.clear(); }) .onError([&](const char * message) { exchange_error = true; - exchange_declared = false; LOG_ERROR(log, "Exchange was not declared: {}", message); }); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index d7a1715d491..146be0c5796 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -30,7 +32,7 @@ public: ~WriteBufferToRabbitMQProducer() override; - void count_row(); + void countRow(); void flush(); private: @@ -52,8 +54,11 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; String channel_id; + std::atomic flush_returned = false; + std::mutex mutex; Messages messages; + Messages returned; Poco::Logger * log; const std::optional delim; From 5624066195f94f78a876926ff88db0f1aad4ff72 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jun 2020 06:14:09 +0000 Subject: [PATCH 018/330] Fix producer --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 +- .../WriteBufferToRabbitMQProducer.cpp | 49 ++----------------- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 -- 4 files changed, 8 insertions(+), 50 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 17e4db3fb89..8e867db6de9 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -46,6 +46,8 @@ void RabbitMQBlockOutputStream::write(const Block & block) if (buffer) buffer->flush(); + + storage.pingConnection(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 5aa77a9a732..635d53e6cf0 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -54,6 +54,8 @@ public: const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; + const void pingConnection() { connection.heartbeat(); } + protected: StorageRabbitMQ( const StorageID & table_id_, @@ -88,7 +90,7 @@ private: event_base * 
evbase; RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; + AMQP::TcpConnection connection; /// Connection for all consumers Poco::Semaphore semaphore; std::mutex mutex; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 86d3b32925a..e61a8e1ccd8 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -16,7 +16,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 5000 /// It is important to keep it low in order not to kill consumers + Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( @@ -113,38 +113,11 @@ void WriteBufferToRabbitMQProducer::flush() /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name * and makes it visible from current producer_channel. */ - producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) .onSuccess([&]() { exchange_declared = true; - /// The case that should not normally happen: message was not delivered to queue (queue ttl exceeded) / not forwareded to consumer - if (flush_returned) - { - /// Needed to avoid data race because two different threads may access this vector - std::lock_guard lock(mutex); - - LOG_TRACE(log, "Redelivering returned messages"); - for (auto & payload : returned) - { - next_queue = next_queue % num_queues + 1; - - if (bind_by_id || hash_exchange) - { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); - } - else - { - producer_channel->publish(exchange_name, routing_key, payload); - } - - --message_counter; - } - - returned.clear(); - } - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it * each time we publish. 
Declaring it once and then publishing without wrapping inside onSuccess callback leads to @@ -159,27 +132,11 @@ void WriteBufferToRabbitMQProducer::flush() if (bind_by_id || hash_exchange) { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload, AMQP::mandatory || AMQP::immediate) - .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) - { - flush_returned = true; - - /// Needed to avoid data race because two different threads may access this variable - std::lock_guard lock(mutex); - returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); - }); + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); } else { - producer_channel->publish(exchange_name, routing_key, payload, AMQP::mandatory || AMQP::immediate) - .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) - { - flush_returned = true; - - /// Needed to avoid data race because two different threads may access this vector - std::lock_guard lock(mutex); - returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); - }); + producer_channel->publish(exchange_name, routing_key, payload); } --message_counter; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 146be0c5796..c61a76a3e74 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -54,11 +54,8 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; String channel_id; - std::atomic flush_returned = false; - std::mutex mutex; Messages messages; - Messages returned; Poco::Logger * log; const std::optional delim; From 972611e31b3c6f1ad18f94898372590eafd8e509 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jun 2020 06:22:53 +0000 Subject: [PATCH 019/330] Fix consumer --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 54 +++++++++++++------ src/Storages/RabbitMQ/RabbitMQHandler.h | 4 +- .../ReadBufferFromRabbitMQConsumer.cpp | 50 +++++++++++++++-- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 3 ++ 4 files changed, 91 insertions(+), 20 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 775db87a1f8..1a3ede79420 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -4,6 +4,13 @@ namespace DB { +enum +{ + Lock_timeout = 50, + Max_threads_to_pass = 10 +}; + + RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : LibEventHandler(evbase_), evbase(evbase_), @@ -16,10 +23,9 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes { LOG_ERROR(log, "Library error report: {}", message); - if (connection->closed()) + if (!connection->usable() || !connection->ready()) { - std::cerr << "Connection lost, no recovery is possible"; - throw; + LOG_ERROR(log, "Connection lost completely"); } stop(); @@ -28,24 +34,42 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes void RabbitMQHandler::start(std::atomic & check_param) { - /* The object of this class is shared between concurrent consumers, who call this method repeatedly at the same time. - * But the loop should not be attempted to start if it is already running. Also note that the loop is blocking to - * the thread that has started it. 
+ /* The object of this class is shared between concurrent consumers (who share the same connection == share the same + * event loop). But the loop should not be attempted to start if it is already running. */ - std::lock_guard lock(mutex); + if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) + { + /* The callback, which changes this variable, could have already been activated by another thread while we waited + * for the mutex to unlock (as it runs all active events on the connection). This means that there is no need to + * start event loop again. + */ + if (!check_param) + { + event_base_loop(evbase, EVLOOP_NONBLOCK); + } - /* The callback, which changes this variable, could have already been activated by another thread while we waited for the - * mutex to unlock (as it runs all active events on the connection). This means that there is no need to start event loop again. - */ - if (check_param) - return; - - event_base_loop(evbase, EVLOOP_NONBLOCK); + mutex_before_event_loop.unlock(); + } + else + { + if (++count_passed == Max_threads_to_pass) + { + /* Event loop is blocking to the thread that started it and it is not good to block one single thread as it loops + * untill there are no active events, but there can be too many of them for one thread to be blocked for so long. + */ + stop(); + count_passed = 0; + } + } } void RabbitMQHandler::stop() { - event_base_loopbreak(evbase); + if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + { + event_base_loopbreak(evbase); + mutex_before_loop_stop.unlock(); + } } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 117f80d26f8..39fccd4dace 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -26,7 +26,9 @@ private: event_base * evbase; Poco::Logger * log; - std::mutex mutex; + size_t count_passed = 0; + std::timed_mutex mutex_before_event_loop; + std::timed_mutex mutex_before_loop_stop; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 27c5ab800f0..f8259ce8c4c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,6 +13,11 @@ namespace DB { +enum +{ + Received_max_to_stop_loop = 10000 // Explained below +}; + ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -117,7 +122,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) std::atomic bindings_created = false, bindings_error = false; - consumer_channel->declareQueue(AMQP::exclusive) + consumer_channel->declareQueue(AMQP::durable) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); @@ -145,6 +150,12 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onSuccess([&] { bindings_created = true; + + /// Unblock current thread so that it does not continue to execute all callbacks on the connection + if (++count_bound_queues == num_queues) + { + stopEventLoop(); + } }) .onError([&](const char * message) { @@ -196,6 +207,12 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) consumer_created = true; LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name); + + /// Unblock current thread so that it does 
not continue to execute all callbacks on the connection + if (++count_subscribed == queues.size()) + { + stopEventLoop(); + } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -207,15 +224,34 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (row_delimiter != '\0') message_received += row_delimiter; + //LOG_TRACE(log, "Consumer {} received a message", channel_id); + + bool stop_loop = false; + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). - std::lock_guard lock(mutex); - received.push_back(message_received); + { + std::lock_guard lock(mutex); + received.push_back(message_received); + + /* As event loop is blocking to the thread that started it and a single thread should not be blocked while + * executing all callbacks on the connection (not only its own), then there should be some point to unblock + */ + if (received.size() >= Received_max_to_stop_loop) + { + stop_loop = true; + } + } + + if (stop_loop) + { + stopEventLoop(); + } } }) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer failed: {}", message); + LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); while (!consumer_created && !consumer_error) @@ -226,6 +262,12 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) } +void ReadBufferFromRabbitMQConsumer::stopEventLoop() +{ + eventHandler.stop(); +} + + void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param) { eventHandler.start(check_param); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 31babc5033f..55adb39bdce 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -64,6 +64,8 @@ private: Queues queues; bool subscribed = false; String current_exchange_name; + size_t count_subscribed = 0; + size_t count_bound_queues = 0; Messages received; Messages messages; @@ -77,6 +79,7 @@ private: void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); void startEventLoop(std::atomic & check_param); + void stopEventLoop(); }; } From 68b94c5c20fc1ed1d222ee14d096991f2fdca705 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Jun 2020 13:42:11 +0000 Subject: [PATCH 020/330] Fixes --- programs/server/config.xml | 3 ++- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 14 +------------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 7 +++++-- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 12 ++++++++---- src/Storages/RabbitMQ/StorageRabbitMQ.h | 3 ++- .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 7 +++++-- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 5 ++++- 8 files changed, 28 insertions(+), 24 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 21605edeb36..b39ee180466 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -51,7 +51,8 @@ 8443 9440 --> - + root + clickhouse diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 1a3ede79420..34a77489faa 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -6,8 +6,7 @@ namespace DB enum { - Lock_timeout = 50, - Max_threads_to_pass = 10 + Lock_timeout = 50 }; @@ -50,17 +49,6 @@ void 
RabbitMQHandler::start(std::atomic & check_param) mutex_before_event_loop.unlock(); } - else - { - if (++count_passed == Max_threads_to_pass) - { - /* Event loop is blocking to the thread that started it and it is not good to block one single thread as it loops - * untill there are no active events, but there can be too many of them for one thread to be blocked for so long. - */ - stop(); - count_passed = 0; - } - } } void RabbitMQHandler::stop() diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index f8259ce8c4c..1bd2c7831ff 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -44,6 +44,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , stopped(stopped_) , exchange_declared(false) , false_param(false) + , loop_attempt(false) { messages.clear(); current = messages.begin(); @@ -225,7 +226,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) message_received += row_delimiter; //LOG_TRACE(log, "Consumer {} received a message", channel_id); - + bool stop_loop = false; /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). @@ -236,7 +237,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) /* As event loop is blocking to the thread that started it and a single thread should not be blocked while * executing all callbacks on the connection (not only its own), then there should be some point to unblock */ - if (received.size() >= Received_max_to_stop_loop) + if (!loop_attempt && received.size() % Received_max_to_stop_loop == 0) { stop_loop = true; } @@ -284,7 +285,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (received.empty()) { /// Run the onReceived callbacks to save the messages that have been received by now + loop_attempt = true; startEventLoop(false_param); + loop_attempt = false; } if (received.empty()) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 55adb39bdce..97eca73cece 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -66,6 +66,7 @@ private: String current_exchange_name; size_t count_subscribed = 0; size_t count_bound_queues = 0; + std::atomic loop_attempt; Messages received; Messages messages; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7cbfb164a2d..481314a38c2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -74,10 +74,14 @@ StorageRabbitMQ::StorageRabbitMQ( , hash_exchange(hash_exchange_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) + , login_password(std::make_pair( + rabbitmq_context.getConfigRef().getString("rabbitmq_username", "root"), + rabbitmq_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) , eventHandler(evbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), "/")) { 
size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -208,14 +212,14 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - return std::make_shared(consumer_channel, eventHandler, exchange_name, - routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, + next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - return std::make_shared(parsed_address, routing_key, exchange_name, + return std::make_shared(parsed_address, login_password, routing_key, exchange_name, log, num_consumers * num_queues, bind_by_id, hash_exchange, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 635d53e6cf0..563f37ae6f1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -86,7 +86,8 @@ private: const bool hash_exchange; Poco::Logger * log; - std::pair parsed_address; + std::pair parsed_address; + std::pair login_password; event_base * evbase; RabbitMQHandler eventHandler; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index e61a8e1ccd8..7c0764853c7 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -20,7 +20,8 @@ enum }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( - std::pair & parsed_address, + std::pair & parsed_address, + std::pair & login_password_, const String & routing_key_, const String & exchange_, Poco::Logger * log_, @@ -31,6 +32,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( size_t rows_per_message, size_t chunk_size_) : WriteBuffer(nullptr, 0) + , login_password(login_password_) , routing_key(routing_key_) , exchange_name(exchange_) , log(log_) @@ -42,7 +44,8 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , chunk_size(chunk_size_) , producerEvbase(event_base_new()) , eventHandler(producerEvbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), "/")) { /* The reason behind making a separate connection for each concurrent producer is explained here: * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index c61a76a3e74..e0c48556239 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -18,7 +19,8 @@ class WriteBufferToRabbitMQProducer : public WriteBuffer { public: WriteBufferToRabbitMQProducer( - std::pair & parsed_address, + std::pair & parsed_address, + std::pair & login_password_, const String & routing_key_, const String & exchange_, Poco::Logger * log_, @@ -40,6 +42,7 @@ private: void checkExchange(); void 
startEventLoop(std::atomic & check_param); + std::pair & login_password; const String routing_key; const String exchange_name; const bool bind_by_id; From cb618a32b80df2cbaec35264df01754d35e30d6b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Jun 2020 14:27:56 +0000 Subject: [PATCH 021/330] Fix style --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 4 ++-- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 34a77489faa..95d7e22d434 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -18,7 +18,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); @@ -44,7 +44,7 @@ void RabbitMQHandler::start(std::atomic & check_param) */ if (!check_param) { - event_base_loop(evbase, EVLOOP_NONBLOCK); + event_base_loop(evbase, EVLOOP_NONBLOCK); } mutex_before_event_loop.unlock(); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 39fccd4dace..d2d70185128 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,7 +19,7 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void start(std::atomic & check_param); + void start(std::atomic & check_param); void stop(); private: diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 563f37ae6f1..111e52768d0 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -63,11 +63,11 @@ protected: const ColumnsDescription & columns_, const String & host_port_, const String & routing_key_, - const String & exchange_name_, + const String & exchange_name_, const String & format_name_, char row_delimiter_, - size_t num_consumers_, - size_t num_queues_, + size_t num_consumers_, + size_t num_queues_, bool hash_exchange); private: From ce448d92916b180b7891ee5e14a8a9e52c703eeb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 7 Jun 2020 11:14:05 +0000 Subject: [PATCH 022/330] Better event handler --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 25 ++++++- src/Storages/RabbitMQ/RabbitMQHandler.h | 5 +- .../ReadBufferFromRabbitMQConsumer.cpp | 70 +++++++++---------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 11 ++- .../WriteBufferToRabbitMQProducer.cpp | 7 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 2 +- 6 files changed, 70 insertions(+), 50 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 95d7e22d434..d9dc19afa28 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -6,7 +6,8 @@ namespace DB enum { - Lock_timeout = 50 + Lock_timeout = 50, + Loop_stop_timeout = 200 }; @@ -15,6 +16,8 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : evbase(evbase_), log(log_) { + tv.tv_sec = 0; + tv.tv_usec = Loop_stop_timeout; } @@ -31,7 +34,7 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes } -void 
RabbitMQHandler::start(std::atomic & check_param) +void RabbitMQHandler::startConsumerLoop(std::atomic & check_param, std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop). But the loop should not be attempted to start if it is already running. @@ -44,6 +47,7 @@ void RabbitMQHandler::start(std::atomic & check_param) */ if (!check_param) { + loop_started = true; event_base_loop(evbase, EVLOOP_NONBLOCK); } @@ -51,6 +55,13 @@ void RabbitMQHandler::start(std::atomic & check_param) } } + +void RabbitMQHandler::startProducerLoop() +{ + event_base_loop(evbase, EVLOOP_NONBLOCK); +} + + void RabbitMQHandler::stop() { if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) @@ -60,4 +71,14 @@ void RabbitMQHandler::stop() } } + +void RabbitMQHandler::stopWithTimeout() +{ + if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + { + event_base_loopexit(evbase, &tv); + mutex_before_loop_stop.unlock(); + } +} + } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index d2d70185128..a8692a845f1 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,13 +19,16 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void start(std::atomic & check_param); + void startConsumerLoop(std::atomic & check_param, std::atomic & loop_started); + void startProducerLoop(); + void stopWithTimeout(); void stop(); private: event_base * evbase; Poco::Logger * log; + timeval tv; size_t count_passed = 0; std::timed_mutex mutex_before_event_loop; std::timed_mutex mutex_before_loop_stop; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 1bd2c7831ff..b650988dd61 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -7,17 +7,13 @@ #include #include #include +#include "Poco/Timer.h" #include namespace DB { -enum -{ - Received_max_to_stop_loop = 10000 // Explained below -}; - ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -44,7 +40,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , stopped(stopped_) , exchange_declared(false) , false_param(false) - , loop_attempt(false) { messages.clear(); current = messages.begin(); @@ -112,7 +107,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - /* This varibale can be updated from a different thread in case of some error so its better to always check + /* This varibale can be updated from a different thread in case of some error so its better to check * whether exchange is in a working state and if not - declare it once again. 
*/ if (!exchange_declared) @@ -123,7 +118,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) std::atomic bindings_created = false, bindings_error = false; - consumer_channel->declareQueue(AMQP::durable) + consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); @@ -151,12 +146,6 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onSuccess([&] { bindings_created = true; - - /// Unblock current thread so that it does not continue to execute all callbacks on the connection - if (++count_bound_queues == num_queues) - { - stopEventLoop(); - } }) .onError([&](const char * message) { @@ -176,8 +165,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ while (!bindings_created && !bindings_error) { - /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events - startEventLoop(bindings_created); + startEventLoop(bindings_created, loop_started); } } @@ -187,7 +175,7 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() if (subscribed) return; - LOG_TRACE(log, "Subscribing to " + std::to_string(queues.size()) + " queues"); + LOG_TRACE(log, "Subscribing {} to {} queues", channel_id, queues.size()); for (auto & queue : queues) { @@ -200,17 +188,19 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - std::atomic consumer_created = false, consumer_error = false; + std::atomic consumer_created = false, consumer_failed = false; consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) { consumer_created = true; + LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); - LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name); - - /// Unblock current thread so that it does not continue to execute all callbacks on the connection - if (++count_subscribed == queues.size()) + /* Unblock current thread if it is looping (any consumer could start the loop and only one of them) so that it does not + * continue to execute all active callbacks on the connection (=> one looping consumer will not be blocked for too + * long and events will be distributed between them) + */ + if (loop_started && ++count_subscribed == queues.size()) { stopEventLoop(); } @@ -223,9 +213,9 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) String message_received = std::string(message.body(), message.body() + message_size); if (row_delimiter != '\0') + { message_received += row_delimiter; - - //LOG_TRACE(log, "Consumer {} received a message", channel_id); + } bool stop_loop = false; @@ -235,9 +225,10 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) received.push_back(message_received); /* As event loop is blocking to the thread that started it and a single thread should not be blocked while - * executing all callbacks on the connection (not only its own), then there should be some point to unblock + * executing all callbacks on the connection (not only its own), then there should be some point to unblock. + * loop_started == 1 if current consumer is started the loop and not another. 
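     * The unblocking itself is delegated to RabbitMQHandler::stopWithTimeout(); as an illustrative sketch (using the
     * same libevent calls that handler already relies on, nothing new is introduced here), it boils down to:
     *
     *     timeval tv;
     *     tv.tv_sec = 0;
     *     tv.tv_usec = Loop_stop_timeout;
     *     event_base_loopexit(evbase, &tv);
     *
     * which asks the running event_base_loop() to return after the given timeout instead of looping until no active
     * events remain.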
*/ - if (!loop_attempt && received.size() % Received_max_to_stop_loop == 0) + if (!loop_started) { stop_loop = true; } @@ -245,20 +236,20 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (stop_loop) { - stopEventLoop(); + stopEventLoopWithTimeout(); } } }) .onError([&](const char * message) { - consumer_error = true; + consumer_failed = true; LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); - while (!consumer_created && !consumer_error) + /// These variables are updated in a separate thread. + while (!consumer_created && !consumer_failed) { - /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events - startEventLoop(consumer_created); + startEventLoop(consumer_created, loop_started); } } @@ -269,9 +260,15 @@ void ReadBufferFromRabbitMQConsumer::stopEventLoop() } -void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param) +void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout() { - eventHandler.start(check_param); + eventHandler.stopWithTimeout(); +} + + +void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param, std::atomic & loop_started) +{ + eventHandler.startConsumerLoop(check_param, loop_started); } @@ -284,10 +281,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { if (received.empty()) { - /// Run the onReceived callbacks to save the messages that have been received by now - loop_attempt = true; - startEventLoop(false_param); - loop_attempt = false; + /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread + startEventLoop(false_param, loop_started); + loop_started = false; } if (received.empty()) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 97eca73cece..2341c94443f 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -41,7 +41,6 @@ public: private: using Messages = std::vector; - using Queues = std::vector; ChannelPtr consumer_channel; RabbitMQHandler & eventHandler; @@ -51,6 +50,7 @@ private: const size_t channel_id; const bool bind_by_id; const bool hash_exchange; + const size_t num_queues; Poco::Logger * log; char row_delimiter; @@ -60,14 +60,12 @@ private: std::atomic exchange_declared; std::atomic false_param; - const size_t num_queues; - Queues queues; bool subscribed = false; String current_exchange_name; size_t count_subscribed = 0; - size_t count_bound_queues = 0; - std::atomic loop_attempt; + std::atomic loop_started; + std::vector queues; Messages received; Messages messages; Messages::iterator current; @@ -79,7 +77,8 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(std::atomic & check_param); + void startEventLoop(std::atomic & check_param, std::atomic & loop_started); + void stopEventLoopWithTimeout(); void stopEventLoop(); }; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 7c0764853c7..31c3dea97aa 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -153,9 +153,10 @@ void WriteBufferToRabbitMQProducer::flush() LOG_ERROR(log, "Exchange was not declared: {}", message); }); + /// These variables are updated in a separate thread and 
starting the loop blocks current thread while (!exchange_declared && !exchange_error) { - startEventLoop(exchange_declared); + startEventLoop(); } } @@ -168,9 +169,9 @@ void WriteBufferToRabbitMQProducer::nextImpl() } -void WriteBufferToRabbitMQProducer::startEventLoop(std::atomic & check_param) +void WriteBufferToRabbitMQProducer::startEventLoop() { - eventHandler.start(check_param); + eventHandler.startProducerLoop(); } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index e0c48556239..9ae3893d6ae 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -40,7 +40,7 @@ public: private: void nextImpl() override; void checkExchange(); - void startEventLoop(std::atomic & check_param); + void startEventLoop(); std::pair & login_password; const String routing_key; From 17e7cc03c0e090bc045a8b0d5ee720e7d8df8ca2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Jun 2020 01:11:48 +0000 Subject: [PATCH 023/330] Add consumer failure handler --- .../ReadBufferFromRabbitMQConsumer.cpp | 50 ++++++++++++++++--- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 16 ++++-- .../integration/test_storage_rabbitmq/test.py | 25 +++++++--- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index b650988dd61..5d649ab2084 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -10,10 +10,14 @@ #include "Poco/Timer.h" #include - namespace DB { +enum +{ + Loop_retries_limit = 500 +}; + ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -38,8 +42,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , hash_exchange(hash_exchange_) , num_queues(num_queues_) , stopped(stopped_) - , exchange_declared(false) - , false_param(false) { messages.clear(); current = messages.begin(); @@ -122,6 +124,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); + subscribed_queue[queue_name_] = false; + String binding_key = routing_key; /* Every consumer has at least one unique queue. 
Bind the queues to exchange based on the consumer_channel_id @@ -175,34 +179,43 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() if (subscribed) return; - LOG_TRACE(log, "Subscribing {} to {} queues", channel_id, queues.size()); - for (auto & queue : queues) { subscribe(queue); } - subscribed = true; + LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); + + if (count_subscribed == queues.size()) + { + subscribed = true; + } } void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - std::atomic consumer_created = false, consumer_failed = false; + if (subscribed_queue[queue_name]) + return; + + consumer_created = false, consumer_failed = false; consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) { consumer_created = true; + ++count_subscribed; + LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); /* Unblock current thread if it is looping (any consumer could start the loop and only one of them) so that it does not * continue to execute all active callbacks on the connection (=> one looping consumer will not be blocked for too * long and events will be distributed between them) */ - if (loop_started && ++count_subscribed == queues.size()) + if (loop_started && count_subscribed == queues.size()) { stopEventLoop(); + subscribed = true; } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) @@ -246,10 +259,31 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); + size_t cnt_retries = 0; + /// These variables are updated in a separate thread. while (!consumer_created && !consumer_failed) { startEventLoop(consumer_created, loop_started); + + if (!consumer_created && !consumer_failed) + { + if (cnt_retries >= Loop_retries_limit) + { + /* For unknown reason there is a case when subscribtion may fail and OnError callback is not activated + * for a long time. In this case there should be resubscription. + */ + LOG_ERROR(log, "Consumer {} failed to subscride to queue {}", channel_id, queue_name); + break; + } + + ++cnt_retries; + } + } + + if (consumer_created) + { + subscribed_queue[queue_name] = true; } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 2341c94443f..9e0b29307c4 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -58,17 +58,23 @@ private: bool allowed = true; const std::atomic & stopped; - std::atomic exchange_declared; - std::atomic false_param; - bool subscribed = false; String current_exchange_name; - size_t count_subscribed = 0; - std::atomic loop_started; + + /* Note: as all concurrent consumers share the same connection => they also share the same + * event loop, which can be started by any consumer and the loop is blocking only to the thread that + * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, + * at most two threads will be present: main thread and the one that executes callbacks (1 thread if + * main thread is the one that started the loop). Both reference these variables. 
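     * A sketch of how those two threads synchronise (illustrative only, mirroring the existing code in subscribe()
     * and nextImpl() rather than adding anything new): the callback thread appends under the lock,
     *
     *     std::lock_guard lock(mutex);
     *     received.push_back(message_received);
     *
     * while the main thread swaps the accumulated batch out under the same lock,
     *
     *     std::lock_guard lock(mutex);
     *     messages.swap(received);
     *
     * so the received vector is never read and written concurrently.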
+ */ + std::atomic exchange_declared = false, subscribed = false, loop_started = false, false_param = false; + std::atomic consumer_created = false, consumer_failed = false; + std::atomic count_subscribed = 0; std::vector queues; Messages received; Messages messages; Messages::iterator current; + std::unordered_map subscribed_queue; std::mutex mutex; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 0533dd7e2f4..d7e991fe7ae 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -145,9 +145,14 @@ def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) - result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=False) - connection.close() + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + rabbitmq_check_result(result, True) @@ -171,9 +176,14 @@ def test_rabbitmq_select_from_old_syntax_table(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key='old', body=message) - result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) - connection.close() + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + rabbitmq_check_result(result, True) @@ -294,7 +304,11 @@ def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key='tsv', body=message) - result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break connection.close() rabbitmq_check_result(result, True) @@ -997,7 +1011,6 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) - print result if int(result) == messages_num * threads_num: break From 897a592ee60b9c4308aa12bc46a2cc3d933a2f75 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 9 Jun 2020 21:52:06 +0000 Subject: [PATCH 024/330] Move subscription --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 14 +-- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- .../ReadBufferFromRabbitMQConsumer.cpp | 94 ++++++------------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 23 +++-- 5 files changed, 44 insertions(+), 91 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 1c6eaf6f2e9..245320008f3 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -50,7 +50,7 @@ void RabbitMQBlockInputStream::readPrefixImpl() if (!buffer || finished) return; - buffer->subscribeConsumer(); + buffer->checkSubscription(); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index d9dc19afa28..0a432e1b5ca 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -34,23 +34,15 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes 
} -void RabbitMQHandler::startConsumerLoop(std::atomic & check_param, std::atomic & loop_started) +void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop). But the loop should not be attempted to start if it is already running. */ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { - /* The callback, which changes this variable, could have already been activated by another thread while we waited - * for the mutex to unlock (as it runs all active events on the connection). This means that there is no need to - * start event loop again. - */ - if (!check_param) - { - loop_started = true; - event_base_loop(evbase, EVLOOP_NONBLOCK); - } - + loop_started = true; + event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index a8692a845f1..911651097bb 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,7 +19,7 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void startConsumerLoop(std::atomic & check_param, std::atomic & loop_started); + void startConsumerLoop(std::atomic & loop_started); void startProducerLoop(); void stopWithTimeout(); void stop(); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5d649ab2084..32dcd30e6f5 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,10 +13,6 @@ namespace DB { -enum -{ - Loop_retries_limit = 500 -}; ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, @@ -109,9 +105,6 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - /* This varibale can be updated from a different thread in case of some error so its better to check - * whether exchange is in a working state and if not - declare it once again. 
- */ if (!exchange_declared) { initExchange(); @@ -144,6 +137,9 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } + /// Must be done here, cannot be done in readPrefix() + subscribe(queues.back()); + LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) @@ -169,26 +165,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ while (!bindings_created && !bindings_error) { - startEventLoop(bindings_created, loop_started); - } -} - - -void ReadBufferFromRabbitMQConsumer::subscribeConsumer() -{ - if (subscribed) - return; - - for (auto & queue : queues) - { - subscribe(queue); - } - - LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - - if (count_subscribed == queues.size()) - { - subscribed = true; + startEventLoop(loop_started); } } @@ -198,25 +175,13 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (subscribed_queue[queue_name]) return; - consumer_created = false, consumer_failed = false; - consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) { - consumer_created = true; + subscribed_queue[queue_name] = true; ++count_subscribed; LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); - - /* Unblock current thread if it is looping (any consumer could start the loop and only one of them) so that it does not - * continue to execute all active callbacks on the connection (=> one looping consumer will not be blocked for too - * long and events will be distributed between them) - */ - if (loop_started && count_subscribed == queues.size()) - { - stopEventLoop(); - subscribed = true; - } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -232,7 +197,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) bool stop_loop = false; - /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl(). { std::lock_guard lock(mutex); received.push_back(message_received); @@ -255,35 +220,32 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) }) .onError([&](const char * message) { - consumer_failed = true; + consumer_error = true; LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); +} - size_t cnt_retries = 0; - /// These variables are updated in a separate thread. - while (!consumer_created && !consumer_failed) +void ReadBufferFromRabbitMQConsumer::checkSubscription() +{ + /// In general this condition will always be true and looping/resubscribing would not happen + if (count_subscribed == num_queues) + return; + + wait_subscribed = num_queues; + + /// These variables are updated in a separate thread + while (count_subscribed != wait_subscribed && !consumer_error) { - startEventLoop(consumer_created, loop_started); - - if (!consumer_created && !consumer_failed) - { - if (cnt_retries >= Loop_retries_limit) - { - /* For unknown reason there is a case when subscribtion may fail and OnError callback is not activated - * for a long time. In this case there should be resubscription. 
- */ - LOG_ERROR(log, "Consumer {} failed to subscride to queue {}", channel_id, queue_name); - break; - } - - ++cnt_retries; - } + startEventLoop(loop_started); } - if (consumer_created) + LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); + + /// A case that would not normally happen + for (auto & queue : queues) { - subscribed_queue[queue_name] = true; + subscribe(queue); } } @@ -300,9 +262,9 @@ void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout() } -void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param, std::atomic & loop_started) +void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & loop_started) { - eventHandler.startConsumerLoop(check_param, loop_started); + eventHandler.startConsumerLoop(loop_started); } @@ -316,7 +278,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (received.empty()) { /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread - startEventLoop(false_param, loop_started); + startEventLoop(loop_started); loop_started = false; } @@ -328,7 +290,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() messages.clear(); - /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback (above). + /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. std::lock_guard lock(mutex); messages.swap(received); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 9e0b29307c4..7fbc1024d44 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -37,7 +37,7 @@ public: ~ReadBufferFromRabbitMQConsumer() override; void allowNext() { allowed = true; } // Allow to read next message. - void subscribeConsumer(); + void checkSubscription(); private: using Messages = std::vector; @@ -59,16 +59,9 @@ private: const std::atomic & stopped; String current_exchange_name; - - /* Note: as all concurrent consumers share the same connection => they also share the same - * event loop, which can be started by any consumer and the loop is blocking only to the thread that - * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, - * at most two threads will be present: main thread and the one that executes callbacks (1 thread if - * main thread is the one that started the loop). Both reference these variables. - */ - std::atomic exchange_declared = false, subscribed = false, loop_started = false, false_param = false; - std::atomic consumer_created = false, consumer_failed = false; - std::atomic count_subscribed = 0; + bool exchange_declared = false; + std::atomic loop_started = false, consumer_error = false; + std::atomic count_subscribed = 0, wait_subscribed; std::vector queues; Messages received; @@ -76,6 +69,12 @@ private: Messages::iterator current; std::unordered_map subscribed_queue; + /* Note: as all consumers share the same connection => they also share the same + * event loop, which can be started by any consumer and the loop is blocking only to the thread that + * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, + * at most two threads will be present: main thread and the one that executes callbacks (1 thread if + * main thread is the one that started the loop). 
+ */ std::mutex mutex; bool nextImpl() override; @@ -83,7 +82,7 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(std::atomic & check_param, std::atomic & loop_started); + void startEventLoop(std::atomic & loop_started); void stopEventLoopWithTimeout(); void stopEventLoop(); From 472b04b69c58adaac2cd1030d7921c7ceb6f09a8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 10 Jun 2020 19:59:37 +0000 Subject: [PATCH 025/330] Better producer --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 7 +-- .../WriteBufferToRabbitMQProducer.cpp | 50 +++++++------------ .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 +- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 8e867db6de9..5dc2c1f8fc4 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -37,17 +37,14 @@ void RabbitMQBlockOutputStream::writePrefix() { buffer->countRow(); }); + + buffer->startEventLoop(); } void RabbitMQBlockOutputStream::write(const Block & block) { child->write(block); - - if (buffer) - buffer->flush(); - - storage.pingConnection(); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 31c3dea97aa..151d5fc62d4 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -16,7 +16,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers + Batch = 10000 }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( @@ -64,12 +64,13 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( } producer_channel = std::make_shared(&connection); + checkExchange(); } WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - flush(); + checkExchange(); connection.close(); assert(rows == 0 && chunks.empty()); @@ -98,18 +99,29 @@ void WriteBufferToRabbitMQProducer::countRow() chunks.clear(); set(nullptr, 0); - messages.emplace_back(payload); + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + ++message_counter; - if (messages.size() >= Buffer_limit_to_flush) + /// run event loop to actually publish, checking exchange is just a point to stop the event loop + if ((message_counter %= Batch) == 0) { - flush(); + checkExchange(); } } } -void WriteBufferToRabbitMQProducer::flush() +void WriteBufferToRabbitMQProducer::checkExchange() { std::atomic exchange_declared = false, exchange_error = false; @@ -120,32 +132,6 @@ void WriteBufferToRabbitMQProducer::flush() .onSuccess([&]() { exchange_declared = true; - - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing - * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it - * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to - * exchange becoming inactive at some point and part of messages is lost as a result. 
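For contrast, the approach this patch moves to ("Better producer") is to publish each row straight away and to run the event loop once per batch, instead of wrapping every publish inside declareExchange().onSuccess. A rough standalone sketch of that batching pattern follows; the exchange name, batch size and broker address are placeholders, and AMQP::LibEventHandler stands in for the storage's handler.

    #include <amqpcpp.h>
    #include <amqpcpp/libevent.h>
    #include <event2/event.h>
    #include <string>

    int main()
    {
        event_base * evbase = event_base_new();
        AMQP::LibEventHandler handler(evbase);
        AMQP::TcpConnection connection(&handler, AMQP::Address("amqp://guest:guest@localhost/"));
        AMQP::TcpChannel channel(&connection);

        channel.declareExchange("sketch-exchange", AMQP::fanout);

        const size_t batch = 1000;   /// how many rows to buffer before flushing

        for (size_t i = 1; i <= 10000; ++i)
        {
            /// publish() is asynchronous: the frame is handed to the connection
            /// and actually goes out while the event loop is running.
            channel.publish("sketch-exchange", "", "row " + std::to_string(i));

            if (i % batch == 0)
                event_base_loop(evbase, EVLOOP_NONBLOCK);   /// flush this batch
        }

        /// Flush whatever is left and let the close handshake complete.
        connection.close();
        event_base_dispatch(evbase);
        event_base_free(evbase);
    }
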
- */ - for (auto & payload : messages) - { - if (!message_counter) - break; - - next_queue = next_queue % num_queues + 1; - - if (bind_by_id || hash_exchange) - { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); - } - else - { - producer_channel->publish(exchange_name, routing_key, payload); - } - - --message_counter; - } - - messages.clear(); }) .onError([&](const char * message) { diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 9ae3893d6ae..3cbcec9ccc2 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -35,12 +35,11 @@ public: ~WriteBufferToRabbitMQProducer() override; void countRow(); - void flush(); + void startEventLoop(); private: void nextImpl() override; void checkExchange(); - void startEventLoop(); std::pair & login_password; const String routing_key; From d7b269480641aabbcec3f1702dacfefab123e6c5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 10 Jun 2020 23:01:47 +0000 Subject: [PATCH 026/330] Support all exchange types --- src/Storages/RabbitMQ/RabbitMQSettings.h | 3 +- .../ReadBufferFromRabbitMQConsumer.cpp | 132 +++++-- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 12 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 6 +- .../WriteBufferToRabbitMQProducer.cpp | 6 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 4 +- .../integration/test_storage_rabbitmq/test.py | 368 ++++++++++++++---- 8 files changed, 411 insertions(+), 153 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 509ed68b8d3..a3f133cfed0 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -12,12 +12,13 @@ namespace DB #define LIST_OF_RABBITMQ_SETTINGS(M) \ M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \ - M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exhange name, to which messages are sent. Needed to bind queues to it.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. 
Needed to bind queues to it.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ + M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 32dcd30e6f5..1321a4fb3b6 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -23,8 +23,9 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( Poco::Logger * log_, char row_delimiter_, const bool bind_by_id_, - const bool hash_exchange_, const size_t num_queues_, + const String & exchange_type_, + const String table_name_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -35,18 +36,22 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , log(log_) , row_delimiter(row_delimiter_) , bind_by_id(bind_by_id_) - , hash_exchange(hash_exchange_) , num_queues(num_queues_) + , exchange_type(exchange_type_) + , table_name(table_name_) , stopped(stopped_) { messages.clear(); current = messages.begin(); + exchange_type_set = exchange_type != "default" ? 1 : 0; + /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. */ for (size_t queue_id = 0; queue_id < num_queues; ++queue_id) { + /// Queue bingings must be declared before any publishing => it must be done here and not in readPrefix() initQueueBindings(queue_id); } } @@ -64,54 +69,88 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { - /* As there are 5 different types of exchanges and the type should be set as a parameter while publishing the message, - * then for uniformity this parameter should always be set as fanout-exchange type. In current implementation, the exchange, - * to which messages a published, will be bound to the exchange of the needed type, which will distribute messages according to its type. + /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest + * and it is also used for INSERT query. */ - consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) + String producer_exchange = exchange_type_set ? 
exchange_name + "_default" : exchange_name; + consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) { - exchange_declared = false; - LOG_ERROR(log, "Failed to declare fanout exchange: {}", message); + internal_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange: {}", message); }); - if (hash_exchange) + internal_exchange_name = producer_exchange + "_direct"; + consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message) { - current_exchange_name = exchange_name + "_hash"; - consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * /* message */) - { - exchange_declared = false; - }); + internal_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange: {}", message); + }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) - { - exchange_declared = false; - }); - } - else + consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_key).onError([&](const char * message) { - current_exchange_name = exchange_name + "_direct"; - consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * /* message */) - { - exchange_declared = false; - }); + internal_exchange_declared = false; + LOG_ERROR(log, "Failed to bind exchange: {}", message); + }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) - { - exchange_declared = false; - }); - } + if (!exchange_type_set) + return; + + /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is also supported. + + AMQP::ExchangeType type; + if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout; + else if (exchange_type == "direct") type = AMQP::ExchangeType::direct; + else if (exchange_type == "topic") type = AMQP::ExchangeType::topic; + else if (exchange_type == "consistent_hash") type = AMQP::ExchangeType::consistent_hash; + else return; + + /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages + * between all consumers. (This enables better scaling as without hash-echange - the only oprion to avoid getting the same + * messages more than once - is having only one consumer with one queue, which is not good.) 
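The hash-exchange idea is easier to see in isolation. The sketch below relies on the rabbitmq-consistent-hash-exchange plugin being enabled on the broker; the names are placeholders and the code is illustrative, not the patch's. With that plugin a binding's routing key acts as a weight, and with "hash-property" set to "message_id" the exchange hashes on the message id, so messages with distinct ids spread across the bound queues.

    #include <amqpcpp.h>
    #include <amqpcpp/libevent.h>
    #include <event2/event.h>
    #include <string>

    int main()
    {
        event_base * evbase = event_base_new();
        AMQP::LibEventHandler handler(evbase);
        AMQP::TcpConnection connection(&handler, AMQP::Address("amqp://guest:guest@localhost/"));
        AMQP::TcpChannel channel(&connection);

        /// Hash on the message_id property rather than on the routing key.
        AMQP::Table arguments;
        arguments["hash-property"] = "message_id";
        channel.declareExchange("sketch-hash", AMQP::consistent_hash, arguments);

        /// For a consistent-hash exchange the binding key is a weight: two
        /// queues bound with weight "1" each get roughly half of the stream.
        channel.declareQueue("sketch-q1");
        channel.declareQueue("sketch-q2");
        channel.bindQueue("sketch-hash", "sketch-q1", "1");
        channel.bindQueue("sketch-hash", "sketch-q2", "1");

        /// Distinct message ids make the hash spread messages across the queues.
        for (int i = 0; i < 10; ++i)
        {
            AMQP::Envelope envelope("payload", 7);
            envelope.setMessageID(std::to_string(i));
            channel.publish("sketch-hash", "", envelope);
        }

        connection.close();
        event_base_dispatch(evbase);
        event_base_free(evbase);
    }
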
+ */ + consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + }); + + hash_exchange = true; + + /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash + if (!bind_by_id && exchange_type != "consistent_hash") + return; + + AMQP::Table exchange_arguments; + exchange_arguments["hash-property"] = "message_id"; + + local_exchange_name = exchange_name + "_" + table_name; + consumer_channel->declareExchange(local_exchange_name, AMQP::consistent_hash, exchange_arguments) + .onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + }); + + consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + }); } void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - if (!exchange_declared) + /// These variables might be updated later from a separate thread in onError callbacks + if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) { initExchange(); - exchange_declared = true; + local_exchange_declared = true; + internal_exchange_declared = true; } - std::atomic bindings_created = false, bindings_error = false; + bool internal_bindings_created = false, internal_bindings_error = false; + bool local_bindings_created = false, local_bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -137,25 +176,39 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - /// Must be done here, cannot be done in readPrefix() + /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection subscribe(queues.back()); LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); - consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) .onSuccess([&] { - bindings_created = true; + internal_bindings_created = true; }) .onError([&](const char * message) { - bindings_error = true; + internal_bindings_error = true; LOG_ERROR(log, "Failed to create queue binding: {}", message); }); + + if (exchange_type_set) + { + consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } }) .onError([&](const char * message) { - bindings_error = true; + internal_bindings_error = true; LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); @@ -163,7 +216,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) * It is important at this moment to make sure that queue bindings are created before any publishing can happen because * otherwise messages will be routed nowhere. 
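That ordering constraint - no publishing before the bindings are confirmed - comes down to latching the onSuccess/onError callbacks of bindQueue() and spinning the loop until one of them fires. A rough standalone sketch, with placeholder names and broker address, assuming the stock AMQP::LibEventHandler:

    #include <amqpcpp.h>
    #include <amqpcpp/libevent.h>
    #include <event2/event.h>
    #include <atomic>

    int main()
    {
        event_base * evbase = event_base_new();
        AMQP::LibEventHandler handler(evbase);
        AMQP::TcpConnection connection(&handler, AMQP::Address("amqp://guest:guest@localhost/"));
        AMQP::TcpChannel channel(&connection);

        channel.declareExchange("sketch-exchange", AMQP::direct);
        channel.declareQueue("sketch-queue");

        std::atomic<bool> bound{false}, error{false};
        channel.bindQueue("sketch-exchange", "sketch-queue", "sketch-key")
            .onSuccess([&]() { bound = true; })
            .onError([&](const char * /* reason */) { error = true; });

        /// Publishing before this point could route messages nowhere: an
        /// exchange with no matching binding silently drops them (unless the
        /// mandatory flag is used), so block until the binding is confirmed.
        while (!bound && !error)
            event_base_loop(evbase, EVLOOP_ONCE);

        if (bound)
            channel.publish("sketch-exchange", "sketch-key", "first message");

        connection.close();
        event_base_dispatch(evbase);
        event_base_free(evbase);
    }
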
*/ - while (!bindings_created && !bindings_error) + while (!internal_bindings_created && !internal_bindings_error + || (exchange_type_set && !local_bindings_created && !local_bindings_error)) { startEventLoop(loop_started); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 7fbc1024d44..51eae60cdeb 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -30,8 +30,9 @@ public: Poco::Logger * log_, char row_delimiter_, const bool bind_by_id_, - const bool hash_exchange_, const size_t num_queues_, + const String & exchange_type_, + const String table_name_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -49,8 +50,9 @@ private: const String & routing_key; const size_t channel_id; const bool bind_by_id; - const bool hash_exchange; const size_t num_queues; + const String & exchange_type; + const String table_name; Poco::Logger * log; char row_delimiter; @@ -58,8 +60,10 @@ private: bool allowed = true; const std::atomic & stopped; - String current_exchange_name; - bool exchange_declared = false; + String internal_exchange_name, local_exchange_name; + bool internal_exchange_declared = false, local_exchange_declared = false; + bool exchange_type_set = false, hash_exchange = false; + std::atomic loop_started = false, consumer_error = false; std::atomic count_subscribed = 0, wait_subscribed; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 481314a38c2..895b9ca2bec 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -59,9 +59,9 @@ StorageRabbitMQ::StorageRabbitMQ( const String & exchange_name_, const String & format_name_, char row_delimiter_, + const String & exchange_type_, size_t num_consumers_, - size_t num_queues_, - bool hash_exchange_) + size_t num_queues_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) @@ -71,7 +71,7 @@ StorageRabbitMQ::StorageRabbitMQ( , row_delimiter(row_delimiter_) , num_consumers(num_consumers_) , num_queues(num_queues_) - , hash_exchange(hash_exchange_) + , exchange_type(exchange_type_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( @@ -212,16 +212,20 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); + auto table_id = getStorageID(); + String table_name = table_id.getNameForLogs(); + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, - next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - return std::make_shared(parsed_address, login_password, routing_key, exchange_name, - log, num_consumers * num_queues, bind_by_id, hash_exchange, - row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; + + return std::make_shared(parsed_address, login_password, routing_key, producer_exchange, + log, num_consumers * num_queues, bind_by_id, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -436,20 +440,19 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - bool hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); + String exchange_type = rabbitmq_settings.rabbitmq_exchange_type.value; if (args_count >= 6) { + engine_args[5] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[5], args.local_context); + const auto * ast = engine_args[5]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) + if (ast && ast->value.getType() == Field::Types::String) { - hash_exchange = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); + exchange_type = safeGet(ast->value); } } + UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; if (args_count >= 7) { @@ -480,7 +483,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); + host_port, routing_key, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 111e52768d0..27a9b8834f4 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -66,9 +66,9 @@ protected: const String & exchange_name_, const String & format_name_, char row_delimiter_, + const String & exchange_type_, size_t num_consumers_, - size_t num_queues_, - bool hash_exchange); + size_t num_queues_); private: Context global_context; @@ -83,7 +83,7 @@ private: size_t num_created_consumers = 0; bool bind_by_id; size_t num_queues; - const bool hash_exchange; + const String exchange_type; Poco::Logger * log; std::pair parsed_address; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 151d5fc62d4..8fa241dade5 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -23,11 +23,10 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( std::pair & parsed_address, std::pair & login_password_, const String & routing_key_, - const String & exchange_, + const String exchange_, Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, - const bool hash_exchange_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_) @@ -38,7 +37,6 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) - , hash_exchange(hash_exchange_) , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) @@ -101,7 +99,7 @@ void WriteBufferToRabbitMQProducer::countRow() next_queue = next_queue % num_queues + 1; - if (bind_by_id || hash_exchange) + if (bind_by_id) { producer_channel->publish(exchange_name, std::to_string(next_queue), payload); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 3cbcec9ccc2..90e0d90b356 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -22,11 +22,10 @@ public: std::pair & parsed_address, std::pair & 
login_password_, const String & routing_key_, - const String & exchange_, + const String exchange_, Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, - const bool hash_exchange_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_ @@ -45,7 +44,6 @@ private: const String routing_key; const String exchange_name; const bool bind_by_id; - const bool hash_exchange; const size_t num_queues; event_base * producerEvbase; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index d7e991fe7ae..d9c08ef7b6b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -497,90 +497,6 @@ def test_rabbitmq_big_message(rabbitmq_cluster): assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) -def test_rabbitmq_sharding_between_tables(rabbitmq_cluster): - - NUMBER_OF_CONCURRENT_CONSUMERS = 10 - - instance.query(''' - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination(key UInt64, value UInt64, - _consumed_by LowCardinality(String)) - ENGINE = MergeTree() - ORDER BY key; - ''') - - for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): - table_name = 'rabbitmq_consumer{}'.format(consumer_id) - print("Setting up {}".format(table_name)) - - instance.query(''' - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - CREATE TABLE test.{0} (key UInt64, value UInt64) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_hash_exchange = 1, - rabbitmq_format = 'JSONEachRow', - rabbitmq_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS - SELECT key, value, '{0}' as _consumed_by FROM test.{0}; - '''.format(table_name)) - - i = [0] - messages_num = 1000 - - credentials = pika.PlainCredentials('root', 'clickhouse') - parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) - - def produce(): - # init connection here because otherwise python rabbitmq client fails sometimes - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({'key': i[0], 'value': i[0]})) - i[0] += 1 - key = str(randrange(1, NUMBER_OF_CONCURRENT_CONSUMERS)) - for message in messages: - channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) - connection.close() - time.sleep(1) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - while True: - result = instance.query('SELECT count() FROM test.destination') - time.sleep(1) - if int(result) == messages_num * threads_num: - break - - for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): - print("dropping rabbitmq_consumer{}".format(consumer_id)) - table_name = 'rabbitmq_consumer{}'.format(consumer_id) - instance.query(''' - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - '''.format(table_name)) - - instance.query(''' - DROP TABLE IF EXISTS test.destination; - ''') - - for thread in threads: - thread.join() - - assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) - - @pytest.mark.timeout(320) def 
test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): @@ -1011,6 +927,7 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) + print result if int(result) == messages_num * threads_num: break @@ -1085,6 +1002,289 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_direct_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + CREATE TABLE test.direct_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'direct_exchange_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key = 'direct_{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.direct_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.direct_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='direct_exchange_testing', exchange_type='direct') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "direct_" + str(key_num) + key_num += 1 + for message in messages: + channel.basic_publish(exchange='direct_exchange_testing', routing_key=key, body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_fanout_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS test.fanout_exchange_{0}_mv; + CREATE TABLE test.fanout_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_routing_key = 'key_{0}', + rabbitmq_exchange_name = 'fanout_exchange_testing', + rabbitmq_exchange_type = 'fanout', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.fanout_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM 
test.fanout_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='fanout_exchange_testing', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for message in messages: + channel.basic_publish(exchange='fanout_exchange_testing', routing_key='', body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_topic_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key = '*.{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(consumer_id)) + + for consumer_id in range(num_tables): + print("Setting up table {}".format(num_tables + consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key = '*.logs', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(num_tables + consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='topic_exchange_testing', exchange_type='topic') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "topic." 
+ str(key_num) + key_num += 1 + for message in messages: + channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + + key = "random.logs" + for message in messages: + channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables + messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_hash_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 4 + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + print("Setting up {}".format(table_name)) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_exchange_type = 'consistent_hash', + rabbitmq_exchange_name = 'hash_exchange_testing', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.{0}; + '''.format(table_name)) + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(10)) + for message in messages: + channel.basic_publish(exchange='hash_exchange_testing', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + '''.format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From 9350472ee456bd0561e263df8b9c4f13bef3aaf6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 09:23:23 +0000 Subject: [PATCH 027/330] Support multiple bindings --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 6 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 5 +- 
.../ReadBufferFromRabbitMQConsumer.cpp | 77 +++++++++----- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 6 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +++--- src/Storages/RabbitMQ/StorageRabbitMQ.h | 7 +- .../integration/test_storage_rabbitmq/test.py | 100 +++++++++++++++--- 7 files changed, 162 insertions(+), 72 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 245320008f3..2d995d97f18 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -20,7 +20,7 @@ RabbitMQBlockInputStream::RabbitMQBlockInputStream( , column_names(columns) , log(log_) , non_virtual_header(storage.getSampleBlockNonMaterialized()) - , virtual_header(storage.getSampleBlockForColumns({"_exchange", "_routingKey"})) + , virtual_header(storage.getSampleBlockForColumns({"_exchange"})) { } @@ -122,13 +122,11 @@ Block RabbitMQBlockInputStream::readImpl() auto new_rows = read_rabbitmq_message(); - auto exchange_name = storage.getExchangeName(); - auto routing_key = storage.getRoutingKey(); + auto exchange_name = buffer->getExchange(); for (size_t i = 0; i < new_rows; ++i) { virtual_columns[0]->insert(exchange_name); - virtual_columns[1]->insert(routing_key); } total_rows = total_rows + new_rows; diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index a3f133cfed0..d81a887747b 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -11,13 +11,12 @@ namespace DB #define LIST_OF_RABBITMQ_SETTINGS(M) \ M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ - M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \ - M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. 
Needed to bind queues to it.", 0) \ + M(SettingString, rabbitmq_routing_key_list, "5672", "A string of routing keys, separated by dots.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ - M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 1321a4fb3b6..967da1a75ad 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -18,7 +18,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, const String & exchange_name_, - const String & routing_key_, + const Names & routing_keys_, const size_t channel_id_, Poco::Logger * log_, char row_delimiter_, @@ -31,7 +31,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , consumer_channel(std::move(consumer_channel_)) , eventHandler(eventHandler_) , exchange_name(exchange_name_) - , routing_key(routing_key_) + , routing_keys(routing_keys_) , channel_id(channel_id_) , log(log_) , row_delimiter(row_delimiter_) @@ -44,7 +44,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != "default" ? 1 : 0; + exchange_type_set = exchange_type != "default" ? true : false; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. @@ -69,8 +69,8 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { - /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest - * and it is also used for INSERT query. + /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different + * binding algorithm this default behaviuor is much faster). It is also used in INSERT query. */ String producer_exchange = exchange_type_set ? 
exchange_name + "_default" : exchange_name; consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) @@ -86,7 +86,8 @@ void ReadBufferFromRabbitMQConsumer::initExchange() LOG_ERROR(log, "Failed to declare exchange: {}", message); }); - consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_key).onError([&](const char * message) + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary + consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_keys[0]).onError([&](const char * message) { internal_exchange_declared = false; LOG_ERROR(log, "Failed to bind exchange: {}", message); @@ -95,7 +96,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() if (!exchange_type_set) return; - /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is also supported. + /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported. AMQP::ExchangeType type; if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout; @@ -131,11 +132,14 @@ void ReadBufferFromRabbitMQConsumer::initExchange() LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + for (auto & routing_key : routing_keys) { - local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); - }); + consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + }); + } } @@ -158,7 +162,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) queues.emplace_back(queue_name_); subscribed_queue[queue_name_] = false; - String binding_key = routing_key; + String binding_key = routing_keys[0]; /* Every consumer has at least one unique queue. Bind the queues to exchange based on the consumer_channel_id * in case there is one queue per consumer and bind by queue_id in case there is more than 1 queue per consumer. 
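The id-based binding scheme described in this comment can be reduced to a small helper. The sketch below is illustrative - the function name and the exact formula are assumptions, not the expression used by the patch - but it shows the intent: one distinct key per queue, derived from the consumer channel id and, when a consumer owns several queues, from the queue index as well.

    #include <iostream>
    #include <string>

    /// One binding key per queue: just the channel id when each consumer has a
    /// single queue, otherwise a key derived from both ids.
    static std::string bindingKeyFor(size_t channel_id, size_t queue_id, size_t num_queues)
    {
        return num_queues > 1 ? std::to_string(channel_id * num_queues + queue_id)
                              : std::to_string(channel_id);
    }

    int main()
    {
        /// Consumer (channel) 2 owning 2 queues ends up bound by keys "4" and
        /// "5", so a producer cycling keys 0..num_consumers*num_queues-1
        /// reaches every queue.
        for (size_t queue_id = 0; queue_id < 2; ++queue_id)
            std::cout << bindingKeyFor(2, queue_id, 2) << '\n';
    }
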
@@ -176,11 +180,6 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection - subscribe(queues.back()); - - LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); - consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) .onSuccess([&] { @@ -189,21 +188,47 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { internal_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); + LOG_ERROR(log, "Failed to bind to key {}, the reason is: {}", binding_key, message); }); + /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection + subscribe(queues.back()); + + LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); + if (exchange_type_set) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) - .onSuccess([&] + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + if (hash_exchange) { - local_bindings_created = true; - }) - .onError([&](const char * message) + consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } + else { - local_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); - }); + /// means there is only one queue with one consumer - no even distribution needed - no hash-exchange + for (auto & routing_key : routing_keys) + { + consumer_channel->bindQueue(local_exchange_name, queue_name_, routing_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } + } } }) .onError([&](const char * message) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 51eae60cdeb..3d02eeab761 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -25,7 +25,7 @@ public: ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, const String & exchange_name_, - const String & routing_key_, + const Names & routing_keys_, const size_t channel_id_, Poco::Logger * log_, char row_delimiter_, @@ -40,6 +40,8 @@ public: void allowNext() { allowed = true; } // Allow to read next message. 
void checkSubscription(); + auto getExchange() const { return exchange_name; } + private: using Messages = std::vector; @@ -47,7 +49,7 @@ private: RabbitMQHandler & eventHandler; const String & exchange_name; - const String & routing_key; + const Names & routing_keys; const size_t channel_id; const bool bind_by_id; const size_t num_queues; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 895b9ca2bec..e17d541b661 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -55,7 +55,7 @@ StorageRabbitMQ::StorageRabbitMQ( Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, + const Names & routing_keys_, const String & exchange_name_, const String & format_name_, char row_delimiter_, @@ -65,7 +65,7 @@ StorageRabbitMQ::StorageRabbitMQ( : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) - , routing_key(global_context.getMacros()->expand(routing_key_)) + , routing_keys(global_context.getMacros()->expand(routing_keys_)) , exchange_name(exchange_name_) , format_name(global_context.getMacros()->expand(format_name_)) , row_delimiter(row_delimiter_) @@ -215,7 +215,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() auto table_id = getStorageID(); String table_name = table_id.getNameForLogs(); - return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); } @@ -224,7 +224,7 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; - return std::make_shared(parsed_address, login_password, routing_key, producer_exchange, + return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, log, num_consumers * num_queues, bind_by_id, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -369,18 +369,18 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - String routing_key = rabbitmq_settings.rabbitmq_routing_key.value; + String routing_key_list = rabbitmq_settings.rabbitmq_routing_key_list.value; if (args_count >= 2) { - const auto * ast = engine_args[1]->as(); - if (ast && ast->value.getType() == Field::Types::String) - { - routing_key = safeGet(ast->value); - } - else - { - throw Exception(String("RabbitMQ routing key must be a string"), ErrorCodes::BAD_ARGUMENTS); - } + engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); + routing_key_list = engine_args[1]->as().value.safeGet(); + } + + Names routing_keys; + boost::split(routing_keys, routing_key_list, [](char c){ return c == ','; }); + for (String & key : routing_keys) + { + boost::trim(key); } String exchange = rabbitmq_settings.rabbitmq_exchange_name.value; @@ -483,7 +483,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_key, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); + host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); @@ -494,8 +494,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) NamesAndTypesList StorageRabbitMQ::getVirtuals() const { return NamesAndTypesList{ - {"_exchange", std::make_shared()}, - {"_routingKey", std::make_shared()} + {"_exchange", std::make_shared()} }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 27a9b8834f4..45ced9d247b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -48,9 +48,6 @@ public: ProducerBufferPtr createWriteBuffer(); - const String & getExchangeName() const { return exchange_name; } - const String & getRoutingKey() const { return routing_key; } - const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; @@ -62,7 +59,7 @@ protected: Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, + const Names & routing_keys_, const String & exchange_name_, const String & format_name_, char row_delimiter_, @@ -74,7 +71,7 @@ private: Context global_context; Context rabbitmq_context; - String routing_key; + Names routing_keys; const String exchange_name; const String format_name; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index d9c08ef7b6b..46b622bde8a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -120,7 +120,7 @@ def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'new', + rabbitmq_routing_key_list = 'new', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; @@ -193,7 +193,7 @@ def test_rabbitmq_select_empty(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'empty', + 
rabbitmq_routing_key_list = 'empty', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; ''') @@ -207,7 +207,7 @@ def test_rabbitmq_json_without_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'json', + rabbitmq_routing_key_list = 'json', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'JSONEachRow' ''') @@ -249,7 +249,7 @@ def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'csv', + rabbitmq_routing_key_list = 'csv', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'CSV', rabbitmq_row_delimiter = '\\n'; @@ -285,7 +285,7 @@ def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'tsv', + rabbitmq_routing_key_list = 'tsv', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; @@ -322,7 +322,7 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mv', + rabbitmq_routing_key_list = 'mv', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -365,7 +365,7 @@ def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mvsq', + rabbitmq_routing_key_list = 'mvsq', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -410,7 +410,7 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mmv', + rabbitmq_routing_key_list = 'mmv', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view1 (key UInt64, value UInt64) @@ -471,7 +471,7 @@ def test_rabbitmq_big_message(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value String) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'big', + rabbitmq_routing_key_list = 'big', rabbitmq_format = 'JSONEachRow'; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree @@ -774,7 +774,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'insert1', + rabbitmq_routing_key_list = 'insert1', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; ''') @@ -829,7 +829,7 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): CREATE TABLE test.rabbitmq_many (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'insert2', + rabbitmq_routing_key_list = 'insert2', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_many (key UInt64, value UInt64) @@ -1024,7 +1024,7 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 5, rabbitmq_exchange_name = 
'direct_exchange_testing', rabbitmq_exchange_type = 'direct', - rabbitmq_routing_key = 'direct_{0}', + rabbitmq_routing_key_list = 'direct_{0}', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.direct_exchange_{0}_mv TO test.destination AS @@ -1083,7 +1083,7 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, - rabbitmq_routing_key = 'key_{0}', + rabbitmq_routing_key_list = 'key_{0}', rabbitmq_exchange_name = 'fanout_exchange_testing', rabbitmq_exchange_type = 'fanout', rabbitmq_format = 'JSONEachRow', @@ -1143,7 +1143,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 5, rabbitmq_exchange_name = 'topic_exchange_testing', rabbitmq_exchange_type = 'topic', - rabbitmq_routing_key = '*.{0}', + rabbitmq_routing_key_list = '*.{0}', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS @@ -1161,7 +1161,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 4, rabbitmq_exchange_name = 'topic_exchange_testing', rabbitmq_exchange_type = 'topic', - rabbitmq_routing_key = '*.logs', + rabbitmq_routing_key_list = '*.logs', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS @@ -1285,6 +1285,76 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_multiple_bindings(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.bindings; + DROP TABLE IF EXISTS test.bindings_mv; + CREATE TABLE test.bindings (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_bindings (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.bindings_mv TO test.view_bindings AS + SELECT * FROM test.bindings; + ''') + + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + keys = ['key1', 'key2', 'key3', 'key4', 'key5'] + + for key in keys: + for message in messages: + channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, body=message) + + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_bindings') + time.sleep(1) + print result + if int(result) == messages_num 
* threads_num * 5: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num * 5, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From 626eb53baae96de1bfbd2f736ad444ecee34827f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 10:56:40 +0000 Subject: [PATCH 028/330] Fix multiple bindings for single queue & rm hardcoded strings --- .../ReadBufferFromRabbitMQConsumer.cpp | 48 ++++++++++++------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../integration/test_storage_rabbitmq/test.py | 46 ++++++++++++------ 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 967da1a75ad..5d2e3073d41 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -14,6 +14,17 @@ namespace DB { +namespace Exchange +{ + /// Note that default here means default by implementation and not by rabbitmq settings + static const String DEFAULT = "default"; + static const String FANOUT = "fanout"; + static const String DIRECT = "direct"; + static const String TOPIC = "topic"; + static const String HASH = "consistent_hash"; +} + + ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -44,7 +55,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != "default" ? true : false; + exchange_type_set = exchange_type != Exchange::DEFAULT ? true : false; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. @@ -72,14 +83,14 @@ void ReadBufferFromRabbitMQConsumer::initExchange() /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different * binding algorithm this default behaviuor is much faster). It is also used in INSERT query. */ - String producer_exchange = exchange_type_set ? exchange_name + "_default" : exchange_name; + String producer_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) { internal_exchange_declared = false; LOG_ERROR(log, "Failed to declare exchange: {}", message); }); - internal_exchange_name = producer_exchange + "_direct"; + internal_exchange_name = producer_exchange + "_" + Exchange::DIRECT; consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message) { internal_exchange_declared = false; @@ -99,11 +110,11 @@ void ReadBufferFromRabbitMQConsumer::initExchange() /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported. 
AMQP::ExchangeType type; - if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout; - else if (exchange_type == "direct") type = AMQP::ExchangeType::direct; - else if (exchange_type == "topic") type = AMQP::ExchangeType::topic; - else if (exchange_type == "consistent_hash") type = AMQP::ExchangeType::consistent_hash; - else return; + if (exchange_type == Exchange::FANOUT) type = AMQP::ExchangeType::fanout; + else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; + else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; + else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; + else return; /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages * between all consumers. (This enables better scaling as without hash-echange - the only oprion to avoid getting the same @@ -115,12 +126,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - hash_exchange = true; - /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash - if (!bind_by_id && exchange_type != "consistent_hash") + if (!bind_by_id && exchange_type != Exchange::HASH) return; + hash_exchange = true; + AMQP::Table exchange_arguments; exchange_arguments["hash-property"] = "message_id"; @@ -153,6 +164,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) internal_exchange_declared = true; } + /* Internal exchange is a default exchange (by implementstion, not by rabbitmq settings) and is used for INSERT query + * and if exchange_type is not set - there is no local exchange. If it is set - then local exchange is a distributor + * exchange, which is bound to the exchange specified by the client. + */ bool internal_bindings_created = false, internal_bindings_error = false; bool local_bindings_created = false, local_bindings_error = false; @@ -188,7 +203,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { internal_bindings_error = true; - LOG_ERROR(log, "Failed to bind to key {}, the reason is: {}", binding_key, message); + LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection @@ -209,15 +224,16 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { local_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); + LOG_ERROR(log, "Failed to create queue binding to key {}. 
Reason: {}", binding_key, message); }); } else { - /// means there is only one queue with one consumer - no even distribution needed - no hash-exchange + /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange for (auto & routing_key : routing_keys) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, routing_key) + /// Binding directly to exchange, specified by the client + consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { local_bindings_created = true; @@ -225,7 +241,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { local_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); + LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); }); } } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e17d541b661..212d1fbc783 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -213,7 +213,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); auto table_id = getStorageID(); - String table_name = table_id.getNameForLogs(); + String table_name = table_id.getNameForLogs(); return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 46b622bde8a..1a56395eb29 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -927,7 +927,6 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) - print result if int(result) == messages_num * threads_num: break @@ -1288,9 +1287,17 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): @pytest.mark.timeout(420) def test_rabbitmq_multiple_bindings(rabbitmq_cluster): instance.query(''' - DROP TABLE IF EXISTS test.bindings; - DROP TABLE IF EXISTS test.bindings_mv; - CREATE TABLE test.bindings (key UInt64, value UInt64) + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_1_mv; + CREATE TABLE test.bindings_1 (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, @@ -1300,13 +1307,25 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; - CREATE TABLE test.view_bindings (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.bindings_mv TO test.view_bindings AS - SELECT * FROM test.bindings; + CREATE MATERIALIZED VIEW test.bindings_1_mv TO test.destination AS + SELECT * FROM test.bindings_1; ''') + # in case num_consumers and num_queues are not set - multiple bindings are implemented differently, so test them too + instance.query(''' + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS 
test.bindings_2_mv; + CREATE TABLE test.bindings_2 (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.bindings_2_mv TO test.destination AS + SELECT * FROM test.bindings_2; + ''') i = [0] messages_num = 500 @@ -1318,7 +1337,7 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): # init connection here because otherwise python rabbitmq client might fail connection = pika.BlockingConnection(parameters) channel = connection.channel() - channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + channel.exchange_declare(exchange='multiple_bindings_testing', exchange_type='direct') messages = [] for _ in range(messages_num): @@ -1343,16 +1362,15 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): thread.start() while True: - result = instance.query('SELECT count() FROM test.view_bindings') + result = instance.query('SELECT count() FROM test.destination') time.sleep(1) - print result - if int(result) == messages_num * threads_num * 5: + if int(result) == messages_num * threads_num * 5 * 2: break for thread in threads: thread.join() - assert int(result) == messages_num * threads_num * 5, 'ClickHouse lost some messages: {}'.format(result) + assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) if __name__ == '__main__': From ebc781a0c84350b82113cd921feee028c7a5f6a7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jun 2020 19:14:22 +0300 Subject: [PATCH 029/330] make polymorphic_parts perf test not blazing fast --- ...phic_parts.xml => polymorphic_parts_l.xml} | 22 +++++------- tests/performance/polymorphic_parts_m.xml | 35 +++++++++++++++++++ tests/performance/polymorphic_parts_s.xml | 35 +++++++++++++++++++ 3 files changed, 79 insertions(+), 13 deletions(-) rename tests/performance/{polymorphic_parts.xml => polymorphic_parts_l.xml} (60%) create mode 100644 tests/performance/polymorphic_parts_m.xml create mode 100644 tests/performance/polymorphic_parts_s.xml diff --git a/tests/performance/polymorphic_parts.xml b/tests/performance/polymorphic_parts_l.xml similarity index 60% rename from tests/performance/polymorphic_parts.xml rename to tests/performance/polymorphic_parts_l.xml index a8e305953d0..75ad857c9a8 100644 --- a/tests/performance/polymorphic_parts.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -18,20 +18,16 @@ ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) - INSERT INTO hits_wide(UserID) VALUES (rand()) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + + + 1000 + 1000 + - INSERT INTO hits_compact(UserID) VALUES (rand()) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) - - INSERT INTO hits_buffer(UserID) VALUES (rand()) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) + + INSERT INTO hits_wide(UserID) SELECT 
rand() FROM numbers(50000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml new file mode 100644 index 00000000000..fbe0c18d07e --- /dev/null +++ b/tests/performance/polymorphic_parts_m.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + + + + + 100 + 100 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(5000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(5000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(5000) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml new file mode 100644 index 00000000000..085295af842 --- /dev/null +++ b/tests/performance/polymorphic_parts_s.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + + + + + 1 + 1 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + From 7fb8a985281f9c663817d32d7939f61f3e258217 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Jun 2020 00:33:30 +0300 Subject: [PATCH 030/330] increase number of rows --- tests/performance/polymorphic_parts_l.xml | 8 ++++---- tests/performance/polymorphic_parts_m.xml | 8 ++++---- tests/performance/polymorphic_parts_s.xml | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index 75ad857c9a8..acda0de281a 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -24,10 +24,10 @@ 1000 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50000) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_compact(UserID) SELECT 
rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index fbe0c18d07e..a9842496de0 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -24,10 +24,10 @@ 100 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(5000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(5000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(5000) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index 085295af842..3b9eea91b1d 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -24,10 +24,10 @@ 1 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From 3b0a3e00c0c1a902622aabf277b2ab0bb2a07571 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 20:05:35 +0000 Subject: [PATCH 031/330] Some fixes --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 8 +-- src/Storages/RabbitMQ/RabbitMQHandler.h | 5 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 2 +- .../ReadBufferFromRabbitMQConsumer.cpp | 53 +++++++++++-------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 9 +++- .../WriteBufferToRabbitMQProducer.cpp | 4 +- .../integration/test_storage_rabbitmq/test.py | 3 +- 7 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 0a432e1b5ca..8667427ee63 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -37,11 +37,12 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same - * event loop). But the loop should not be attempted to start if it is already running. + * event loop and handler). But the loop should not be attempted to start if it is already running. 
*/ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { loop_started = true; + stop_scheduled.store(false); event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } @@ -56,7 +57,7 @@ void RabbitMQHandler::startProducerLoop() void RabbitMQHandler::stop() { - if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + if (mutex_before_loop_stop.try_lock()) { event_base_loopbreak(evbase); mutex_before_loop_stop.unlock(); @@ -66,8 +67,9 @@ void RabbitMQHandler::stop() void RabbitMQHandler::stopWithTimeout() { - if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + if (mutex_before_loop_stop.try_lock()) { + stop_scheduled.store(true); event_base_loopexit(evbase, &tv); mutex_before_loop_stop.unlock(); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 911651097bb..9b2d273422d 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -23,15 +23,16 @@ public: void startProducerLoop(); void stopWithTimeout(); void stop(); + std::atomic & checkStopIsScheduled() { return stop_scheduled; }; private: event_base * evbase; Poco::Logger * log; timeval tv; - size_t count_passed = 0; + std::atomic stop_scheduled = false; std::timed_mutex mutex_before_event_loop; - std::timed_mutex mutex_before_loop_stop; + std::mutex mutex_before_loop_stop; }; } diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index d81a887747b..c9f09489f77 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -15,9 +15,9 @@ namespace DB M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ + M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ - M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5d2e3073d41..6b8763138a4 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,6 +13,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} namespace Exchange { @@ -22,6 +26,7 @@ namespace Exchange static const String DIRECT = "direct"; static const String TOPIC = "topic"; static const String HASH = "consistent_hash"; + static const String HEADERS = "headers"; } @@ -55,7 +60,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != Exchange::DEFAULT ? true : false; + exchange_type_set = exchange_type != Exchange::DEFAULT; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. 
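The settings that drive this sharding are rabbitmq_num_consumers (consumer channels per table) and rabbitmq_num_queues (queues per consumer), as exercised by the sharding integration tests in this series. A minimal sketch in the style of tests/integration/test_storage_rabbitmq/test.py — the table name is illustrative and 'instance' is assumed to be the tests' ClickHouse node fixture:

    # Sketch only: with rabbitmq_num_consumers = 5 and rabbitmq_num_queues = 2 the
    # table ends up with 5 consumer channels and 5 * 2 = 10 queues in total.
    # 'instance' is assumed to be the ClickHouse node fixture from the tests;
    # 'test.sharded_queue' is a hypothetical table name.
    instance.query('''
        DROP TABLE IF EXISTS test.sharded_queue;
        CREATE TABLE test.sharded_queue (key UInt64, value UInt64)
            ENGINE = RabbitMQ
            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
                     rabbitmq_num_consumers = 5,
                     rabbitmq_num_queues = 2,
                     rabbitmq_format = 'TSV',
                     rabbitmq_row_delimiter = '\\n';
        ''')

Raising rabbitmq_num_queues rather than only rabbitmq_num_consumers is the knob referred to above when a single queue per consumer is not enough.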
@@ -81,7 +86,7 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different - * binding algorithm this default behaviuor is much faster). It is also used in INSERT query. + * binding algorithm this default behaviuor is much faster). It is also used in INSERT query (so it is always declared). */ String producer_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) @@ -114,10 +119,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else return; + else if (exchange_type == Exchange::HEADERS) + throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. (This enables better scaling as without hash-echange - the only oprion to avoid getting the same + * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting the same * messages more than once - is having only one consumer with one queue, which is not good.) */ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) @@ -156,7 +163,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - /// These variables might be updated later from a separate thread in onError callbacks + /// These variables might be updated later from a separate thread in onError callbacks. if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) { initExchange(); @@ -206,7 +213,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); - /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection + /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed. Also note + * that if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise consumer might fail + * to subscribe and no resubscription will help. + */ subscribe(queues.back()); LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); @@ -229,7 +239,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } else { - /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange + /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. 
for (auto & routing_key : routing_keys) { /// Binding directly to exchange, specified by the client @@ -274,6 +284,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onSuccess([&](const std::string & /* consumer */) { subscribed_queue[queue_name] = true; + consumer_error = false; ++count_subscribed; LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); @@ -290,24 +301,17 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) message_received += row_delimiter; } - bool stop_loop = false; - /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl(). { std::lock_guard lock(mutex); received.push_back(message_received); - - /* As event loop is blocking to the thread that started it and a single thread should not be blocked while - * executing all callbacks on the connection (not only its own), then there should be some point to unblock. - * loop_started == 1 if current consumer is started the loop and not another. - */ - if (!loop_started) - { - stop_loop = true; - } } - if (stop_loop) + /* As event loop is blocking to the thread that started it and a single thread should not be blocked while + * executing all callbacks on the connection (not only its own), then there should be some point to unblock. + * loop_started == 1 if current consumer is started the loop and not another. + */ + if (!loop_started.load() && !eventHandler.checkStopIsScheduled().load()) { stopEventLoopWithTimeout(); } @@ -323,7 +327,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) void ReadBufferFromRabbitMQConsumer::checkSubscription() { - /// In general this condition will always be true and looping/resubscribing would not happen if (count_subscribed == num_queues) return; @@ -337,7 +340,11 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - /// A case that would not normally happen + /// Updated in callbacks which are run by the loop + if (count_subscribed == num_queues) + return; + + /// A case that should never normally happen for (auto & queue : queues) { subscribe(queue); @@ -372,9 +379,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { if (received.empty()) { - /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread + /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread. 
startEventLoop(loop_started); - loop_started = false; + loop_started.store(false); } if (received.empty()) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 212d1fbc783..af8ad50e4e1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -450,8 +450,15 @@ void registerStorageRabbitMQ(StorageFactory & factory) { exchange_type = safeGet(ast->value); } - } + if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" && exchange_type != "consistent_hash") + { + if (exchange_type == "headers") + throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else + throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + } + } UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; if (args_count >= 7) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 8fa241dade5..6e2b6f21f1d 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -33,7 +33,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( : WriteBuffer(nullptr, 0) , login_password(login_password_) , routing_key(routing_key_) - , exchange_name(exchange_) + , exchange_name(exchange_ + "_direct") , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) @@ -126,7 +126,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name * and makes it visible from current producer_channel. */ - producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) + producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) .onSuccess([&]() { exchange_declared = true; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 1a56395eb29..37163db06f4 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -882,7 +882,7 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): @pytest.mark.timeout(240) -def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): +def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): instance.query(''' DROP TABLE IF EXISTS test.view_sharding; DROP TABLE IF EXISTS test.consumer_sharding; @@ -890,6 +890,7 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_sharding (key UInt64, value UInt64) From 462e8bcdc97dfd094747b682f19cabbc8e4b74bc Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 21:03:53 +0000 Subject: [PATCH 032/330] Support transactions for publishing --- src/Storages/RabbitMQ/RabbitMQSettings.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 24 ++++++++-- src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 +- .../WriteBufferToRabbitMQProducer.cpp | 46 +++++++++++++++++-- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 ++ 5 files changed, 70 insertions(+), 8 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index c9f09489f77..5cd52ed9ef7 100644 --- 
a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -18,6 +18,7 @@ namespace DB M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ + M(SettingBool, rabbitmq_transactional_channel, false, "Use transactional channel for publishing.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index af8ad50e4e1..669cfe19aa5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -61,7 +61,8 @@ StorageRabbitMQ::StorageRabbitMQ( char row_delimiter_, const String & exchange_type_, size_t num_consumers_, - size_t num_queues_) + size_t num_queues_, + const bool use_transactional_channel_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) @@ -72,6 +73,7 @@ StorageRabbitMQ::StorageRabbitMQ( , num_consumers(num_consumers_) , num_queues(num_queues_) , exchange_type(exchange_type_) + , use_transactional_channel(use_transactional_channel_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( @@ -225,7 +227,8 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, - log, num_consumers * num_queues, bind_by_id, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + log, num_consumers * num_queues, bind_by_id, use_transactional_channel, + row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -488,9 +491,24 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } + bool use_transactional_channel = static_cast(rabbitmq_settings.rabbitmq_transactional_channel); + if (args_count >= 9) + { + const auto * ast = engine_args[8]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + use_transactional_channel = static_cast(safeGet(ast->value)); + } + else + { + throw Exception("Transactional channel parameter is a bool", ErrorCodes::BAD_ARGUMENTS); + } + } + return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); + host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, + num_queues, use_transactional_channel); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 45ced9d247b..e056faa0d65 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -65,7 +65,8 @@ protected: char row_delimiter_, const String & exchange_type_, size_t num_consumers_, - size_t num_queues_); + size_t num_queues_, + const bool use_transactional_channel_); private: Context global_context; @@ -81,6 +82,7 @@ private: bool bind_by_id; size_t num_queues; const String exchange_type; + const bool use_transactional_channel; Poco::Logger * log; std::pair parsed_address; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 6e2b6f21f1d..09179b95a15 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -15,7 +15,8 @@ namespace DB enum { Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000, + Loop_retries_max = 1000, + Loop_wait = 10, Batch = 10000 }; @@ -27,6 +28,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, + const bool use_transactional_channel_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_) @@ -37,6 +39,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) + , use_transactional_channel(use_transactional_channel_) , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) @@ -50,7 +53,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. 
*/ size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + while (!connection.ready() && ++cnt_retries != Loop_retries_max) { event_base_loop(producerEvbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); @@ -63,14 +66,19 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( producer_channel = std::make_shared(&connection); checkExchange(); + + /// If publishing should be wrapped in transactions + if (use_transactional_channel) + { + producer_channel->startTransaction(); + } } WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - checkExchange(); + finilize(); connection.close(); - assert(rows == 0 && chunks.empty()); } @@ -145,6 +153,36 @@ void WriteBufferToRabbitMQProducer::checkExchange() } +void WriteBufferToRabbitMQProducer::finilize() +{ + checkExchange(); + + if (use_transactional_channel) + { + std::atomic answer_received = false; + producer_channel->commitTransaction() + .onSuccess([&]() + { + answer_received = true; + LOG_TRACE(log, "All messages were successfully published"); + }) + .onError([&](const char * message) + { + answer_received = true; + LOG_TRACE(log, "None of messages were publishd: {}", message); + /// Probably should do something here + }); + + size_t count_retries = 0; + while (!answer_received && ++count_retries != Loop_retries_max) + { + startEventLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(Loop_wait)); + } + } +} + + void WriteBufferToRabbitMQProducer::nextImpl() { chunks.push_back(std::string()); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 90e0d90b356..9fd36257561 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -26,6 +26,7 @@ public: Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, + const bool use_transactional_channel_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_ @@ -39,12 +40,14 @@ public: private: void nextImpl() override; void checkExchange(); + void finilize(); std::pair & login_password; const String routing_key; const String exchange_name; const bool bind_by_id; const size_t num_queues; + const bool use_transactional_channel; event_base * producerEvbase; RabbitMQHandler eventHandler; From b8a4c7708ac8724dd0aac5ca957a9aa46132af47 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 18:15:59 +0000 Subject: [PATCH 033/330] Make local exchanges unique for each table --- .../ReadBufferFromRabbitMQConsumer.cpp | 94 ++++++++++--------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 14 ++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 + 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 6b8763138a4..90485b28a96 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -41,7 +41,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String table_name_, + const String & local_exchange_name_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -54,7 +54,7 @@ 
ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , bind_by_id(bind_by_id_) , num_queues(num_queues_) , exchange_type(exchange_type_) - , table_name(table_name_) + , local_exchange_name(local_exchange_name_) , stopped(stopped_) { messages.clear(); @@ -85,28 +85,31 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { - /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different - * binding algorithm this default behaviuor is much faster). It is also used in INSERT query (so it is always declared). + /* This direct-exchange is used for default implemenation and for INSERT query (so it is always declared). If exchange_type + * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table. + * This strict division to external and local exchanges is needed to avoid too much complexity with defining exchange_name + * for INSERT query producer and, in general, it is much better to distinguish them into separate ones. */ - String producer_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; - consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) + String default_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; + consumer_channel->declareExchange(default_exchange, AMQP::fanout).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_exchange, message); }); - internal_exchange_name = producer_exchange + "_" + Exchange::DIRECT; - consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message) + default_local_exchange = local_exchange_name; + default_local_exchange += exchange_type_set ? "_default_" + Exchange::DIRECT : "_" + Exchange::DIRECT; + consumer_channel->declareExchange(default_local_exchange, AMQP::direct).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_local_exchange, message); }); - /// With fanout exchange the binding key is ignored - a parameter might be arbitrary - consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_keys[0]).onError([&](const char * message) + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. + consumer_channel->bindExchange(default_exchange, default_local_exchange, routing_keys[0]).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to bind exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange. Reason: {}", default_exchange, default_local_exchange, message); }); if (!exchange_type_set) @@ -124,26 +127,29 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. 
(This enables better scaling as without hash-exchange - the only option to avoid getting the same - * messages more than once - is having only one consumer with one queue, which is not good.) + * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting + * the same messages more than once - is having only one consumer with one queue, which is not good.) */ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + LOG_ERROR(log, "Failed to declare client's {} exchange: {}", exchange_type, message); }); - /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash - if (!bind_by_id && exchange_type != Exchange::HASH) + /// No need for declaring hash-exchange if there is only one consumer with one queue or exchange type is already hash + if (!bind_by_id) return; hash_exchange = true; + if (exchange_type == Exchange::HASH) + return; + AMQP::Table exchange_arguments; exchange_arguments["hash-property"] = "message_id"; - local_exchange_name = exchange_name + "_" + table_name; - consumer_channel->declareExchange(local_exchange_name, AMQP::consistent_hash, exchange_arguments) + String local_hash_exchange_name = local_exchange_name + "_hash"; + consumer_channel->declareExchange(local_hash_exchange_name, AMQP::consistent_hash, exchange_arguments) .onError([&](const char * message) { local_exchange_declared = false; @@ -152,7 +158,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() for (auto & routing_key : routing_keys) { - consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); @@ -164,19 +170,15 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { /// These variables might be updated later from a separate thread in onError callbacks. - if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) + if (!local_exchange_declared || (exchange_type_set && !local_hash_exchange_declared)) { initExchange(); local_exchange_declared = true; - internal_exchange_declared = true; + local_hash_exchange_declared = true; } - /* Internal exchange is a default exchange (by implementstion, not by rabbitmq settings) and is used for INSERT query - * and if exchange_type is not set - there is no local exchange. If it is set - then local exchange is a distributor - * exchange, which is bound to the exchange specified by the client. 
- */ - bool internal_bindings_created = false, internal_bindings_error = false; - bool local_bindings_created = false, local_bindings_error = false; + bool default_bindings_created = false, default_bindings_error = false; + bool bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -202,14 +204,14 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(default_local_exchange, queue_name_, binding_key) .onSuccess([&] { - internal_bindings_created = true; + default_bindings_created = true; }) .onError([&](const char * message) { - internal_bindings_error = true; + default_bindings_error = true; LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); @@ -223,17 +225,22 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) if (exchange_type_set) { - /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary if (hash_exchange) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + /* If exchange_type == hash, then bind directly to this client's exchange (because there is no need for a distributor + * exchange as it is already hash-exchange), otherwise hash-exchange is a local distributor exchange. + */ + String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; + + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) .onSuccess([&] { - local_bindings_created = true; + bindings_created = true; }) .onError([&](const char * message) { - local_bindings_error = true; + bindings_error = true; LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } @@ -246,11 +253,11 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { - local_bindings_created = true; + bindings_created = true; }) .onError([&](const char * message) { - local_bindings_error = true; + bindings_error = true; LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); }); } @@ -259,7 +266,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) }) .onError([&](const char * message) { - internal_bindings_error = true; + default_bindings_error = true; LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); @@ -267,8 +274,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) * It is important at this moment to make sure that queue bindings are created before any publishing can happen because * otherwise messages will be routed nowhere. 
*/ - while (!internal_bindings_created && !internal_bindings_error - || (exchange_type_set && !local_bindings_created && !local_bindings_error)) + while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error)) { startEventLoop(loop_started); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 3d02eeab761..6a2c847357d 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -32,7 +32,7 @@ public: const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String table_name_, + const String & local_exchange_name_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -54,7 +54,7 @@ private: const bool bind_by_id; const size_t num_queues; const String & exchange_type; - const String table_name; + const String & local_exchange_name; Poco::Logger * log; char row_delimiter; @@ -62,8 +62,8 @@ private: bool allowed = true; const std::atomic & stopped; - String internal_exchange_name, local_exchange_name; - bool internal_exchange_declared = false, local_exchange_declared = false; + String default_local_exchange; + bool local_exchange_declared = false, local_hash_exchange_declared = false; bool exchange_type_set = false, hash_exchange = false; std::atomic loop_started = false, consumer_error = false; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 669cfe19aa5..d3811bdb0d2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -104,6 +104,10 @@ StorageRabbitMQ::StorageRabbitMQ( task->deactivate(); bind_by_id = num_consumers > 1 || num_queues > 1; + + auto table_id = getStorageID(); + String table_name = table_id.table_name; + local_exchange_name = exchange_name + "_" + table_name; } @@ -214,17 +218,17 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - auto table_id = getStorageID(); - String table_name = table_id.getNameForLogs(); - return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, - next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); + next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, local_exchange_name, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; + /* If exchange type is set, then there are different exchanges for external publishing and for INSERT query + * as in this case they are of different types. + */ + String producer_exchange = exchange_type == "default" ? 
local_exchange_name : local_exchange_name + "_default"; return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, log, num_consumers * num_queues, bind_by_id, use_transactional_channel, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index e056faa0d65..79e4d5e4ca2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -74,6 +74,7 @@ private: Names routing_keys; const String exchange_name; + String local_exchange_name; const String format_name; char row_delimiter; From 9c49398728909f0ae375aed8e3de17673405cc3c Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 18:44:17 +0000 Subject: [PATCH 034/330] Fix tests --- .../integration/test_storage_rabbitmq/test.py | 64 ++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 37163db06f4..8442a7ecb0a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -33,7 +33,7 @@ def check_rabbitmq_is_available(): 'exec', '-i', rabbitmq_id, - 'rabbitmqctl', + 'rabbitmqctl', 'await_startup'), stdout=subprocess.PIPE) p.communicate() @@ -774,6 +774,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'insert', rabbitmq_routing_key_list = 'insert1', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; @@ -784,10 +785,10 @@ def test_rabbitmq_insert(rabbitmq_cluster): consumer_connection = pika.BlockingConnection(parameters) consumer = consumer_connection.channel() - consumer.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + consumer.exchange_declare(exchange='insert_rabbitmq_direct', exchange_type='direct') result = consumer.queue_declare(queue='') queue_name = result.method.queue - consumer.queue_bind(exchange='clickhouse-exchange', queue=queue_name, routing_key='insert1') + consumer.queue_bind(exchange='insert_rabbitmq_direct', queue=queue_name, routing_key='insert1') values = [] for i in range(50): @@ -871,8 +872,9 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_many; - DROP TABLE test.view_many; + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.consumer_many; + DROP TABLE IF EXISTS test.view_many; ''') for thread in threads: @@ -932,8 +934,9 @@ def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_sharding; - DROP TABLE test.view_sharding; + DROP TABLE IF EXISTS test.rabbitmq_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + DROP TABLE IF EXISTS test.view_sharding; ''') for thread in threads: @@ -992,8 +995,9 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_overload; - DROP TABLE test.view_overload; + DROP TABLE IF EXISTS test.rabbitmq_overload; + DROP TABLE IF EXISTS test.consumer_overload; + DROP TABLE IF EXISTS test.view_overload; ''') for thread in threads: @@ -1060,6 +1064,16 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS test.direct_exchange_{0}; 
+ '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1118,6 +1132,16 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS test.fanout_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1201,6 +1225,22 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables + messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + '''.format(consumer_id)) + + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + '''.format(num_tables + consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1371,6 +1411,12 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): for thread in threads: thread.join() + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) From dcd7b7351c23bbe01e81493ecbad6fc2504822e4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 21:37:37 +0000 Subject: [PATCH 035/330] Support headers-exchange type --- .../ReadBufferFromRabbitMQConsumer.cpp | 57 ++++++++++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 +- .../integration/test_storage_rabbitmq/test.py | 97 +++++++++++++++++++ 3 files changed, 152 insertions(+), 12 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 90485b28a96..31ca4f280e3 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "Poco/Timer.h" #include @@ -122,8 +123,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else if (exchange_type == Exchange::HEADERS) - throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else if (exchange_type == Exchange::HEADERS) type = AMQP::ExchangeType::headers; else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages @@ -156,14 +156,37 @@ void ReadBufferFromRabbitMQConsumer::initExchange() LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - for (auto & routing_key : routing_keys) + if (exchange_type == 
Exchange::HEADERS) { - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + AMQP::Table binding_arguments; + std::vector matching; + + for (auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + /// Routing key can be arbitrary here. + consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_keys[0], binding_arguments) + .onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); }); } + else + { + for (auto & routing_key : routing_keys) + { + consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + }); + } + } } @@ -232,7 +255,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; - /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary. consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) .onSuccess([&] { @@ -244,6 +267,30 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } + else if (exchange_type == Exchange::HEADERS) + { + AMQP::Table binding_arguments; + std::vector matching; + + /// It is not parsed for the second time - if it was parsed above, then it would go to the first if statement, not here. + for (auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + consumer_channel->bindQueue(exchange_name, queue_name_, routing_keys[0], binding_arguments) + .onSuccess([&] + { + bindings_created = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_keys[0], message); + }); + } else { /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. 
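Note on the headers-exchange support added above: each entry of rabbitmq_routing_key_list is split on '=' and turned into a binding argument, so queues are matched on message headers (x-match semantics) rather than on a routing key. Below is a rough standalone pika sketch of the same kind of binding that the new integration test further down exercises; the broker address, credentials, exchange name and header values are only illustrative and are taken from the test setup, not from this patch.

import pika

credentials = pika.PlainCredentials('root', 'clickhouse')
connection = pika.BlockingConnection(pika.ConnectionParameters('localhost', 5672, '/', credentials))
channel = connection.channel()

# Declare a headers exchange and bind an anonymous queue with x-match=all:
# a message reaches the queue only if every listed header matches.
channel.exchange_declare(exchange='headers_exchange_testing', exchange_type='headers')
queue_name = channel.queue_declare(queue='').method.queue
channel.queue_bind(exchange='headers_exchange_testing', queue=queue_name, routing_key='',
                   arguments={'x-match': 'all', 'format': 'logs', 'type': 'report', 'year': '2020'})

# The routing key is ignored for headers exchanges; only the headers decide the routing.
channel.basic_publish(exchange='headers_exchange_testing', routing_key='',
                      properties=pika.BasicProperties(headers={'format': 'logs', 'type': 'report', 'year': '2020'}),
                      body='{"key": 1, "value": 1}')

connection.close()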
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index d3811bdb0d2..852edd24726 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -458,13 +458,9 @@ void registerStorageRabbitMQ(StorageFactory & factory) exchange_type = safeGet(ast->value); } - if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" && exchange_type != "consistent_hash") - { - if (exchange_type == "headers") - throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); - else - throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); - } + if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" + && exchange_type != "headers" && exchange_type != "consistent_hash") + throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); } UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 8442a7ecb0a..f58e898a45f 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1420,6 +1420,103 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_headers_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables_to_receive = 3 + for consumer_id in range(num_tables_to_receive): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id)) + + num_tables_to_ignore = 2 + for consumer_id in range(num_tables_to_ignore): + print("Setting up table {}".format(consumer_id + num_tables_to_receive)) + instance.query(''' + DROP TABLE IF EXISTS test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2019', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id + num_tables_to_receive)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = 
pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='headers_exchange_testing', exchange_type='headers') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + fields={} + fields['format']='logs' + fields['type']='report' + fields['year']='2020' + + key_num = 0 + for message in messages: + channel.basic_publish(exchange='headers_exchange_testing', routing_key='', + properties=pika.BasicProperties(headers=fields), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables_to_receive: + break + + for consumer_id in range(num_tables_to_receive): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS test.direct_exchange_{0}; + '''.format(consumer_id)) + for consumer_id in range(num_tables_to_ignore): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS test.direct_exchange_{0}; + '''.format(consumer_id + num_tables_to_receive)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables_to_receive, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From 93aee32ae4b5846a08069ccb1e7154c2b9418f8b Mon Sep 17 00:00:00 2001 From: Avogar Date: Sun, 14 Jun 2020 18:35:32 +0300 Subject: [PATCH 036/330] Add ORCBlockOutputFormat --- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatFactory.h | 4 +- .../Formats/Impl/ORCBlockOutputFormat.cpp | 409 ++++++++++++++++++ .../Formats/Impl/ORCBlockOutputFormat.h | 70 +++ .../01307_orc_output_format.reference | 6 + .../0_stateless/01307_orc_output_format.sh | 20 + 6 files changed, 509 insertions(+), 1 deletion(-) create mode 100644 src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/ORCBlockOutputFormat.h create mode 100644 tests/queries/0_stateless/01307_orc_output_format.reference create mode 100755 tests/queries/0_stateless/01307_orc_output_format.sh diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 9182c728600..e1bb40c737c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -394,6 +394,7 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorNull(*this); registerOutputFormatProcessorMySQLWrite(*this); registerOutputFormatProcessorMarkdown(*this); + registerOutputFormatProcessorORC(*this); } FormatFactory & FormatFactory::instance() diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index c8dd97aa940..9c1a23d7164 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -175,6 +175,9 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory); void registerOutputFormatProcessorTemplate(FormatFactory & factory); void registerInputFormatProcessorMsgPack(FormatFactory & factory); void registerOutputFormatProcessorMsgPack(FormatFactory & factory); +void registerInputFormatProcessorORC(FormatFactory & factory); +void registerOutputFormatProcessorORC(FormatFactory & factory); + /// File Segmentation Engines for parallel reading @@ -206,6 +209,5 @@ void registerOutputFormatProcessorMarkdown(FormatFactory & factory); void 
registerInputFormatProcessorCapnProto(FormatFactory & factory); void registerInputFormatProcessorRegexp(FormatFactory & factory); void registerInputFormatProcessorJSONAsString(FormatFactory & factory); -void registerInputFormatProcessorORC(FormatFactory & factory); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp new file mode 100644 index 00000000000..3745ee229a8 --- /dev/null +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -0,0 +1,409 @@ +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {} + +uint64_t ORCOutputStream::getLength() const +{ + return out.count(); +} + +uint64_t ORCOutputStream::getNaturalWriteSize() const +{ + out.nextIfAtEnd(); + return out.available(); +} + +void ORCOutputStream::write(const void* buf, size_t length) +{ + out.write(static_cast(buf), length); +} + +ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) +{ + schema = orc::createStructType(); + size_t columns_count = header_.columns(); + for (size_t i = 0; i != columns_count; ++i) + { + schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); + } + writer = orc::createWriter(*schema, &output_stream, options); +} + +ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: [[fallthrough]]; + case TypeIndex::Int8: + { + return orc::createPrimitiveType(orc::TypeKind::BYTE); + } + case TypeIndex::UInt16: [[fallthrough]]; + case TypeIndex::Int16: + { + return orc::createPrimitiveType(orc::TypeKind::SHORT); + } + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::Int32: + { + return orc::createPrimitiveType(orc::TypeKind::INT); + } + case TypeIndex::UInt64: [[fallthrough]]; + case TypeIndex::Int64: + { + return orc::createPrimitiveType(orc::TypeKind::LONG); + } + case TypeIndex::Float32: + { + return orc::createPrimitiveType(orc::TypeKind::FLOAT); + } + case TypeIndex::Float64: + { + return orc::createPrimitiveType(orc::TypeKind::DOUBLE); + } + case TypeIndex::Date: + { + return orc::createPrimitiveType(orc::TypeKind::DATE); + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::DateTime64: + { + return orc::createPrimitiveType(orc::TypeKind::TIMESTAMP); + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + return orc::createPrimitiveType(orc::TypeKind::STRING); + } + case TypeIndex::Nullable: + { + return getORCType(removeNullable(type)); + } + /* + case TypeIndex::Array: + { + const auto * array_type = typeid_cast(type.get()); + return orc::createListType(getORCType(array_type->getNestedType())); + } + */ + case TypeIndex::Decimal32: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal64: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal128: + { + const auto * decimal_type = typeid_cast 
*>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + default: + { + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } + } +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + size_t rows_num) +{ + NumberVectorBatch * number_orc_column = dynamic_cast(orc_column); + const auto & number_column = assert_cast &>(column); + number_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + number_orc_column->notNull[i] = 0; + continue; + } + number_orc_column->data[i] = number_column.getElement(i); + } + number_orc_column->numElements = rows_num; +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num, + ConvertFunc convert) +{ + DecimalVectorBatch *decimal_orc_column = dynamic_cast(orc_column); + const auto & decimal_column = assert_cast &>(column); + const auto * decimal_type = typeid_cast *>(type.get()); + decimal_orc_column->precision = decimal_type->getPrecision(); + decimal_orc_column->scale = decimal_type->getScale(); + decimal_orc_column->resize(rows_num); + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + decimal_orc_column->notNull[i] = 0; + continue; + } + decimal_orc_column->values[i] = convert(decimal_column.getElement(i).value); + } + decimal_orc_column->numElements = rows_num; +} + +void ORCBlockOutputFormat::writeColumn( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num) +{ + if (null_bytemap) + { + orc_column->hasNulls = true; + } + switch (type->getTypeId()) + { + case TypeIndex::Int8: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt8: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int16: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Float32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Float64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + orc::StringVectorBatch * string_orc_column = dynamic_cast(orc_column); + const auto & string_column = assert_cast(column); + string_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + string_orc_column->notNull[i] = 0; + continue; + } + const StringRef & string = string_column.getDataAt(i); + 
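+                /// Only a pointer and a length into the column's own data are stored in the batch; nothing is copied here.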
string_orc_column->data[i] = const_cast(string.data); + string_orc_column->length[i] = string.size; + } + string_orc_column->numElements = rows_num; + break; + } + case TypeIndex::DateTime: + { + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + timestamp_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + timestamp_orc_column->notNull[i] = 0; + continue; + } + timestamp_orc_column->data[i] = timestamp_column.getElement(i); + timestamp_orc_column->nanoseconds[i] = 0; + } + timestamp_orc_column->numElements = rows_num; + break; + } + case TypeIndex::DateTime64: + { + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + const auto * timestamp_type = assert_cast(type.get()); + + UInt32 scale = timestamp_type->getScale(); + timestamp_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + timestamp_orc_column->notNull[i] = 0; + continue; + } + UInt64 value = timestamp_column.getElement(i); + timestamp_orc_column->data[i] = value / std::pow(10, scale); + timestamp_orc_column->nanoseconds[i] = (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); + } + timestamp_orc_column->numElements = rows_num; + break; + } + case TypeIndex::Decimal32:; + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int32 value){ return value; }); + break; + } + case TypeIndex::Decimal64: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int64 value){ return value; }); + break; + } + case TypeIndex::Decimal128: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int128 value){ return orc::Int128(value >> 64, (value << 64) >> 64); }); + break; + } + case TypeIndex::Nullable: + { + const auto & nullable_column = assert_cast(column); + const PaddedPODArray & new_null_bytemap = assert_cast &>(*nullable_column.getNullMapColumnPtr()).getData(); + auto nested_type = removeNullable(type); + writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap, rows_num); + break; + } + /* Doesn't work + case TypeIndex::Array: + { + orc::ListVectorBatch * list_orc_column = dynamic_cast(orc_column); + const auto & list_column = assert_cast(column); + auto nested_type = assert_cast(*type).getNestedType(); + const ColumnArray::Offsets & offsets = list_column.getOffsets(); + list_orc_column->resize(rows_num); + list_orc_column->offsets[0] = 0; + for (size_t i = 0; i != rows_num; ++i) + { + list_orc_column->offsets[i + 1] = offsets[i]; + } + const IColumn & nested_column = list_column.getData(); + orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get(); + writeColumn(nested_orc_column, nested_column, nested_type, null_bytemap, nested_column.size()); + list_orc_column->numElements = rows_num; + break; + } + */ + default: + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } +} + +void ORCBlockOutputFormat::consume(Chunk chunk) +{ + size_t columns_num = chunk.getNumColumns(); + size_t rows_num = chunk.getNumRows(); + ORC_UNIQUE_PTR batch = writer->createRowBatch(rows_num); + orc::StructVectorBatch *root = dynamic_cast(batch.get()); + for (size_t i = 0; i != columns_num; ++i) + { + writeColumn(root->fields[i], 
*chunk.getColumns()[i], data_types[i], nullptr, rows_num); + } + root->numElements = rows_num; + writer->add(*batch); +} + +void ORCBlockOutputFormat::finalize() +{ + writer->close(); +} + +void registerOutputFormatProcessorORC(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("ORC", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); +} + +} diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h new file mode 100644 index 00000000000..e075169b66f --- /dev/null +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class WriteBuffer; + +class ORCOutputStream : public orc::OutputStream +{ +public: + ORCOutputStream(WriteBuffer & out_); + + uint64_t getLength() const override; + uint64_t getNaturalWriteSize() const override; + void write(const void* buf, size_t length) override; + + void close() override {}; + const std::string& getName() const override { return "ORCOutputStream"; }; + +private: + WriteBuffer & out; +}; + +class ORCBlockOutputFormat : public IOutputFormat +{ +public: + ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "ORCBlockOutputFormat"; } + void consume(Chunk chunk) override; + void finalize() override; + + String getContentType() const override { return "application/octet-stream"; } + +private: + ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); + template + void writeDecimals( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num, + ConvertFunc convert); + template + void writeNumbers( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + size_t rows_num); + void writeColumn( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num); + + const FormatSettings format_settings; + ORCOutputStream output_stream; + DataTypes data_types; + ORC_UNIQUE_PTR writer; + ORC_UNIQUE_PTR schema; + orc::WriterOptions options; +}; + +} diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference new file mode 100644 index 00000000000..bd62476c2df --- /dev/null +++ b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -0,0 +1,6 @@ +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.000 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 0000-00-00 00:00:00.000 42.4200 42.4242424200 424242.42424242424242000000 42 +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 
Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 42.4200 42.4242424200 424242.42424242424242000000 42 diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh new file mode 100755 index 00000000000..8d7e85a03de --- /dev/null +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime, datetime64 DateTime64, decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > $CURDIR/tmp_orc_test_all_types.orc; + +cat $CURDIR/tmp_orc_test_all_types.orc | $CLICKHOUSE_CLIENT --query="INSERT INTO orc FORMAT ORC"; + +rm $CURDIR/tmp_orc_test_all_types.orc + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE orc"; From 9e1b8b2872e366663707109eaee60a4df577865f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 14 Jun 2020 16:26:37 +0000 Subject: [PATCH 037/330] Better exchanges, fix build, better comments, better tests --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 2 - src/Storages/RabbitMQ/RabbitMQHandler.cpp | 13 +- .../ReadBufferFromRabbitMQConsumer.cpp | 136 +++++++++--------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 7 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 +- .../WriteBufferToRabbitMQProducer.cpp | 12 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 5 +- .../integration/test_storage_rabbitmq/test.py | 55 +++---- 8 files changed, 122 insertions(+), 118 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 2d995d97f18..6257a60d678 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -142,8 +142,6 @@ Block RabbitMQBlockInputStream::readImpl() auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); - LOG_DEBUG(log, "Total amount of rows is " + std::to_string(result_block.rows())); - for (const auto & column : virtual_block.getColumnsWithTypeAndName()) { result_block.insert(column); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 8667427ee63..71c23bb9bc4 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ 
b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -41,8 +41,9 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) */ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { - loop_started = true; - stop_scheduled.store(false); + loop_started.store(true); + stop_scheduled = false; + event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } @@ -67,12 +68,8 @@ void RabbitMQHandler::stop() void RabbitMQHandler::stopWithTimeout() { - if (mutex_before_loop_stop.try_lock()) - { - stop_scheduled.store(true); - event_base_loopexit(evbase, &tv); - mutex_before_loop_stop.unlock(); - } + stop_scheduled = true; + event_base_loopexit(evbase, &tv); } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 31ca4f280e3..ef4398753c2 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace Exchange +namespace ExchangeType { /// Note that default here means default by implementation and not by rabbitmq settings static const String DEFAULT = "default"; @@ -42,7 +42,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String & local_exchange_name_, + const String & local_exchange_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -55,13 +55,15 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , bind_by_id(bind_by_id_) , num_queues(num_queues_) , exchange_type(exchange_type_) - , local_exchange_name(local_exchange_name_) + , local_exchange(local_exchange_) + , local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT) + , local_hash_exchange(local_exchange + "_" + ExchangeType::HASH) , stopped(stopped_) { messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != Exchange::DEFAULT; + exchange_type_set = exchange_type != ExchangeType::DEFAULT; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. @@ -87,53 +89,52 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { /* This direct-exchange is used for default implemenation and for INSERT query (so it is always declared). If exchange_type - * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table. + * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table (default). * This strict division to external and local exchanges is needed to avoid too much complexity with defining exchange_name - * for INSERT query producer and, in general, it is much better to distinguish them into separate ones. + * for INSERT query producer and, in general, it is better to distinguish them into separate ones. */ - String default_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; - consumer_channel->declareExchange(default_exchange, AMQP::fanout).onError([&](const char * message) + consumer_channel->declareExchange(local_default_exchange, AMQP::direct).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange {}. 
Reason: {}", default_exchange, message); - }); - - default_local_exchange = local_exchange_name; - default_local_exchange += exchange_type_set ? "_default_" + Exchange::DIRECT : "_" + Exchange::DIRECT; - consumer_channel->declareExchange(default_local_exchange, AMQP::direct).onError([&](const char * message) - { - local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_local_exchange, message); - }); - - /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. - consumer_channel->bindExchange(default_exchange, default_local_exchange, routing_keys[0]).onError([&](const char * message) - { - local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange. Reason: {}", default_exchange, default_local_exchange, message); + LOG_ERROR(log, "Failed to declare local direct-exchange. Reason: {}", message); }); if (!exchange_type_set) + { + consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare default fanout-exchange. Reason: {}", message); + }); + + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. + consumer_channel->bindExchange(exchange_name, local_default_exchange, routing_keys[0]).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind local direct-exchange to fanout-exchange. Reason: {}", message); + }); + return; + } /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported. AMQP::ExchangeType type; - if (exchange_type == Exchange::FANOUT) type = AMQP::ExchangeType::fanout; - else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; - else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; - else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else if (exchange_type == Exchange::HEADERS) type = AMQP::ExchangeType::headers; + if (exchange_type == ExchangeType::FANOUT) type = AMQP::ExchangeType::fanout; + else if (exchange_type == ExchangeType::DIRECT) type = AMQP::ExchangeType::direct; + else if (exchange_type == ExchangeType::TOPIC) type = AMQP::ExchangeType::topic; + else if (exchange_type == ExchangeType::HASH) type = AMQP::ExchangeType::consistent_hash; + else if (exchange_type == ExchangeType::HEADERS) type = AMQP::ExchangeType::headers; else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); - /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting - * the same messages more than once - is having only one consumer with one queue, which is not good.) + /* Declare client's exchange of the specified type and bind it to hash-exchange (if it is not already hash-exchange), which + * will evenly distribute messages between all consumers. (This enables better scaling as without hash-exchange - the only + * option to avoid getting the same messages more than once - is having only one consumer with one queue, which is not good.) 
*/ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare client's {} exchange: {}", exchange_type, message); + LOG_ERROR(log, "Failed to declare client's {} exchange. Reason: {}", exchange_type, message); }); /// No need for declaring hash-exchange if there is only one consumer with one queue or exchange type is already hash @@ -142,26 +143,32 @@ void ReadBufferFromRabbitMQConsumer::initExchange() hash_exchange = true; - if (exchange_type == Exchange::HASH) + if (exchange_type == ExchangeType::HASH) return; - AMQP::Table exchange_arguments; - exchange_arguments["hash-property"] = "message_id"; + /* By default hash exchange distributes messages based on a hash value of a routing key, which must be a string integer. But + * in current case we use hash exchange for binding to another exchange of some other type, which needs its own routing keys + * of other types: headers, patterns and string-keys. This means that hash property must be changed. + */ + AMQP::Table binding_arguments; + binding_arguments["hash-property"] = "message_id"; - String local_hash_exchange_name = local_exchange_name + "_hash"; - consumer_channel->declareExchange(local_hash_exchange_name, AMQP::consistent_hash, exchange_arguments) + /// Declare exchange for sharding. + consumer_channel->declareExchange(local_hash_exchange, AMQP::consistent_hash, binding_arguments) .onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - if (exchange_type == Exchange::HEADERS) + /// Then bind client's exchange to sharding exchange (by keys, specified by the client): + + if (exchange_type == ExchangeType::HEADERS) { AMQP::Table binding_arguments; std::vector matching; - for (auto & header : routing_keys) + for (const auto & header : routing_keys) { boost::split(matching, header, [](char c){ return c == '='; }); binding_arguments[matching[0]] = matching[1]; @@ -169,21 +176,21 @@ void ReadBufferFromRabbitMQConsumer::initExchange() } /// Routing key can be arbitrary here. - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_keys[0], binding_arguments) + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_keys[0], binding_arguments) .onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); }); } else { - for (auto & routing_key : routing_keys) + for (const auto & routing_key : routing_keys) { - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_key).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); }); } } @@ -227,7 +234,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - consumer_channel->bindQueue(default_local_exchange, queue_name_, binding_key) + /// Bind queue to exchange that is used for INSERT query and also for default implementation. 
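+    /// The outcome of this binding is reported asynchronously via default_bindings_created / default_bindings_error; the event loop below is run until one of them is set.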
+ consumer_channel->bindQueue(local_default_exchange, queue_name_, binding_key) .onSuccess([&] { default_bindings_created = true; @@ -238,13 +246,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); - /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed. Also note - * that if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise consumer might fail - * to subscribe and no resubscription will help. + /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed, because + * if moved there, it must(!) be wrapped inside a channel->onReady callback or any other (and the looping), otherwise + * consumer might fail to subscribe and no resubscription will help. */ subscribe(queues.back()); - LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); + LOG_DEBUG(log, "Queue " + queue_name_ + " is declared"); if (exchange_type_set) { @@ -253,10 +261,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) /* If exchange_type == hash, then bind directly to this client's exchange (because there is no need for a distributor * exchange as it is already hash-exchange), otherwise hash-exchange is a local distributor exchange. */ - String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; + String current_hash_exchange = exchange_type == ExchangeType::HASH ? exchange_name : local_hash_exchange; /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary. - consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(current_hash_exchange, queue_name_, binding_key) .onSuccess([&] { bindings_created = true; @@ -267,13 +275,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } - else if (exchange_type == Exchange::HEADERS) + else if (exchange_type == ExchangeType::HEADERS) { AMQP::Table binding_arguments; std::vector matching; /// It is not parsed for the second time - if it was parsed above, then it would go to the first if statement, not here. - for (auto & header : routing_keys) + for (const auto & header : routing_keys) { boost::split(matching, header, [](char c){ return c == '='; }); binding_arguments[matching[0]] = matching[1]; @@ -288,15 +296,15 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_keys[0], message); + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); }); } else { /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. - for (auto & routing_key : routing_keys) + for (const auto & routing_key : routing_keys) { - /// Binding directly to exchange, specified by the client + /// Binding directly to exchange, specified by the client. 
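+                    /// One binding is created per routing key; all of them point to the same queue.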
consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { @@ -305,7 +313,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); }); } } @@ -314,7 +322,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { default_bindings_error = true; - LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); + LOG_ERROR(log, "Failed to declare queue on the channel. Reason: {}", message); }); /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. @@ -364,7 +372,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) * executing all callbacks on the connection (not only its own), then there should be some point to unblock. * loop_started == 1 if current consumer is started the loop and not another. */ - if (!loop_started.load() && !eventHandler.checkStopIsScheduled().load()) + if (!loop_started.load() && !eventHandler.checkStopIsScheduled()) { stopEventLoopWithTimeout(); } @@ -373,7 +381,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); + LOG_ERROR(log, "Consumer {} failed. Reason: {}", channel_id, message); }); } @@ -385,7 +393,7 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() wait_subscribed = num_queues; - /// These variables are updated in a separate thread + /// These variables are updated in a separate thread. while (count_subscribed != wait_subscribed && !consumer_error) { startEventLoop(loop_started); @@ -393,11 +401,11 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - /// Updated in callbacks which are run by the loop + /// Updated in callbacks which are run by the loop. if (count_subscribed == num_queues) return; - /// A case that should never normally happen + /// A case that should never normally happen. 
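+    /// If it does, try to subscribe to every queue once more.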
for (auto & queue : queues) { subscribe(queue); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 6a2c847357d..d4bf35c00b8 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -32,7 +32,7 @@ public: const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String & local_exchange_name_, + const String & local_exchange_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -53,8 +53,11 @@ private: const size_t channel_id; const bool bind_by_id; const size_t num_queues; + const String & exchange_type; - const String & local_exchange_name; + const String & local_exchange; + const String local_default_exchange; + const String local_hash_exchange; Poco::Logger * log; char row_delimiter; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 852edd24726..3de8d193302 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -107,6 +107,8 @@ StorageRabbitMQ::StorageRabbitMQ( auto table_id = getStorageID(); String table_name = table_id.table_name; + + /// Make sure that local exchange name is unique for each table and is not the same as client's exchange name local_exchange_name = exchange_name + "_" + table_name; } @@ -132,6 +134,7 @@ Pipes StorageRabbitMQ::read( } LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); + return pipes; } @@ -225,12 +228,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - /* If exchange type is set, then there are different exchanges for external publishing and for INSERT query - * as in this case they are of different types. - */ - String producer_exchange = exchange_type == "default" ? local_exchange_name : local_exchange_name + "_default"; - - return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, + return std::make_shared(parsed_address, login_password, routing_keys[0], local_exchange_name, log, num_consumers * num_queues, bind_by_id, use_transactional_channel, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 09179b95a15..6d74e2c8298 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -77,7 +77,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - finilize(); + finilizeProducer(); connection.close(); assert(rows == 0 && chunks.empty()); } @@ -118,7 +118,9 @@ void WriteBufferToRabbitMQProducer::countRow() ++message_counter; - /// run event loop to actually publish, checking exchange is just a point to stop the event loop + /* Run event loop to actually publish, checking exchange is just a point to stop the event loop. Messages are not sent + * without looping and looping after every batch is much better than processing all the messages in one time. 
+ */ if ((message_counter %= Batch) == 0) { checkExchange(); @@ -132,7 +134,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() std::atomic exchange_declared = false, exchange_error = false; /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name - * and makes it visible from current producer_channel. + * and makes it declared on the current producer_channel. */ producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) .onSuccess([&]() @@ -142,7 +144,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() .onError([&](const char * message) { exchange_error = true; - LOG_ERROR(log, "Exchange was not declared: {}", message); + LOG_ERROR(log, "Exchange for INSERT query was not declared. Reason: {}", message); }); /// These variables are updated in a separate thread and starting the loop blocks current thread @@ -153,7 +155,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() } -void WriteBufferToRabbitMQProducer::finilize() +void WriteBufferToRabbitMQProducer::finilizeProducer() { checkExchange(); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 9fd36257561..7d2bb6e598f 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -40,7 +40,7 @@ public: private: void nextImpl() override; void checkExchange(); - void finilize(); + void finilizeProducer(); std::pair & login_password; const String routing_key; @@ -56,9 +56,6 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; - String channel_id; - - Messages messages; Poco::Logger * log; const std::optional delim; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f58e898a45f..3c4c0b3215b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -497,7 +497,7 @@ def test_rabbitmq_big_message(rabbitmq_cluster): assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): NUM_CHANNELS = 5 @@ -560,7 +560,7 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): NUM_QUEUES = 4 @@ -623,7 +623,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster): NUM_CONSUMERS = 10 @@ -688,7 +688,7 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_read_only_combo(rabbitmq_cluster): NUM_MV = 5; @@ -768,7 +768,7 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(180) +@pytest.mark.timeout(240) def 
test_rabbitmq_insert(rabbitmq_cluster): instance.query(''' CREATE TABLE test.rabbitmq (key UInt64, value UInt64) @@ -1054,7 +1054,10 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): key = "direct_" + str(key_num) key_num += 1 for message in messages: - channel.basic_publish(exchange='direct_exchange_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='direct_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1066,8 +1069,8 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): for consumer_id in range(num_tables): instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; '''.format(consumer_id)) instance.query(''' @@ -1122,7 +1125,10 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): key_num = 0 for message in messages: - channel.basic_publish(exchange='fanout_exchange_testing', routing_key='', body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='fanout_exchange_testing', routing_key='', + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1215,7 +1221,10 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): key = "random.logs" for message in messages: - channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='topic_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1225,18 +1234,12 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables + messages_num * num_tables: break - for consumer_id in range(num_tables): + for consumer_id in range(num_tables * 2): instance.query(''' DROP TABLE IF EXISTS test.topic_exchange_{0}; DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; '''.format(consumer_id)) - for consumer_id in range(num_tables): - instance.query(''' - DROP TABLE IF EXISTS test.topic_exchange_{0}; - DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; - '''.format(num_tables + consumer_id)) - instance.query(''' DROP TABLE IF EXISTS test.destination; ''') @@ -1244,7 +1247,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_hash_exchange(rabbitmq_cluster): instance.query(''' DROP TABLE IF EXISTS test.destination; @@ -1288,8 +1291,8 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = str(randrange(10)) for message in messages: + key = str(randrange(10)) channel.basic_publish(exchange='hash_exchange_testing', routing_key=key, body=message) connection.close() @@ -1389,7 +1392,9 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): for key in keys: for message in messages: - channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1488,8 +1493,9 @@ def test_rabbitmq_headers_exchange(rabbitmq_cluster): key_num = 0 for message in 
messages: + mes_id = str(randrange(10)) channel.basic_publish(exchange='headers_exchange_testing', routing_key='', - properties=pika.BasicProperties(headers=fields), body=message) + properties=pika.BasicProperties(headers=fields, message_id=mes_id), body=message) connection.close() @@ -1499,16 +1505,11 @@ def test_rabbitmq_headers_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables_to_receive: break - for consumer_id in range(num_tables_to_receive): + for consumer_id in range(num_tables_to_receive + num_tables_to_ignore): instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; '''.format(consumer_id)) - for consumer_id in range(num_tables_to_ignore): - instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; - DROP TABLE IF EXISTS test.direct_exchange_{0}; - '''.format(consumer_id + num_tables_to_receive)) instance.query(''' DROP TABLE IF EXISTS test.destination; From 7780a74bd8e095c1ff9bc09eff27bba5d3fa0680 Mon Sep 17 00:00:00 2001 From: Avogar Date: Wed, 17 Jun 2020 22:32:11 +0300 Subject: [PATCH 038/330] Update ORCBlockOutputFormat and add orc include directory in include path --- src/CMakeLists.txt | 6 + .../Formats/Impl/ORCBlockOutputFormat.cpp | 167 ++++++++++-------- .../Formats/Impl/ORCBlockOutputFormat.h | 31 ++-- .../01307_orc_output_format.reference | 12 +- .../0_stateless/01307_orc_output_format.sh | 4 +- 5 files changed, 117 insertions(+), 103 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fe223373cf3..aa779f011b7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -361,6 +361,12 @@ target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_C target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) +target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR}) +configure_file ( + "${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" + "${ORC_INCLUDE_DIR}/orc/orc-config.hh" +) + if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 3745ee229a8..4bd9dd230fc 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -136,14 +136,13 @@ template void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( orc::ColumnVectorBatch * orc_column, const IColumn & column, - const PaddedPODArray * null_bytemap, - size_t rows_num) + const PaddedPODArray * null_bytemap) { NumberVectorBatch * number_orc_column = dynamic_cast(orc_column); const auto & number_column = assert_cast &>(column); - number_orc_column->resize(rows_num); + number_orc_column->resize(number_column.size()); - for (size_t i = 0; i != rows_num; ++i) + for (size_t i = 0; i != number_column.size(); ++i) { if (null_bytemap && (*null_bytemap)[i]) { @@ -152,7 +151,7 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( } number_orc_column->data[i] = number_column.getElement(i); } - number_orc_column->numElements = rows_num; + number_orc_column->numElements = number_column.size(); } template @@ -161,7 +160,6 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap, - size_t rows_num, ConvertFunc convert) { 
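+    /// Precision and scale are taken from the ClickHouse Decimal type; each value is converted with the supplied lambda, and rows marked in the null bytemap only get notNull[i] reset.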
DecimalVectorBatch *decimal_orc_column = dynamic_cast(orc_column); @@ -169,8 +167,8 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( const auto * decimal_type = typeid_cast *>(type.get()); decimal_orc_column->precision = decimal_type->getPrecision(); decimal_orc_column->scale = decimal_type->getScale(); - decimal_orc_column->resize(rows_num); - for (size_t i = 0; i != rows_num; ++i) + decimal_orc_column->resize(decimal_column.size()); + for (size_t i = 0; i != decimal_column.size(); ++i) { if (null_bytemap && (*null_bytemap)[i]) { @@ -179,134 +177,150 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( } decimal_orc_column->values[i] = convert(decimal_column.getElement(i).value); } - decimal_orc_column->numElements = rows_num; + decimal_orc_column->numElements = decimal_column.size(); +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeStrings( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap) +{ + orc::StringVectorBatch * string_orc_column = dynamic_cast(orc_column); + const auto & string_column = assert_cast(column); + string_orc_column->resize(string_column.size()); + + for (size_t i = 0; i != string_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + string_orc_column->notNull[i] = 0; + continue; + } + const StringRef & string = string_column.getDataAt(i); + string_orc_column->data[i] = const_cast(string.data); + string_orc_column->length[i] = string.size; + } + string_orc_column->numElements = string_column.size(); +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDateTimes( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + GetSecondsFunc get_seconds, + GetNanosecondsFunc get_nanoseconds) +{ + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + timestamp_orc_column->resize(timestamp_column.size()); + + for (size_t i = 0; i != timestamp_column.size(); ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + timestamp_orc_column->notNull[i] = 0; + continue; + } + timestamp_orc_column->data[i] = get_seconds(timestamp_column.getElement(i)); + timestamp_orc_column->nanoseconds[i] = get_nanoseconds(timestamp_column.getElement(i)); + } + timestamp_orc_column->numElements = timestamp_column.size(); } void ORCBlockOutputFormat::writeColumn( orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, - const PaddedPODArray * null_bytemap, - size_t rows_num) + const PaddedPODArray * null_bytemap) { if (null_bytemap) { orc_column->hasNulls = true; + orc_column->notNull.resize(column.size()); } switch (type->getTypeId()) { case TypeIndex::Int8: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::UInt8: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Int16: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Int32: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::UInt32: { - 
writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Int64: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::UInt64: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Float32: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); break; } case TypeIndex::Float64: { - writeNumbers(orc_column, column, null_bytemap, rows_num); + writeNumbers(orc_column, column, null_bytemap); + break; + } + case TypeIndex::FixedString: + { + writeStrings(orc_column, column, null_bytemap); break; } - case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { - orc::StringVectorBatch * string_orc_column = dynamic_cast(orc_column); - const auto & string_column = assert_cast(column); - string_orc_column->resize(rows_num); - - for (size_t i = 0; i != rows_num; ++i) - { - if (null_bytemap && (*null_bytemap)[i]) - { - string_orc_column->notNull[i] = 0; - continue; - } - const StringRef & string = string_column.getDataAt(i); - string_orc_column->data[i] = const_cast(string.data); - string_orc_column->length[i] = string.size; - } - string_orc_column->numElements = rows_num; + writeStrings(orc_column, column, null_bytemap); break; } case TypeIndex::DateTime: { - orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); - const auto & timestamp_column = assert_cast(column); - timestamp_orc_column->resize(rows_num); - - for (size_t i = 0; i != rows_num; ++i) - { - if (null_bytemap && (*null_bytemap)[i]) - { - timestamp_orc_column->notNull[i] = 0; - continue; - } - timestamp_orc_column->data[i] = timestamp_column.getElement(i); - timestamp_orc_column->nanoseconds[i] = 0; - } - timestamp_orc_column->numElements = rows_num; + writeDateTimes( + orc_column, + column, null_bytemap, + [](UInt32 value){ return value; }, + [](UInt32){ return 0; }); break; } case TypeIndex::DateTime64: { - orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); - const auto & timestamp_column = assert_cast(column); const auto * timestamp_type = assert_cast(type.get()); - UInt32 scale = timestamp_type->getScale(); - timestamp_orc_column->resize(rows_num); - - for (size_t i = 0; i != rows_num; ++i) - { - if (null_bytemap && (*null_bytemap)[i]) - { - timestamp_orc_column->notNull[i] = 0; - continue; - } - UInt64 value = timestamp_column.getElement(i); - timestamp_orc_column->data[i] = value / std::pow(10, scale); - timestamp_orc_column->nanoseconds[i] = (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); - } - timestamp_orc_column->numElements = rows_num; + writeDateTimes( + orc_column, + column, null_bytemap, + [scale](UInt64 value){ return value / std::pow(10, scale); }, + [scale](UInt64 value){ return (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); }); break; } case TypeIndex::Decimal32:; @@ -316,7 +330,6 @@ void ORCBlockOutputFormat::writeColumn( column, type, null_bytemap, - rows_num, [](Int32 value){ return value; }); break; } @@ -327,7 +340,6 @@ void ORCBlockOutputFormat::writeColumn( column, type, null_bytemap, - rows_num, [](Int64 value){ return value; }); break; } @@ -338,7 +350,6 @@ void ORCBlockOutputFormat::writeColumn( column, type, null_bytemap, - rows_num, [](Int128 value){ return orc::Int128(value >> 64, (value << 64) >> 64); }); break; } @@ 
-347,7 +358,7 @@ void ORCBlockOutputFormat::writeColumn( const auto & nullable_column = assert_cast(column); const PaddedPODArray & new_null_bytemap = assert_cast &>(*nullable_column.getNullMapColumnPtr()).getData(); auto nested_type = removeNullable(type); - writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap, rows_num); + writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap); break; } /* Doesn't work @@ -357,16 +368,16 @@ void ORCBlockOutputFormat::writeColumn( const auto & list_column = assert_cast(column); auto nested_type = assert_cast(*type).getNestedType(); const ColumnArray::Offsets & offsets = list_column.getOffsets(); - list_orc_column->resize(rows_num); + list_orc_column->resize(list_column.size()); list_orc_column->offsets[0] = 0; - for (size_t i = 0; i != rows_num; ++i) + for (size_t i = 0; i != list_column.size(); ++i) { list_orc_column->offsets[i + 1] = offsets[i]; } const IColumn & nested_column = list_column.getData(); orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get(); writeColumn(nested_orc_column, nested_column, nested_type, null_bytemap, nested_column.size()); - list_orc_column->numElements = rows_num; + list_orc_column->numElements = list_column.size(); break; } */ @@ -383,7 +394,7 @@ void ORCBlockOutputFormat::consume(Chunk chunk) orc::StructVectorBatch *root = dynamic_cast(batch.get()); for (size_t i = 0; i != columns_num; ++i) { - writeColumn(root->fields[i], *chunk.getColumns()[i], data_types[i], nullptr, rows_num); + writeColumn(root->fields[i], *chunk.getColumns()[i], data_types[i], nullptr); } root->numElements = rows_num; writer->add(*batch); diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index e075169b66f..18261a90acf 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -39,25 +39,22 @@ public: private: ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); + template - void writeDecimals( - orc::ColumnVectorBatch * orc_column, - const IColumn & column, - DataTypePtr & type, - const PaddedPODArray * null_bytemap, - size_t rows_num, - ConvertFunc convert); + void writeDecimals(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, + const PaddedPODArray * null_bytemap, ConvertFunc convert); + template - void writeNumbers( - orc::ColumnVectorBatch * orc_column, - const IColumn & column, - const PaddedPODArray * null_bytemap, - size_t rows_num); - void writeColumn( - orc::ColumnVectorBatch * orc_column, - const IColumn & column, DataTypePtr & type, - const PaddedPODArray * null_bytemap, - size_t rows_num); + void writeNumbers(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap); + + template + void writeStrings(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap); + + template + void writeDateTimes(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, + GetSecondsFunc get_seconds, GetNanosecondsFunc get_nanoseconds); + + void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); const FormatSettings format_settings; ORCOutputStream output_stream; diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference index 
bd62476c2df..da719072eb2 100644 --- a/tests/queries/0_stateless/01307_orc_output_format.reference +++ b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -1,6 +1,6 @@ -255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 -4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.000 34.1234 123123.1231231230 123123123.12312312312312300000 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 0000-00-00 00:00:00.000 42.4200 42.4242424200 424242.42424242424242000000 42 -255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 -4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 34.1234 123123.1231231230 123123123.12312312312312300000 \N -42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 42.4200 42.4242424200 424242.42424242424242000000 42 +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.4200 42.4242424200 424242.42424242424242000000 42 +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2020 2021-12-19 2021-12-19 03:00:00 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2000 2024-10-04 2028-04-21 01:20:00 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 4242 1970-02-12 1970-01-01 03:00:42 42.4200 42.4242424200 424242.42424242424242000000 42 diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh index 8d7e85a03de..c46131dcff6 100755 --- a/tests/queries/0_stateless/01307_orc_output_format.sh +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -5,9 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime, datetime64 DateTime64, decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, fixed FixedString(4), date Date, datetime DateTime, decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; -$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 
100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', '2020', 18980, 1639872000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', '2000', 20000, 1839882000, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', '4242', 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; $CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > $CURDIR/tmp_orc_test_all_types.orc; From 4fd3bcd8232758ade3d2c353aedb184324b14387 Mon Sep 17 00:00:00 2001 From: Avogar Date: Thu, 18 Jun 2020 03:17:08 +0300 Subject: [PATCH 039/330] Fix build errors --- src/Processors/Formats/Impl/ORCBlockOutputFormat.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 18261a90acf..0cd50959ad7 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -19,11 +19,12 @@ public: uint64_t getNaturalWriteSize() const override; void write(const void* buf, size_t length) override; - void close() override {}; - const std::string& getName() const override { return "ORCOutputStream"; }; + void close() override {} + const std::string& getName() const override { return name; } private: WriteBuffer & out; + std::string name = "ORCOutputStream"; }; class ORCBlockOutputFormat : public IOutputFormat @@ -35,8 +36,6 @@ public: void consume(Chunk chunk) override; void finalize() override; - String getContentType() const override { return "application/octet-stream"; } - private: ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); From 75a66fbba3af436cd3e6791d72e9b6eacd16b680 Mon Sep 17 00:00:00 2001 From: Avogar Date: Thu, 18 Jun 2020 15:03:48 +0300 Subject: [PATCH 040/330] Fix errors 2 --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 4bd9dd230fc..f34ca21a1b3 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -42,16 +42,7 @@ void ORCOutputStream::write(const void* buf, size_t length) } ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) -{ - schema = orc::createStructType(); - size_t columns_count = header_.columns(); - for (size_t i = 0; i != columns_count; ++i) - { - schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); - } - writer = orc::createWriter(*schema, &output_stream, options); -} + : 
IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) {} ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) { @@ -149,7 +140,10 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( number_orc_column->notNull[i] = 0; continue; } - number_orc_column->data[i] = number_column.getElement(i); + if constexpr (std::is_same_v) + number_orc_column->data[i] = static_cast(number_column.getElement(i)); + else + number_orc_column->data[i] = number_column.getElement(i); } number_orc_column->numElements = number_column.size(); } @@ -390,6 +384,16 @@ void ORCBlockOutputFormat::consume(Chunk chunk) { size_t columns_num = chunk.getNumColumns(); size_t rows_num = chunk.getNumRows(); + if (!writer) + { + const Block & header = getPort(PortKind::Main).getHeader(); + schema = orc::createStructType(); + for (size_t i = 0; i != columns_num; ++i) + { + schema->addStructField(header.safeGetByPosition(i).name, getORCType(data_types[i])); + } + writer = orc::createWriter(*schema, &output_stream, options); + } ORC_UNIQUE_PTR batch = writer->createRowBatch(rows_num); orc::StructVectorBatch *root = dynamic_cast(batch.get()); for (size_t i = 0; i != columns_num; ++i) From 71b5d267ce1fcae6f47704bb91d8f95c63b688f2 Mon Sep 17 00:00:00 2001 From: Avogar Date: Thu, 18 Jun 2020 21:02:13 +0300 Subject: [PATCH 041/330] Set compression to None --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index f34ca21a1b3..d57a5b665ca 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -42,7 +42,17 @@ void ORCOutputStream::write(const void* buf, size_t length) } ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) {} + : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) +{ + schema = orc::createStructType(); + options.setCompression(orc::CompressionKind::CompressionKind_NONE); + size_t columns_count = header_.columns(); + for (size_t i = 0; i != columns_count; ++i) + { + schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); + } + writer = orc::createWriter(*schema, &output_stream, options); +} ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) { @@ -140,10 +150,7 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( number_orc_column->notNull[i] = 0; continue; } - if constexpr (std::is_same_v) - number_orc_column->data[i] = static_cast(number_column.getElement(i)); - else - number_orc_column->data[i] = number_column.getElement(i); + number_orc_column->data[i] = number_column.getElement(i); } number_orc_column->numElements = number_column.size(); } @@ -355,7 +362,7 @@ void ORCBlockOutputFormat::writeColumn( writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap); break; } - /* Doesn't work + /* Doesn't work for unknown reason case TypeIndex::Array: { orc::ListVectorBatch * list_orc_column = dynamic_cast(orc_column); @@ -384,16 +391,6 @@ void ORCBlockOutputFormat::consume(Chunk chunk) { size_t columns_num = 
chunk.getNumColumns(); size_t rows_num = chunk.getNumRows(); - if (!writer) - { - const Block & header = getPort(PortKind::Main).getHeader(); - schema = orc::createStructType(); - for (size_t i = 0; i != columns_num; ++i) - { - schema->addStructField(header.safeGetByPosition(i).name, getORCType(data_types[i])); - } - writer = orc::createWriter(*schema, &output_stream, options); - } ORC_UNIQUE_PTR batch = writer->createRowBatch(rows_num); orc::StructVectorBatch *root = dynamic_cast(batch.get()); for (size_t i = 0; i != columns_num; ++i) From 07ad947c1ddb9081a32d9292a53ba9076cac2b96 Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 19 Jun 2020 17:11:45 +0300 Subject: [PATCH 042/330] Add arrays support --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 38 ++++++++++++++---- .../Formats/Impl/ORCBlockOutputFormat.h | 3 ++ .../01308_orc_output_format_arrays.reference | Bin 0 -> 567 bytes .../01308_orc_output_format_arrays.sh | 15 +++++++ 4 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/01308_orc_output_format_arrays.reference create mode 100755 tests/queries/0_stateless/01308_orc_output_format_arrays.sh diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index d57a5b665ca..77cdf1c352b 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -104,13 +104,11 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t { return getORCType(removeNullable(type)); } - /* case TypeIndex::Array: { const auto * array_type = typeid_cast(type.get()); return orc::createListType(getORCType(array_type->getNestedType())); } - */ case TypeIndex::Decimal32: { const auto * decimal_type = typeid_cast *>(type.get()); @@ -150,7 +148,10 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( number_orc_column->notNull[i] = 0; continue; } - number_orc_column->data[i] = number_column.getElement(i); + if (std::is_same_v) + number_orc_column->data[i] = static_cast(number_column.getElement(i)); + else + number_orc_column->data[i] = number_column.getElement(i); } number_orc_column->numElements = number_column.size(); } @@ -362,7 +363,6 @@ void ORCBlockOutputFormat::writeColumn( writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap); break; } - /* Doesn't work for unknown reason case TypeIndex::Array: { orc::ListVectorBatch * list_orc_column = dynamic_cast(orc_column); @@ -375,23 +375,45 @@ void ORCBlockOutputFormat::writeColumn( { list_orc_column->offsets[i + 1] = offsets[i]; } - const IColumn & nested_column = list_column.getData(); orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get(); - writeColumn(nested_orc_column, nested_column, nested_type, null_bytemap, nested_column.size()); + writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap); list_orc_column->numElements = list_column.size(); break; } - */ default: throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); } } +size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type) +{ + if (type->getTypeId() == TypeIndex::Array) + { + auto nested_type = assert_cast(*type).getNestedType(); + const IColumn & nested_column = assert_cast(column).getData(); + return getColumnSize(nested_column, nested_type); + } + return column.size(); +} + +size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk) +{ + 
size_t columns_num = chunk.getNumColumns(); + size_t max_column_size = 0; + for (size_t i = 0; i != columns_num; ++i) + { + max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i])); + } + return max_column_size; +} + void ORCBlockOutputFormat::consume(Chunk chunk) { size_t columns_num = chunk.getNumColumns(); size_t rows_num = chunk.getNumRows(); - ORC_UNIQUE_PTR batch = writer->createRowBatch(rows_num); + /// getMaxColumnSize is needed to write arrays. + /// The size of the batch must be no less than total amount of array elements. + ORC_UNIQUE_PTR batch = writer->createRowBatch(getMaxColumnSize(chunk)); orc::StructVectorBatch *root = dynamic_cast(batch.get()); for (size_t i = 0; i != columns_num; ++i) { diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 0cd50959ad7..0252afd3015 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -55,6 +55,9 @@ private: void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); + size_t getColumnSize(const IColumn & column, DataTypePtr & ptr); + size_t getMaxColumnSize(Chunk & chunk); + const FormatSettings format_settings; ORCOutputStream output_stream; DataTypes data_types; diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference new file mode 100644 index 0000000000000000000000000000000000000000..1f9646ac112132378f512bb4e3a610f6019698e1 GIT binary patch literal 567 zcmaKou}Z{15Qb-Fv+J_CUfj1(^yJ73Txpxm?(hk$eF1A9K)8pp z5ER#OZ*wFHHuLYd|I9G_i{;#AZq?1nOH zi@h-PB)k2_Z@x78PeSsIycPNP4-!J}r2;h0Sd#;1L6P@^5+4Osk&~#JS^h!#DJTY| z99adXz?c*3z$*GG`bp%3P4IQ>*K8yV+53Mj)#Y{L&fPY*Z5^i6IX{^Uqp&J}r7{({ zSCX4ST<`_rgrW2m5pNorkz6v0~6|4#a#fA`ch4R^#kxNzslh121M bv&Wk&w9hE%uGh(gAgwNH^YEtBVmaq;55+tO literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh new file mode 100755 index 00000000000..8e36cf604ea --- /dev/null +++ b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array(Array(Int32))) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE orc"; + From f6ab431f2f036514aa5c53680f27e96a4e04f1e1 Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 19 Jun 2020 17:50:44 +0300 Subject: [PATCH 043/330] Change String type to Binary --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 77cdf1c352b..7400c30306b 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -98,7 +98,7 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { - return orc::createPrimitiveType(orc::TypeKind::STRING); + return orc::createPrimitiveType(orc::TypeKind::BINARY); } case TypeIndex::Nullable: { From 2a923f434f0b19944bbd1c8206084d74ed629ef9 Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 19 Jun 2020 22:38:51 +0300 Subject: [PATCH 044/330] Fix build errors --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 +- src/Processors/Formats/Impl/ORCBlockOutputFormat.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 7400c30306b..4ba6d3f5b7c 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -148,7 +148,7 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( number_orc_column->notNull[i] = 0; continue; } - if (std::is_same_v) + if constexpr (std::is_same::value) number_orc_column->data[i] = static_cast(number_column.getElement(i)); else number_orc_column->data[i] = number_column.getElement(i); diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 0252afd3015..9035cbef41c 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -55,7 +55,7 @@ private: void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); - size_t getColumnSize(const IColumn & column, DataTypePtr & ptr); + size_t getColumnSize(const IColumn & column, DataTypePtr & type); size_t getMaxColumnSize(Chunk & chunk); const FormatSettings format_settings; From e9eb2514d6328e03fac9881265e260e6e8cb3784 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 23 Jun 2020 09:58:31 +0300 Subject: [PATCH 045/330] Update polymorphic_parts_l.xml --- tests/performance/polymorphic_parts_l.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index acda0de281a..ea20d08409a 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -25,9 +25,9 @@ - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) - INSERT INTO 
hits_compact(UserID) SELECT rand() FROM numbers(100000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From 61cf7d02d5cf9b0f728c411d704ef043aca78752 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 23 Jun 2020 09:59:29 +0300 Subject: [PATCH 046/330] Update polymorphic_parts_m.xml --- tests/performance/polymorphic_parts_m.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index a9842496de0..2cf94d33ae8 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -25,9 +25,9 @@ - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From 385d7d6674661721f4c05fa2ec4e392a5342d00a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 23 Jun 2020 10:00:05 +0300 Subject: [PATCH 047/330] Update polymorphic_parts_s.xml --- tests/performance/polymorphic_parts_s.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index 3b9eea91b1d..fbb6903c3c0 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -25,9 +25,9 @@ - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From ec3d1017680eb60de33da222e11447076a983eb5 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 23 Jun 2020 13:50:56 +0300 Subject: [PATCH 048/330] style fix + bump tests --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 4ba6d3f5b7c..b0125cd661f 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -48,9 +48,7 @@ ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & hea options.setCompression(orc::CompressionKind::CompressionKind_NONE); size_t columns_count = header_.columns(); for (size_t i = 0; i != columns_count; ++i) - { schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); - } writer = orc::createWriter(*schema, &output_stream, options); } From a3b240ba7adbe44fb57046e62bbb707b2c583fd0 Mon Sep 17 00:00:00 2001 From: 
alesapin Date: Wed, 24 Jun 2020 20:12:06 +0300 Subject: [PATCH 049/330] test empty commit From cb30dbfe285e571e81f266a0ec78f7436301499b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 24 Jun 2020 20:32:57 +0300 Subject: [PATCH 050/330] Correct merge with master --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 13 +++++++++---- src/Storages/RabbitMQ/RabbitMQBlockInputStream.h | 2 ++ .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 9 +++++++-- .../RabbitMQ/RabbitMQBlockOutputStream.h | 3 ++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 16 ++++++++++------ src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 ++ 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 6257a60d678..c66009a5eca 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -14,13 +14,18 @@ namespace DB { RabbitMQBlockInputStream::RabbitMQBlockInputStream( - StorageRabbitMQ & storage_, const Context & context_, const Names & columns, Poco::Logger * log_) + StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_, + const Names & columns, + Poco::Logger * log_) : storage(storage_) + , metadata_snapshot(metadata_snapshot_) , context(context_) , column_names(columns) , log(log_) - , non_virtual_header(storage.getSampleBlockNonMaterialized()) - , virtual_header(storage.getSampleBlockForColumns({"_exchange"})) + , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) + , virtual_header(metadata_snapshot->getSampleBlockForColumns({"_exchange"}, storage.getVirtuals(), storage.getStorageID())) { } @@ -36,7 +41,7 @@ RabbitMQBlockInputStream::~RabbitMQBlockInputStream() Block RabbitMQBlockInputStream::getHeader() const { - return storage.getSampleBlockForColumns(column_names); + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()); } diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h index fbdb40bded8..d171893d3b3 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -14,6 +14,7 @@ class RabbitMQBlockInputStream : public IBlockInputStream public: RabbitMQBlockInputStream( StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, const Context & context_, const Names & columns, Poco::Logger * log_); @@ -29,6 +30,7 @@ public: private: StorageRabbitMQ & storage; + StorageMetadataPtr metadata_snapshot; Context context; Names column_names; Poco::Logger * log; diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 5dc2c1f8fc4..ddcde7cf24f 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -15,14 +15,19 @@ namespace ErrorCodes RabbitMQBlockOutputStream::RabbitMQBlockOutputStream( - StorageRabbitMQ & storage_, const Context & context_) : storage(storage_), context(context_) + StorageRabbitMQ & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) { } Block RabbitMQBlockOutputStream::getHeader() const { - return storage.getSampleBlockNonMaterialized(); + return metadata_snapshot->getSampleBlockNonMaterialized(); } diff --git 
a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h index 2f7b89a2a30..f8ed79438f4 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h @@ -12,7 +12,7 @@ class RabbitMQBlockOutputStream : public IBlockOutputStream { public: - explicit RabbitMQBlockOutputStream(StorageRabbitMQ & storage_, const Context & context_); + explicit RabbitMQBlockOutputStream(StorageRabbitMQ & storage_, const StorageMetadataPtr & metadata_snapshot_, const Context & context_); Block getHeader() const override; @@ -22,6 +22,7 @@ public: private: StorageRabbitMQ & storage; + StorageMetadataPtr metadata_snapshot; Context context; ProducerBufferPtr buffer; BlockOutputStreamPtr child; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 3de8d193302..60a641064a8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -98,7 +98,9 @@ StorageRabbitMQ::StorageRabbitMQ( } rabbitmq_context.makeQueryContext(); - setColumns(columns_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); @@ -115,6 +117,7 @@ StorageRabbitMQ::StorageRabbitMQ( Pipes StorageRabbitMQ::read( const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -129,8 +132,9 @@ Pipes StorageRabbitMQ::read( for (size_t i = 0; i < num_created_consumers; ++i) { - pipes.emplace_back(std::make_shared(std::make_shared( - *this, context, column_names, log))); + pipes.emplace_back( + std::make_shared(std::make_shared( + *this, metadata_snapshot, context, column_names, log))); } LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); @@ -139,9 +143,9 @@ Pipes StorageRabbitMQ::read( } -BlockOutputStreamPtr StorageRabbitMQ::write(const ASTPtr &, const Context & context) +BlockOutputStreamPtr StorageRabbitMQ::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context & context) { - return std::make_shared(*this, context); + return std::make_shared(*this, metadata_snapshot, context); } @@ -316,7 +320,7 @@ bool StorageRabbitMQ::streamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { - auto stream = std::make_shared(*this, rabbitmq_context, block_io.out->getHeader().getNames(), log); + auto stream = std::make_shared(*this, getInMemoryMetadataPtr(), rabbitmq_context, block_io.out->getHeader().getNames(), log); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 79e4d5e4ca2..567951dee6b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -32,6 +32,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -40,6 +41,7 @@ public: BlockOutputStreamPtr write( const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, const Context & context) override; void pushReadBuffer(ConsumerBufferPtr buf); From addee61bcb6179bf5807f3437980589b604d298a Mon Sep 17 00:00:00 2001 From: 
alesapin Date: Thu, 25 Jun 2020 00:14:49 +0300 Subject: [PATCH 051/330] Trying to do everything on top of libuv, add heartbeats --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 14 ++--- src/Storages/RabbitMQ/RabbitMQHandler.h | 8 +-- .../ReadBufferFromRabbitMQConsumer.cpp | 3 - src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 61 +++++++++++++------ src/Storages/RabbitMQ/StorageRabbitMQ.h | 16 +++-- .../WriteBufferToRabbitMQProducer.cpp | 26 +++++--- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 6 +- .../integration/test_storage_rabbitmq/test.py | 5 +- 9 files changed, 86 insertions(+), 55 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index c66009a5eca..a1442f91fbe 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -66,7 +66,7 @@ Block RabbitMQBlockInputStream::readImpl() finished = true; - MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); + MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); auto input_format = FormatFactory::instance().getInputFormat( diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 71c23bb9bc4..464cfdbd5b2 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -11,9 +11,9 @@ enum }; -RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : - LibEventHandler(evbase_), - evbase(evbase_), +RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : + AMQP::LibUvHandler(loop_), + loop(loop_), log(log_) { tv.tv_sec = 0; @@ -44,7 +44,7 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) loop_started.store(true); stop_scheduled = false; - event_base_loop(evbase, EVLOOP_NONBLOCK); + uv_run(loop, UV_RUN_NOWAIT); mutex_before_event_loop.unlock(); } } @@ -52,7 +52,7 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) void RabbitMQHandler::startProducerLoop() { - event_base_loop(evbase, EVLOOP_NONBLOCK); + uv_run(loop, UV_RUN_NOWAIT); } @@ -60,7 +60,7 @@ void RabbitMQHandler::stop() { if (mutex_before_loop_stop.try_lock()) { - event_base_loopbreak(evbase); + uv_stop(loop); mutex_before_loop_stop.unlock(); } } @@ -69,7 +69,7 @@ void RabbitMQHandler::stop() void RabbitMQHandler::stopWithTimeout() { stop_scheduled = true; - event_base_loopexit(evbase, &tv); + uv_stop(loop); } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 9b2d273422d..7cb0c44261f 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -7,16 +7,16 @@ #include #include #include -#include +#include namespace DB { -class RabbitMQHandler : public AMQP::LibEventHandler +class RabbitMQHandler : public AMQP::LibUvHandler { public: - RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); + RabbitMQHandler(uv_loop_t * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; void startConsumerLoop(std::atomic & loop_started); @@ -26,7 +26,7 @@ public: std::atomic & checkStopIsScheduled() { return stop_scheduled; }; private: - event_base * evbase; + uv_loop_t * loop; Poco::Logger * log; timeval tv; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp 
b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index ef4398753c2..3c591b5352e 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -446,10 +446,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() } if (received.empty()) - { - LOG_TRACE(log, "No more messages to be fetched"); return false; - } messages.clear(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 60a641064a8..34b3c460315 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -43,6 +43,8 @@ enum Connection_setup_retries_max = 1000 }; +static const auto RESCHEDULE_MS = 500; + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -80,19 +82,21 @@ StorageRabbitMQ::StorageRabbitMQ( rabbitmq_context.getConfigRef().getString("rabbitmq_username", "root"), rabbitmq_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) - , evbase(event_base_new()) - , eventHandler(evbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, - AMQP::Login(login_password.first, login_password.second), "/")) { + loop = new uv_loop_t; + uv_loop_init(loop); + + event_handler = std::make_unique(loop, log); + connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + while (!connection->ready() && ++cnt_retries != Connection_setup_retries_max) { - event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + uv_run(loop, UV_RUN_NOWAIT); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); } - if (!connection.ready()) + if (!connection->ready()) { LOG_ERROR(log, "Cannot set up connection for consumer"); } @@ -102,8 +106,10 @@ StorageRabbitMQ::StorageRabbitMQ( storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); - task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); - task->deactivate(); + streaming_task = global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ threadFunc(); }); + streaming_task->deactivate(); + heartbeat_task = global_context.getSchedulePool().createTask("RabbitMQHeartbeatTask", [this]{ heartbeatFunc(); }); + heartbeat_task->deactivate(); bind_by_id = num_consumers > 1 || num_queues > 1; @@ -115,6 +121,17 @@ StorageRabbitMQ::StorageRabbitMQ( } +void StorageRabbitMQ::heartbeatFunc() +{ + if (!stream_cancelled) + { + LOG_DEBUG(log, "Sending RabbitMQ heartbeat"); + connection->heartbeat(); + heartbeat_task->scheduleAfter(RESCHEDULE_MS * 10); + } +} + + Pipes StorageRabbitMQ::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -165,7 +182,8 @@ void StorageRabbitMQ::startup() } } - task->activateAndSchedule(); + streaming_task->activateAndSchedule(); + heartbeat_task->activateAndSchedule(); } @@ -178,8 +196,10 @@ void StorageRabbitMQ::shutdown() popReadBuffer(); } - connection.close(); - task->deactivate(); + streaming_task->deactivate(); + heartbeat_task->deactivate(); + + connection->close(); } @@ -223,18 +243,21 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() next_channel_id += num_queues; update_channel_id = true; - ChannelPtr 
consumer_channel = std::make_shared(&connection); + ChannelPtr consumer_channel = std::make_shared(connection.get()); - return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, - next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, local_exchange_name, stream_cancelled); + return std::make_shared( + consumer_channel, *event_handler, exchange_name, routing_keys, + next_channel_id, log, row_delimiter, bind_by_id, num_queues, + exchange_type, local_exchange_name, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - return std::make_shared(parsed_address, login_password, routing_keys[0], local_exchange_name, - log, num_consumers * num_queues, bind_by_id, use_transactional_channel, - row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + return std::make_shared( + parsed_address, login_password, routing_keys[0], local_exchange_name, + log, num_consumers * num_queues, bind_by_id, use_transactional_channel, + row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -296,7 +319,7 @@ void StorageRabbitMQ::threadFunc() /// Wait for attached views if (!stream_cancelled) - task->activateAndSchedule(); + streaming_task->schedule(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 567951dee6b..c3bb346376a 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include namespace DB @@ -53,7 +54,6 @@ public: const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; - const void pingConnection() { connection.heartbeat(); } protected: StorageRabbitMQ( @@ -91,9 +91,9 @@ private: std::pair parsed_address; std::pair login_password; - event_base * evbase; - RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; /// Connection for all consumers + uv_loop_t * loop; + std::unique_ptr event_handler; + std::unique_ptr connection; /// Connection for all consumers Poco::Semaphore semaphore; std::mutex mutex; @@ -102,12 +102,16 @@ private: size_t next_channel_id = 1; /// Must >= 1 because it is used as a binding key, which has to be > 0 bool update_channel_id = false; - BackgroundSchedulePool::TaskHolder task; + BackgroundSchedulePool::TaskHolder streaming_task; + BackgroundSchedulePool::TaskHolder heartbeat_task; std::atomic stream_cancelled{false}; ConsumerBufferPtr createReadBuffer(); void threadFunc(); + void heartbeatFunc(); + + void pingConnection() { connection->heartbeat(); } bool streamToViews(); bool checkDependencies(const StorageID & table_id); }; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 6d74e2c8298..7c750f720a3 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -4,6 +4,7 @@ #include "Columns/ColumnsNumber.h" #include #include +#include #include #include #include @@ -43,28 +44,31 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) - , producerEvbase(event_base_new()) - , eventHandler(producerEvbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, - AMQP::Login(login_password.first, login_password.second), "/")) { + + loop = new uv_loop_t; + uv_loop_init(loop); + + 
event_handler = std::make_unique(loop, log); + connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + /* The reason behind making a separate connection for each concurrent producer is explained here: * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. */ size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Loop_retries_max) + while (!connection->ready() && ++cnt_retries != Loop_retries_max) { - event_base_loop(producerEvbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + uv_run(loop, UV_RUN_NOWAIT); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); } - if (!connection.ready()) + if (!connection->ready()) { LOG_ERROR(log, "Cannot set up connection for producer!"); } - producer_channel = std::make_shared(&connection); + producer_channel = std::make_shared(connection.get()); checkExchange(); /// If publishing should be wrapped in transactions @@ -78,7 +82,9 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { finilizeProducer(); - connection.close(); + connection->close(); + event_handler->stop(); + assert(rows == 0 && chunks.empty()); } @@ -195,7 +201,7 @@ void WriteBufferToRabbitMQProducer::nextImpl() void WriteBufferToRabbitMQProducer::startEventLoop() { - eventHandler.startProducerLoop(); + event_handler->startProducerLoop(); } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 7d2bb6e598f..12877c5591c 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -49,9 +49,9 @@ private: const size_t num_queues; const bool use_transactional_channel; - event_base * producerEvbase; - RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; + uv_loop_t * loop; + std::unique_ptr event_handler; + std::unique_ptr connection; ProducerPtr producer_channel; size_t next_queue = 0; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3c4c0b3215b..8f6f77459e4 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -551,6 +551,7 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view') time.sleep(1) + print("Result", result, "Expected", messages_num * threads_num) if int(result) == messages_num * threads_num: break @@ -641,7 +642,8 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree - ORDER BY key; + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.rabbitmq; ''') @@ -1522,4 +1524,3 @@ if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") cluster.shutdown() - From d5847d29d620aa692838289a71eb7a19342d07dd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 25 Jun 2020 12:44:39 +0300 Subject: [PATCH 052/330] Avoid memory leaks --- 
src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 ++++---- src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 8 ++++---- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 2 +- tests/integration/test_storage_rabbitmq/test.py | 13 +++++++++---- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 34b3c460315..4c1104dc818 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -83,16 +83,16 @@ StorageRabbitMQ::StorageRabbitMQ( rabbitmq_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) { - loop = new uv_loop_t; - uv_loop_init(loop); + loop = std::make_unique(); + uv_loop_init(loop.get()); - event_handler = std::make_unique(loop, log); + event_handler = std::make_unique(loop.get(), log); connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); size_t cnt_retries = 0; while (!connection->ready() && ++cnt_retries != Connection_setup_retries_max) { - uv_run(loop, UV_RUN_NOWAIT); + uv_run(loop.get(), UV_RUN_NOWAIT); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index c3bb346376a..5ea9bde9a4b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -91,7 +91,7 @@ private: std::pair parsed_address; std::pair login_password; - uv_loop_t * loop; + std::unique_ptr loop; std::unique_ptr event_handler; std::unique_ptr connection; /// Connection for all consumers diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 7c750f720a3..bba1a6b693f 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -46,10 +46,10 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , chunk_size(chunk_size_) { - loop = new uv_loop_t; - uv_loop_init(loop); + loop = std::make_unique(); + uv_loop_init(loop.get()); - event_handler = std::make_unique(loop, log); + event_handler = std::make_unique(loop.get(), log); connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); /* The reason behind making a separate connection for each concurrent producer is explained here: @@ -59,7 +59,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( size_t cnt_retries = 0; while (!connection->ready() && ++cnt_retries != Loop_retries_max) { - uv_run(loop, UV_RUN_NOWAIT); + uv_run(loop.get(), UV_RUN_NOWAIT); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 12877c5591c..5793a044f9e 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -49,7 +49,7 @@ private: const size_t num_queues; const bool use_transactional_channel; - uv_loop_t * loop; + std::unique_ptr loop; std::unique_ptr event_handler; std::unique_ptr connection; ProducerPtr 
producer_channel; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 8f6f77459e4..e90cf86486b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -837,7 +837,8 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_many (key UInt64, value UInt64) ENGINE = MergeTree - ORDER BY key; + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS SELECT * FROM test.rabbitmq_many; ''') @@ -899,7 +900,8 @@ def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_sharding (key UInt64, value UInt64) ENGINE = MergeTree - ORDER BY key; + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; CREATE MATERIALIZED VIEW test.consumer_sharding TO test.view_sharding AS SELECT * FROM test.rabbitmq_sharding; ''') @@ -960,7 +962,8 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_overload (key UInt64, value UInt64) ENGINE = MergeTree - ORDER BY key; + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS SELECT * FROM test.rabbitmq_overload; ''') @@ -993,6 +996,7 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_overload') time.sleep(1) + print("Result", int(result), "Expected", messages_num * threads_num) if int(result) == messages_num * threads_num: break @@ -1015,7 +1019,8 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): CREATE TABLE test.destination(key UInt64, value UInt64, _consumed_by LowCardinality(String)) ENGINE = MergeTree() - ORDER BY key; + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; ''') num_tables = 5 From 36eb2c3028f5bb1911932cc901a0dd52c5be0d80 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 25 Jun 2020 13:25:22 +0300 Subject: [PATCH 053/330] Simplify code around locks --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 18 ++++++------------ src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- .../ReadBufferFromRabbitMQConsumer.cpp | 10 +++++----- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 4 ++-- .../integration/test_storage_rabbitmq/test.py | 1 + 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 464cfdbd5b2..cf2614d4de0 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -39,14 +39,11 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop and handler). But the loop should not be attempted to start if it is already running. 
*/ - if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) - { - loop_started.store(true); - stop_scheduled = false; + std::lock_guard lock(mutex_before_event_loop); + loop_started.store(true); + stop_scheduled = false; - uv_run(loop, UV_RUN_NOWAIT); - mutex_before_event_loop.unlock(); - } + uv_run(loop, UV_RUN_NOWAIT); } @@ -58,11 +55,8 @@ void RabbitMQHandler::startProducerLoop() void RabbitMQHandler::stop() { - if (mutex_before_loop_stop.try_lock()) - { - uv_stop(loop); - mutex_before_loop_stop.unlock(); - } + std::lock_guard lock(mutex_before_loop_stop); + uv_stop(loop); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 7cb0c44261f..ed3625d4464 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -31,7 +31,7 @@ private: timeval tv; std::atomic stop_scheduled = false; - std::timed_mutex mutex_before_event_loop; + std::mutex mutex_before_event_loop; std::mutex mutex_before_loop_stop; }; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 3c591b5352e..da4bfc24a2d 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -46,7 +46,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) - , eventHandler(eventHandler_) + , event_handler(eventHandler_) , exchange_name(exchange_name_) , routing_keys(routing_keys_) , channel_id(channel_id_) @@ -372,7 +372,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) * executing all callbacks on the connection (not only its own), then there should be some point to unblock. * loop_started == 1 if current consumer is started the loop and not another. 
*/ - if (!loop_started.load() && !eventHandler.checkStopIsScheduled()) + if (!loop_started.load() && !event_handler.checkStopIsScheduled()) { stopEventLoopWithTimeout(); } @@ -415,19 +415,19 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() void ReadBufferFromRabbitMQConsumer::stopEventLoop() { - eventHandler.stop(); + event_handler.stop(); } void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout() { - eventHandler.stopWithTimeout(); + event_handler.stopWithTimeout(); } void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & loop_started) { - eventHandler.startConsumerLoop(loop_started); + event_handler.startConsumerLoop(loop_started); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index d4bf35c00b8..4ff2b0ff64c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -23,7 +23,7 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - RabbitMQHandler & eventHandler_, + RabbitMQHandler & event_handler_, const String & exchange_name_, const Names & routing_keys_, const size_t channel_id_, @@ -46,7 +46,7 @@ private: using Messages = std::vector; ChannelPtr consumer_channel; - RabbitMQHandler & eventHandler; + RabbitMQHandler & event_handler; const String & exchange_name; const Names & routing_keys; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index e90cf86486b..42b7101f9c6 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -485,6 +485,7 @@ def test_rabbitmq_big_message(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view') + print("Result", result, "Expected", batch_messages * rabbitmq_messages) if int(result) == batch_messages * rabbitmq_messages: break From 3c22479961cef377f8a51397d68095ccdc72251a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 25 Jun 2020 18:36:49 +0300 Subject: [PATCH 054/330] Experiments --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 12 ++++++++---- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 10 ++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cf2614d4de0..4cd7d914125 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -39,11 +39,15 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop and handler). But the loop should not be attempted to start if it is already running. 
*/ - std::lock_guard lock(mutex_before_event_loop); - loop_started.store(true); - stop_scheduled = false; + bool expected = false; + if (loop_started.compare_exchange_strong(expected, true)) + { + std::lock_guard lock(mutex_before_event_loop); + stop_scheduled = false; - uv_run(loop, UV_RUN_NOWAIT); + uv_run(loop, UV_RUN_NOWAIT); + loop_started.store(false); + } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index da4bfc24a2d..f5d0651401c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -372,7 +372,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) * executing all callbacks on the connection (not only its own), then there should be some point to unblock. * loop_started == 1 if current consumer is started the loop and not another. */ - if (!loop_started.load() && !event_handler.checkStopIsScheduled()) + if (!event_handler.checkStopIsScheduled()) { stopEventLoopWithTimeout(); } @@ -442,17 +442,15 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread. startEventLoop(loop_started); - loop_started.store(false); } + /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. + std::lock_guard lock(mutex); + if (received.empty()) return false; messages.clear(); - - /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. - std::lock_guard lock(mutex); - messages.swap(received); current = messages.begin(); } From 81d459a5a8a395db0894f545a4f2ce71621dcb94 Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 26 Jun 2020 23:17:28 +0300 Subject: [PATCH 055/330] Small update ORCBlockOutputFormat and add ORC output format to performance test. 
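Apart from wrapping the ORC output code in #if USE_ORC, the main change is that writeNumbers now receives a converter callable, so each call site decides how its element type is cast (UInt8, being char8_t, goes through an explicit unsigned cast to satisfy clang-tidy's bugprone-signed-char-misuse check). A minimal standalone sketch of that pattern, with illustrative types only — LongBatch stands in for orc::LongVectorBatch and the signatures are not the real ORCBlockOutputFormat ones:

#include <cstdint>
#include <iostream>
#include <vector>

/// Hypothetical stand-in for an ORC numeric batch; not the real orc::LongVectorBatch.
struct LongBatch
{
    std::vector<int64_t> data;
};

/// The templated writer applies a caller-supplied conversion instead of
/// branching on the element type inside the template.
template <typename T, typename ConvertFunc>
void writeNumbers(LongBatch & batch, const std::vector<T> & column, ConvertFunc convert)
{
    for (const auto & value : column)
        batch.data.push_back(convert(value));
}

int main()
{
    LongBatch batch;

    std::vector<char> uint8_like{'a', 'b'};   /// plays the role of ClickHouse UInt8
    std::vector<int32_t> int32_column{-1, 2};

    /// The char-based column is converted to an unsigned integer explicitly;
    /// the other numeric types simply pass through unchanged.
    writeNumbers(batch, uint8_like, [](char v) { return static_cast<unsigned char>(v); });
    writeNumbers(batch, int32_column, [](int32_t v) { return v; });

    for (auto v : batch.data)
        std::cout << v << '\n';
}

The same idea is reused for writeDecimals and writeDateTimes below, which take conversion/extraction callables instead of specializing on the column type.
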
--- src/CMakeLists.txt | 12 +++-- .../Formats/Impl/ORCBlockOutputFormat.cpp | 53 ++++++++++++------- .../Formats/Impl/ORCBlockOutputFormat.h | 17 ++++-- tests/performance/select_format.xml | 1 + 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ebb0cdc2e90..f659a17e2bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -364,11 +364,13 @@ target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_C target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) -target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR}) -configure_file ( - "${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" - "${ORC_INCLUDE_DIR}/orc/orc-config.hh" -) +if (USE_ORC) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR}) + configure_file ( + "${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" + "${ORC_INCLUDE_DIR}/orc/orc-config.hh" + ) +endif () if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index b0125cd661f..90c23d87288 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -1,5 +1,7 @@ #include +#if USE_ORC + #include #include @@ -129,11 +131,12 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t } } -template -void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( +template +void ORCBlockOutputFormat::writeNumbers( orc::ColumnVectorBatch * orc_column, const IColumn & column, - const PaddedPODArray * null_bytemap) + const PaddedPODArray * null_bytemap, + ConvertFunc convert) { NumberVectorBatch * number_orc_column = dynamic_cast(orc_column); const auto & number_column = assert_cast &>(column); @@ -146,16 +149,13 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( number_orc_column->notNull[i] = 0; continue; } - if constexpr (std::is_same::value) - number_orc_column->data[i] = static_cast(number_column.getElement(i)); - else - number_orc_column->data[i] = number_column.getElement(i); + number_orc_column->data[i] = convert(number_column.getElement(i)); } number_orc_column->numElements = number_column.size(); } template -void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( +void ORCBlockOutputFormat::writeDecimals( orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, @@ -181,7 +181,7 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( } template -void ORCBlockOutputFormat::ORCBlockOutputFormat::writeStrings( +void ORCBlockOutputFormat::writeStrings( orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap) @@ -205,7 +205,7 @@ void ORCBlockOutputFormat::ORCBlockOutputFormat::writeStrings( } template -void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDateTimes( +void ORCBlockOutputFormat::writeDateTimes( orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, @@ -244,53 +244,53 @@ void ORCBlockOutputFormat::writeColumn( { case TypeIndex::Int8: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return value ;}); break; } case TypeIndex::UInt8: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return 
uint8_t(value) ;}); break; } case TypeIndex::Int16: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Int16 & value){ return value ;}); break; } case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value ;}); break; } case TypeIndex::Int32: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Int32 & value){ return value ;}); break; } case TypeIndex::UInt32: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value ;}); break; } case TypeIndex::Int64: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Int64 & value){ return value ;}); break; } case TypeIndex::UInt64: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const UInt64 & value){ return value ;}); break; } case TypeIndex::Float32: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Float32 & value){ return value ;}); break; } case TypeIndex::Float64: { - writeNumbers(orc_column, column, null_bytemap); + writeNumbers(orc_column, column, null_bytemap, [](const Float64 & value){ return value ;}); break; } case TypeIndex::FixedString: @@ -368,6 +368,7 @@ void ORCBlockOutputFormat::writeColumn( auto nested_type = assert_cast(*type).getNestedType(); const ColumnArray::Offsets & offsets = list_column.getOffsets(); list_orc_column->resize(list_column.size()); + /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i]. list_orc_column->offsets[0] = 0; for (size_t i = 0; i != list_column.size(); ++i) { @@ -439,3 +440,15 @@ void registerOutputFormatProcessorORC(FormatFactory & factory) } } + +#else + +namespace DB +{ + class FormatFactory; + void registerOutputFormatProcessorORC(FormatFactory &) + { + } +} + +#endif diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 9035cbef41c..8d2f5d76d53 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -1,5 +1,7 @@ #pragma once +#include "config_formats.h" +#if USE_ORC #include #include #include @@ -10,6 +12,7 @@ namespace DB class WriteBuffer; +/// orc::Writer writes only in orc::OutputStream class ORCOutputStream : public orc::OutputStream { public: @@ -39,22 +42,29 @@ public: private: ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); + /// ConvertFunc is needed for type UInt8, because firstly UInt8 (char8_t) must be + /// converted to unsigned char (bugprone-signed-char-misuse in clang). + template + void writeNumbers(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, ConvertFunc convert); + + /// ConvertFunc is needed to convert ClickHouse Int128 to ORC Int128. 
template void writeDecimals(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap, ConvertFunc convert); - template - void writeNumbers(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap); - template void writeStrings(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap); + /// ORC column TimestampVectorBatch stores only seconds and nanoseconds, + /// GetSecondsFunc and GetNanosecondsFunc are needed to extract them from DataTime type. template void writeDateTimes(orc::ColumnVectorBatch * orc_column, const IColumn & column, const PaddedPODArray * null_bytemap, GetSecondsFunc get_seconds, GetNanosecondsFunc get_nanoseconds); void writeColumn(orc::ColumnVectorBatch * orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); + /// These two functions are needed to know maximum nested size of arrays to + /// create an ORC Batch with the appropriate size size_t getColumnSize(const IColumn & column, DataTypePtr & type); size_t getMaxColumnSize(Chunk & chunk); @@ -67,3 +77,4 @@ private: }; } +#endif diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index 0cd0e3fe732..bbe489c06c6 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -35,6 +35,7 @@ ODBCDriver2 Avro MsgPack + ORC From e58e1c547167bff5f73c26b79efa8b41b6bcc71d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Sat, 27 Jun 2020 00:02:13 +0300 Subject: [PATCH 056/330] Try to enable long perf tests --- tests/performance/date_time.xml | 7 ------- tests/performance/float_formatting.xml | 10 ++-------- tests/performance/float_parsing.xml | 6 ------ tests/performance/set.xml | 6 ------ 4 files changed, 2 insertions(+), 27 deletions(-) diff --git a/tests/performance/date_time.xml b/tests/performance/date_time.xml index 43cd1f353e2..8f0ec23cdad 100644 --- a/tests/performance/date_time.xml +++ b/tests/performance/date_time.xml @@ -1,11 +1,4 @@ - - - - long - - - datetime_transform diff --git a/tests/performance/float_formatting.xml b/tests/performance/float_formatting.xml index f65efbe83cf..18289fe2862 100644 --- a/tests/performance/float_formatting.xml +++ b/tests/performance/float_formatting.xml @@ -1,10 +1,4 @@ - - long - - - - expr @@ -49,6 +43,6 @@ - SELECT count() FROM numbers(1000000) WHERE NOT ignore(toString({expr})) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(toString({expr_zero})) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(toString({expr})) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(toString({expr_zero})) diff --git a/tests/performance/float_parsing.xml b/tests/performance/float_parsing.xml index 2acbb636fed..7d159fbfced 100644 --- a/tests/performance/float_parsing.xml +++ b/tests/performance/float_parsing.xml @@ -1,10 +1,4 @@ - - long - - - - expr diff --git a/tests/performance/set.xml b/tests/performance/set.xml index 576a26390d1..f6eeffec6e7 100644 --- a/tests/performance/set.xml +++ b/tests/performance/set.xml @@ -1,10 +1,4 @@ - - long - - - - table From be57bc56d78940a2f71c989990c6c09281ae51da Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Sat, 27 Jun 2020 03:45:00 +0300 Subject: [PATCH 057/330] perf test: report queries with new functions separately --- docker/test/performance-comparison/compare.sh | 38 +++++++--- .../test/performance-comparison/download.sh | 26 ++++++- .../test/performance-comparison/entrypoint.sh | 
16 +++- docker/test/performance-comparison/perf.py | 74 +++++++++++++------ docker/test/performance-comparison/report.py | 31 ++++++++ 5 files changed, 147 insertions(+), 38 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index f39b8b593ab..8fb9bc06003 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -274,10 +274,11 @@ for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print) do test_name=$(basename "$test_file" "-raw.tsv") sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv" - sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv" - sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv" - sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv" - sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv" + sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv" + sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv" + sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv" + sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv" + sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv" done unset IFS @@ -286,6 +287,18 @@ clickhouse-local --query " create view query_runs as select * from file('analyze/query-runs.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float'); +create view partial_queries as select test, query_index + from file('analyze/partial-queries.tsv', TSV, + 'test text, query_index int, servers Array(int)'); + +create table partial_query_times engine File(TSVWithNamesAndTypes, + 'analyze/partial-query-times.tsv') + as select test, query_index, stddevPop(time) time_stddev, median(time) time_median + from query_runs + where (test, query_index) in partial_queries + group by test, query_index + ; + create view left_query_log as select * from file('left-query-log.tsv', TSVWithNamesAndTypes, '$(cat "left-query-log.tsv.columns")'); @@ -329,6 +342,7 @@ create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics- right join query_runs on query_logs.query_id = query_runs.query_id and query_logs.version = query_runs.version + where (test, query_index) not in partial_queries ; create table query_run_metrics engine File( @@ -351,6 +365,7 @@ create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-n # query. We also don't have lateral joins. So I just put all runs of each # query into a separate file, and then compute randomization distribution # for each file. I do this in parallel using GNU parallel. 
+( set +x # do not bloat the log IFS=$'\n' for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) do @@ -367,6 +382,7 @@ do done wait unset IFS +) parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log } @@ -390,12 +406,20 @@ create view query_display_names as select * from 'test text, query_index int, query_display_name text') ; +create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') + as select floor(time_median, 3) m, floor(time_stddev / time_median, 3) v, + test, query_index, query_display_name + from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes, + 'test text, query_index int, time_stddev float, time_median float') t + join query_display_names using (test, query_index) + order by test, query_index + ; + -- WITH, ARRAY JOIN and CROSS JOIN do not like each other: -- https://github.com/ClickHouse/ClickHouse/issues/11868 -- https://github.com/ClickHouse/ClickHouse/issues/11757 -- Because of this, we make a view with arrays first, and then apply all the -- array joins. - create view query_metric_stat_arrays as with (select * from file('analyze/query-run-metric-names.tsv', TSV, 'n Array(String)')) as metric_name @@ -860,10 +884,6 @@ case "$stage" in cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||: done - # Sleep for five minutes to see how the servers enter a quiescent state (e.g. - # how fast the memory usage drops). - sleep 300 - # We had a bug where getting profiles froze sometimes, so try to save some # logs if this happens again. Give the servers some time to collect all info, # then trace and kill. Start in a subshell, so that both function don't diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index befc23ad041..93888a9ab02 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -24,14 +24,32 @@ dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_e function download { + # Historically there were various path for the performance test package. + # Test all of them. + for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + left_path="$path" + fi + done + + for path in "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + right_path="$path" + fi + done + # might have the same version on left and right - if ! [ "$left_sha" = "$right_sha" ] + if ! 
[ "$left_path" = "$right_path" ] then - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & + wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv & + wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv & else mkdir right ||: - wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & + wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & fi for dataset_name in $datasets diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 6dfd2f9c454..64336d0a038 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -50,10 +50,18 @@ function find_reference_sha # FIXME sometimes we have testing tags on commits without published builds -- # normally these are documentation commits. Loop to skip them. - if curl --fail --head "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz" - then - break - fi + # Historically there were various path for the performance test package. + # Test all of them. + unset found + for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz" + do + if curl --fail --head "$path" + then + found="$path" + break + fi + done + if [ -n "$found" ] ; then break; fi start_ref="$REF_SHA~" done diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 9a6081b751c..e8323fbcca0 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -7,6 +7,7 @@ import clickhouse_driver import xml.etree.ElementTree as et import argparse import pprint +import re import string import time import traceback @@ -102,10 +103,11 @@ for s in servers: # connection loses the changes in settings. drop_query_templates = [q.text for q in root.findall('drop_query')] drop_queries = substitute_parameters(drop_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in drop_queries: try: c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') except: pass @@ -117,10 +119,12 @@ for c in connections: # configurable). So the end result is uncertain, but hopefully we'll be able to # run at least some queries. settings = root.findall('settings/*') -for c in connections: +for conn_index, c in enumerate(connections): for s in settings: try: - c.execute("set {} = '{}'".format(s.tag, s.text)) + q = f"set {s.tag} = '{s.text}'" + c.execute(q) + print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') except: print(traceback.format_exc(), file=sys.stderr) @@ -139,16 +143,28 @@ for t in tables: # Run create queries create_query_templates = [q.text for q in root.findall('create_query')] create_queries = substitute_parameters(create_query_templates) -for c in connections: + +# Disallow temporary tables, because the clickhouse_driver reconnects on errors, +# and temporary tables are destroyed. 
We want to be able to continue after some +# errors. +for q in create_queries: + if re.search('create temporary table', q, flags=re.IGNORECASE): + print(f"Temporary tables are not allowed in performance tests: '{q}'", + file = sys.stderr) + sys.exit(1) + +for conn_index, c in enumerate(connections): for q in create_queries: c.execute(q) + print(f'create\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') # Run fill queries fill_query_templates = [q.text for q in root.findall('fill_query')] fill_queries = substitute_parameters(fill_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in fill_queries: c.execute(q) + print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') # Run test queries for query_index, q in enumerate(test_queries): @@ -165,31 +181,47 @@ for query_index, q in enumerate(test_queries): # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. - try: - for conn_index, c in enumerate(connections): + # A query might not run on the old server if it uses a function added in the + # new one. We want to run them on the new server only, so that the PR author + # can ensure that the test works properly. Remember the errors we had on + # each server. + query_error_on_connection = [None] * len(connections); + for conn_index, c in enumerate(connections): + try: prewarm_id = f'{query_prefix}.prewarm0' res = c.execute(q, query_id = prewarm_id) print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') - except KeyboardInterrupt: - raise - except: - # If prewarm fails for some query -- skip it, and try to test the others. - # This might happen if the new test introduces some function that the - # old server doesn't support. Still, report it as an error. - # FIXME the driver reconnects on error and we lose settings, so this might - # lead to further errors or unexpected behavior. - print(traceback.format_exc(), file=sys.stderr) + except KeyboardInterrupt: + raise + except: + # FIXME the driver reconnects on error and we lose settings, so this + # might lead to further errors or unexpected behavior. + query_error_on_connection[conn_index] = traceback.format_exc(); + continue + + # If prewarm fails for the query on both servers -- report the error, skip + # the query and continue testing the next query. + if query_error_on_connection.count(None) == 0: + print(query_error_on_connection[0], file = sys.stderr) continue + # If prewarm fails on one of the servers, run the query on the rest of them. + # Useful for queries that use new functions added in the new server version. + if query_error_on_connection.count(None) < len(query_error_on_connection): + no_error = [i for i, e in enumerate(query_error_on_connection) if not e] + print(f'partial\t{query_index}\t{no_error}') + # Now, perform measured runs. - # Track the time spent by the client to process this query, so that we can notice - # out the queries that take long to process on the client side, e.g. by sending - # excessive data. + # Track the time spent by the client to process this query, so that we can + # notice the queries that take long to process on the client side, e.g. by + # sending excessive data. 
start_seconds = time.perf_counter() server_seconds = 0 for run in range(0, args.runs): run_id = f'{query_prefix}.run{run}' for conn_index, c in enumerate(connections): + if query_error_on_connection[conn_index]: + continue res = c.execute(q, query_id = run_id) print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}') server_seconds += c.last_query.elapsed @@ -198,8 +230,8 @@ for query_index, q in enumerate(test_queries): print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') # Run drop queries -drop_query_templates = [q.text for q in root.findall('drop_query')] drop_queries = substitute_parameters(drop_query_templates) -for c in connections: +for conn_index, c in enumerate(connections): for q in drop_queries: c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 227722a8bea..02ec017d15b 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -23,6 +23,7 @@ faster_queries = 0 slower_queries = 0 unstable_queries = 0 very_unstable_queries = 0 +unstable_partial_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 @@ -194,6 +195,31 @@ if args.report == 'main': ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], slow_on_client_rows) + def print_partial(): + rows = tsvRows('report/partial-queries-report.tsv') + if not rows: + return + global unstable_partial_queries, slow_average_tests + print(tableStart('Partial queries')) + columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query'] + print(tableHeader(columns)) + attrs = ['' for c in columns] + for row in rows: + if float(row[1]) > 0.10: + attrs[1] = f'style="background: {color_bad}"' + unstable_partial_queries += 1 + else: + attrs[1] = '' + if float(row[0]) > allowed_single_run_time: + attrs[0] = f'style="background: {color_bad}"' + slow_average_tests += 1 + else: + attrs[0] = '' + print(tableRow(row, attrs)) + print(tableEnd()) + + print_partial() + def print_changes(): rows = tsvRows('report/changed-perf.tsv') if not rows: @@ -417,6 +443,11 @@ if args.report == 'main': status = 'failure' message_array.append(str(slower_queries) + ' slower') + if unstable_partial_queries: + unstable_queries += unstable_partial_queries + error_tests += unstable_partial_queries + status = 'failure' + if unstable_queries: message_array.append(str(unstable_queries) + ' unstable') From 87c81e9ad2551a6ed3c9157ff3ed9f46ef90d811 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Sat, 27 Jun 2020 03:59:00 +0300 Subject: [PATCH 058/330] longer --- tests/performance/date_time.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/performance/date_time.xml b/tests/performance/date_time.xml index 8f0ec23cdad..91b1feed491 100644 --- a/tests/performance/date_time.xml +++ b/tests/performance/date_time.xml @@ -119,8 +119,8 @@ - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) - SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) - SELECT count() FROM 
numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) From 8b82ff215fe4b42216aa41adf9907aba37a3817d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Sat, 27 Jun 2020 04:08:21 +0300 Subject: [PATCH 059/330] muzzy decay 10 ms --- contrib/jemalloc-cmake/CMakeLists.txt | 7 ++++++- .../config/config.d/perf-comparison-tweaks-config.xml | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 79b351c3721..07e89c6d802 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -22,7 +22,12 @@ if (ENABLE_JEMALLOC) # # By enabling percpu_arena number of arenas limited to number of CPUs and hence # this problem should go away. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0") + # + # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to + # avoid spurious latencies and additional work associated with + # MADV_DONTNEED. See + # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
+ set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0") endif() diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml index 5dcc3c51eca..6f1726ab36b 100644 --- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml @@ -20,4 +20,6 @@ 1000000000 + + 10 From 649eb8e3486de117c0c06a26d0b4b6ff6250e2d0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 27 Jun 2020 17:26:00 +0000 Subject: [PATCH 060/330] Move reading from RabbitMQ into background task --- src/Common/ErrorCodes.cpp | 1 + src/Storages/RabbitMQ/RabbitMQHandler.cpp | 39 ++++++--------- src/Storages/RabbitMQ/RabbitMQHandler.h | 8 +-- .../ReadBufferFromRabbitMQConsumer.cpp | 38 +++----------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 7 +-- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 50 ++++++++++++++----- src/Storages/RabbitMQ/StorageRabbitMQ.h | 10 ++-- 7 files changed, 75 insertions(+), 78 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 09f4879c3f4..fbdac27360b 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -496,6 +496,7 @@ namespace ErrorCodes extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION = 527; extern const int CASSANDRA_INTERNAL_ERROR = 528; extern const int NOT_A_LEADER = 529; + extern const int CANNOT_CONNECT_RABBITMQ = 530; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cf2614d4de0..6d3ed41cf78 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -4,20 +4,16 @@ namespace DB { -enum -{ - Lock_timeout = 50, - Loop_stop_timeout = 200 -}; - +static const auto Lock_timeout = 50; +/* The object of this class is shared between concurrent consumers (who share the same connection == share the same + * event loop and handler). + */ RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : AMQP::LibUvHandler(loop_), loop(loop_), log(log_) { - tv.tv_sec = 0; - tv.tv_usec = Loop_stop_timeout; } @@ -34,15 +30,18 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes } +void RabbitMQHandler::startLoop() +{ + while (!stop_loop) + { + uv_run(loop, UV_RUN_NOWAIT); + } +} + + void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) { - /* The object of this class is shared between concurrent consumers (who share the same connection == share the same - * event loop and handler). But the loop should not be attempted to start if it is already running. 
- */ std::lock_guard lock(mutex_before_event_loop); - loop_started.store(true); - stop_scheduled = false; - uv_run(loop, UV_RUN_NOWAIT); } @@ -55,15 +54,9 @@ void RabbitMQHandler::startProducerLoop() void RabbitMQHandler::stop() { - std::lock_guard lock(mutex_before_loop_stop); - uv_stop(loop); -} - - -void RabbitMQHandler::stopWithTimeout() -{ - stop_scheduled = true; - uv_stop(loop); + //std::lock_guard lock(mutex_before_loop_stop); + //uv_stop(loop); + stop_loop = true; } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index ed3625d4464..7b3a40df852 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -21,16 +21,16 @@ public: void onError(AMQP::TcpConnection * connection, const char * message) override; void startConsumerLoop(std::atomic & loop_started); void startProducerLoop(); - void stopWithTimeout(); void stop(); - std::atomic & checkStopIsScheduled() { return stop_scheduled; }; + void startLoop(); private: uv_loop_t * loop; Poco::Logger * log; - timeval tv; - std::atomic stop_scheduled = false; + std::atomic stop_loop = false, running_loop = false; + + std::timed_mutex starting_loop; std::mutex mutex_before_event_loop; std::mutex mutex_before_loop_stop; }; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index da4bfc24a2d..001a83f104b 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -33,7 +33,7 @@ namespace ExchangeType ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - RabbitMQHandler & eventHandler_, + HandlerPtr eventHandler_, const String & exchange_name_, const Names & routing_keys_, const size_t channel_id_, @@ -117,8 +117,6 @@ void ReadBufferFromRabbitMQConsumer::initExchange() return; } - /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported. - AMQP::ExchangeType type; if (exchange_type == ExchangeType::FANOUT) type = AMQP::ExchangeType::fanout; else if (exchange_type == ExchangeType::DIRECT) type = AMQP::ExchangeType::direct; @@ -247,7 +245,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) }); /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed, because - * if moved there, it must(!) be wrapped inside a channel->onReady callback or any other (and the looping), otherwise + * if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise * consumer might fail to subscribe and no resubscription will help. */ subscribe(queues.back()); @@ -280,7 +278,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) AMQP::Table binding_arguments; std::vector matching; - /// It is not parsed for the second time - if it was parsed above, then it would go to the first if statement, not here. + /// It is not parsed for the second time - if it was parsed above, then it would never end up here. 
for (const auto & header : routing_keys) { boost::split(matching, header, [](char c){ return c == '='; }); @@ -367,15 +365,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) std::lock_guard lock(mutex); received.push_back(message_received); } - - /* As event loop is blocking to the thread that started it and a single thread should not be blocked while - * executing all callbacks on the connection (not only its own), then there should be some point to unblock. - * loop_started == 1 if current consumer is started the loop and not another. - */ - if (!loop_started.load() && !event_handler.checkStopIsScheduled()) - { - stopEventLoopWithTimeout(); - } } }) .onError([&](const char * message) @@ -415,19 +404,13 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() void ReadBufferFromRabbitMQConsumer::stopEventLoop() { - event_handler.stop(); -} - - -void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout() -{ - event_handler.stopWithTimeout(); + event_handler->stop(); } void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & loop_started) { - event_handler.startConsumerLoop(loop_started); + event_handler->startConsumerLoop(loop_started); } @@ -438,21 +421,12 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (current == messages.end()) { - if (received.empty()) - { - /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread. - startEventLoop(loop_started); - loop_started.store(false); - } - if (received.empty()) return false; - messages.clear(); - /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. std::lock_guard lock(mutex); - + messages.clear(); messages.swap(received); current = messages.begin(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 4ff2b0ff64c..010c8673a58 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace Poco @@ -16,6 +17,7 @@ namespace DB { using ChannelPtr = std::shared_ptr; +using HandlerPtr = std::shared_ptr; class ReadBufferFromRabbitMQConsumer : public ReadBuffer { @@ -23,7 +25,7 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - RabbitMQHandler & event_handler_, + HandlerPtr event_handler_, const String & exchange_name_, const Names & routing_keys_, const size_t channel_id_, @@ -46,7 +48,7 @@ private: using Messages = std::vector; ChannelPtr consumer_channel; - RabbitMQHandler & event_handler; + HandlerPtr event_handler; const String & exchange_name; const Names & routing_keys; @@ -92,7 +94,6 @@ private: void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); void startEventLoop(std::atomic & loop_started); - void stopEventLoopWithTimeout(); void stopEventLoop(); }; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 4c1104dc818..ebf6ec2795c 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -39,16 +39,17 @@ namespace DB enum { - Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000 }; +static const auto CONNECT_SLEEP = 200; +static const auto RETRIES_MAX = 1000; static const auto RESCHEDULE_MS = 500; namespace ErrorCodes { extern const int 
LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int CANNOT_CONNECT_RABBITMQ; } @@ -79,27 +80,25 @@ StorageRabbitMQ::StorageRabbitMQ( , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( - rabbitmq_context.getConfigRef().getString("rabbitmq_username", "root"), - rabbitmq_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) + global_context.getConfigRef().getString("rabbitmq_username", "root"), + global_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) { loop = std::make_unique(); uv_loop_init(loop.get()); - event_handler = std::make_unique(loop.get(), log); - connection = std::make_unique(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + event_handler = std::make_shared(loop.get(), log); + connection = std::make_shared(event_handler.get(), AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); size_t cnt_retries = 0; - while (!connection->ready() && ++cnt_retries != Connection_setup_retries_max) + while (!connection->ready() && ++cnt_retries != RETRIES_MAX) { uv_run(loop.get(), UV_RUN_NOWAIT); - std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); } if (!connection->ready()) - { - LOG_ERROR(log, "Cannot set up connection for consumer"); - } + throw Exception("Cannot set up connection for consumers", ErrorCodes::CANNOT_CONNECT_RABBITMQ); rabbitmq_context.makeQueryContext(); StorageInMemoryMetadata storage_metadata; @@ -118,6 +117,10 @@ StorageRabbitMQ::StorageRabbitMQ( /// Make sure that local exchange name is unique for each table and is not the same as client's exchange name local_exchange_name = exchange_name + "_" + table_name; + + /// One looping task for all consumers as they share the same connection == the same handler == the same event loop + looping_task = global_context.getSchedulePool().createTask("RabbitMQLoopingTask", [this]{ loopingFunc(); }); + looping_task->deactivate(); } @@ -132,6 +135,13 @@ void StorageRabbitMQ::heartbeatFunc() } +void StorageRabbitMQ::loopingFunc() +{ + LOG_DEBUG(log, "Starting event looping iterations"); + event_handler->startLoop(); +} + + Pipes StorageRabbitMQ::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -154,8 +164,13 @@ Pipes StorageRabbitMQ::read( *this, metadata_snapshot, context, column_names, log))); } - LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); + if (!loop_started) + { + loop_started = true; + looping_task->activateAndSchedule(); + } + LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); return pipes; } @@ -199,6 +214,9 @@ void StorageRabbitMQ::shutdown() streaming_task->deactivate(); heartbeat_task->deactivate(); + event_handler->stop(); + looping_task->deactivate(); + connection->close(); } @@ -246,7 +264,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(connection.get()); return std::make_shared( - consumer_channel, *event_handler, exchange_name, routing_keys, + consumer_channel, event_handler, exchange_name, routing_keys, next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, local_exchange_name, stream_cancelled); } @@ 
-354,6 +372,12 @@ bool StorageRabbitMQ::streamToViews() stream->setLimits(limits); } + if (!loop_started) + { + loop_started = true; + looping_task->activateAndSchedule(); + } + // Join multiple streams if necessary BlockInputStreamPtr in; if (streams.size() > 1) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 5ea9bde9a4b..c40211bed70 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -91,9 +91,9 @@ private: std::pair parsed_address; std::pair login_password; - std::unique_ptr loop; - std::unique_ptr event_handler; - std::unique_ptr connection; /// Connection for all consumers + std::shared_ptr loop; + std::shared_ptr event_handler; + std::shared_ptr connection; /// Connection for all consumers Poco::Semaphore semaphore; std::mutex mutex; @@ -101,15 +101,19 @@ private: size_t next_channel_id = 1; /// Must >= 1 because it is used as a binding key, which has to be > 0 bool update_channel_id = false; + std::atomic loop_started = false; BackgroundSchedulePool::TaskHolder streaming_task; BackgroundSchedulePool::TaskHolder heartbeat_task; + BackgroundSchedulePool::TaskHolder looping_task; + std::atomic stream_cancelled{false}; ConsumerBufferPtr createReadBuffer(); void threadFunc(); void heartbeatFunc(); + void loopingFunc(); void pingConnection() { connection->heartbeat(); } bool streamToViews(); From 4dcdad2f080cbe15d259b5855ec8f97a5c10a217 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2020 23:55:45 +0300 Subject: [PATCH 061/330] Fix ORDER BY tuple with COLLATE on const column --- src/Columns/ColumnConst.cpp | 4 +- src/Interpreters/sortBlock.cpp | 3 + ...354_order_by_tuple_collate_const.reference | 66 +++++++++++++++++++ .../01354_order_by_tuple_collate_const.sql | 1 + 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference create mode 100644 tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index 545c0b1b300..2fa1fbce32d 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -120,7 +120,9 @@ void ColumnConst::getPermutation(bool /*reverse*/, size_t /*limit*/, int /*nan_d res[i] = i; } -void ColumnConst::updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const {} +void ColumnConst::updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const +{ +} void ColumnConst::updateWeakHash32(WeakHash32 & hash) const { diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 4b8d4f2b859..cb3c36e5356 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -187,6 +187,9 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (ranges.empty()) break; + if (column.column_const) + continue; + if (isCollationRequired(column.description)) { const ColumnString & column_string = assert_cast(*column.column); diff --git a/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference new file mode 100644 index 00000000000..0359bf9ccb2 --- /dev/null +++ b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.reference @@ -0,0 +1,66 @@ +0 +0 +1 +1 +10 +10 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 
+10 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql new file mode 100644 index 00000000000..d9b596f3ec3 --- /dev/null +++ b/tests/queries/0_stateless/01354_order_by_tuple_collate_const.sql @@ -0,0 +1 @@ +SELECT number FROM numbers(11) ORDER BY arrayJoin(['а', 'я', '\0�', '', 'Я', '']) ASC, toString(number) ASC, 'y' ASC COLLATE 'el'; From de95d42de6e4782c256b9cbf52b4f6e33dc78c31 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 29 Jun 2020 10:58:50 +0300 Subject: [PATCH 062/330] make a short test for DateTime --- .../{date_time.xml => date_time_long.xml} | 1 + tests/performance/date_time_short.xml | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) rename tests/performance/{date_time.xml => date_time_long.xml} (99%) create mode 100644 tests/performance/date_time_short.xml diff --git a/tests/performance/date_time.xml b/tests/performance/date_time_long.xml similarity index 99% rename from tests/performance/date_time.xml rename to tests/performance/date_time_long.xml index 91b1feed491..b358945b35b 100644 --- a/tests/performance/date_time.xml +++ b/tests/performance/date_time_long.xml @@ -1,4 +1,5 @@ + long datetime_transform diff --git a/tests/performance/date_time_short.xml b/tests/performance/date_time_short.xml new file mode 100644 index 00000000000..a9fd0908694 --- /dev/null +++ b/tests/performance/date_time_short.xml @@ -0,0 +1,40 @@ + + + + + date_transform + + toDayOfWeek + toMonday + toRelativeDayNum + toYYYYMMDDhhmmss + toUnixTimestamp + + + + time_zone + + Europe/Moscow + + + + binary_function + + lessOrEquals + greater + plus + addWeeks + + + + + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {date_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + From 1b49feae6bf6d0d499fb747528505909e2b1343c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 29 Jun 2020 11:29:40 +0300 Subject: [PATCH 063/330] Fixup --- docker/test/performance-comparison/compare.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 7c7a18beffb..268c4841a5a 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -157,7 +157,11 @@ function run_tests TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") # the grep is to filter out set -x output and keep only time output - { time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" || continue + { \ + time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 \ + -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \ + } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" \ + || echo "Test $test_name failed with error code $?" 
>> "$test_name-err.log" done unset TIMEFORMAT @@ -793,7 +797,7 @@ unset IFS # Remember that grep sets error code when nothing is found, hence the bayan # operator. -grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: +grep -H -m2 -i '' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: } function report_metrics From 5fc0b93400177cb005a347821aae3bb76f255cc3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 29 Jun 2020 06:33:53 +0000 Subject: [PATCH 064/330] Use ConcurentBoundedQueue instead of vector --- .../ReadBufferFromRabbitMQConsumer.cpp | 34 +++++-------------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 7 ++-- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 001a83f104b..dbb91bf19c4 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -30,6 +30,7 @@ namespace ExchangeType static const String HEADERS = "headers"; } +static const auto QUEUE_SIZE = 50000; /// Equals capacity of single rabbitmq queue ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, @@ -59,10 +60,8 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT) , local_hash_exchange(local_exchange + "_" + ExchangeType::HASH) , stopped(stopped_) + , messages(QUEUE_SIZE) { - messages.clear(); - current = messages.begin(); - exchange_type_set = exchange_type != ExchangeType::DEFAULT; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. @@ -81,7 +80,6 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() consumer_channel->close(); messages.clear(); - current = messages.begin(); BufferBase::set(nullptr, 0, 0); } @@ -354,17 +352,12 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (message_size && message.body() != nullptr) { String message_received = std::string(message.body(), message.body() + message_size); - if (row_delimiter != '\0') { message_received += row_delimiter; } - /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl(). - { - std::lock_guard lock(mutex); - received.push_back(message_received); - } + messages.push(message_received); } }) .onError([&](const char * message) @@ -419,25 +412,16 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (stopped || !allowed) return false; - if (current == messages.end()) + if (messages.tryPop(current)) { - if (received.empty()) - return false; + auto * new_position = const_cast(current.data()); + BufferBase::set(new_position, current.size(), 0); + allowed = false; - /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. 
- std::lock_guard lock(mutex); - messages.clear(); - messages.swap(received); - current = messages.begin(); + return true; } - auto * new_position = const_cast(current->data()); - BufferBase::set(new_position, current->size(), 0); - - ++current; - allowed = false; - - return true; + return false; } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 010c8673a58..7be31c7b8f2 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace Poco @@ -74,10 +74,9 @@ private: std::atomic loop_started = false, consumer_error = false; std::atomic count_subscribed = 0, wait_subscribed; + ConcurrentBoundedQueue messages; + String current; std::vector queues; - Messages received; - Messages messages; - Messages::iterator current; std::unordered_map subscribed_queue; /* Note: as all consumers share the same connection => they also share the same From 8c0177b21641f7b2fd54d4c36b1986ce551837b5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jun 2020 15:01:29 +0300 Subject: [PATCH 065/330] Update contrib/jemalloc-cmake/CMakeLists.txt --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 07e89c6d802..13f7ea3326b 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -29,7 +29,7 @@ if (ENABLE_JEMALLOC) # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000") else() - set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0") + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000") endif() # CACHE variable is empty, to allow changing defaults without necessity # to purge cache From 3d2cc9d4b2b7ec44e8ce43f619e2e45f9b76d978 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 29 Jun 2020 10:11:55 +0000 Subject: [PATCH 066/330] Remove libevent library --- .gitmodules | 3 - CMakeLists.txt | 1 - cmake/find/libevent.cmake | 22 - contrib/CMakeLists.txt | 4 - contrib/amqpcpp-cmake/CMakeLists.txt | 2 +- contrib/libevent | 1 - contrib/libevent-cmake/CMakeLists.txt | 42 -- .../libevent-cmake/linux/evconfig-private.h | 39 -- .../linux/event2/event-config.h | 516 ------------------ src/Storages/RabbitMQ/RabbitMQHandler.h | 1 - .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 1 - 11 files changed, 1 insertion(+), 631 deletions(-) delete mode 100644 cmake/find/libevent.cmake delete mode 160000 contrib/libevent delete mode 100644 contrib/libevent-cmake/CMakeLists.txt delete mode 100644 contrib/libevent-cmake/linux/evconfig-private.h delete mode 100644 contrib/libevent-cmake/linux/event2/event-config.h diff --git a/.gitmodules b/.gitmodules index b2523231e03..fe69bcdfd39 100644 --- a/.gitmodules +++ b/.gitmodules @@ -160,9 +160,6 @@ [submodule "contrib/AMQP-CPP"] path = contrib/AMQP-CPP url = https://github.com/CopernicaMarketingSoftware/AMQP-CPP.git -[submodule "contrib/libevent"] - path = contrib/libevent - url = https://github.com/libevent/libevent.git [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse-Extras/cpp-driver.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 
30b2e11d396..b69dea81186 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,7 +342,6 @@ include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/rdkafka.cmake) -include (cmake/find/libevent.cmake) include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) diff --git a/cmake/find/libevent.cmake b/cmake/find/libevent.cmake deleted file mode 100644 index 2f714b43475..00000000000 --- a/cmake/find/libevent.cmake +++ /dev/null @@ -1,22 +0,0 @@ -SET(ENABLE_LIBEVENT 1) -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libevent/CMakeLists.txt") - message (WARNING "submodule contrib/libevent is missing. to fix try run: - \n git submodule update --init --recursive") - - set (ENABLE_LIBEVENT 0) -endif () - -if (ENABLE_LIBEVENT) - - set (USE_LIBEVENT 1) - set (LIBEVENT_LIBRARY LIBEVENT) - - set (LIBEVENT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libevent") - - list (APPEND LIBEVENT_INCLUDE_DIR - "${ClickHouse_SOURCE_DIR}/contrib/libevent/include/event2" - "${ClickHouse_SOURCE_DIR}/contrib/libevent/include") - -endif() - -message (STATUS "Using libevent=${USE_LIBEVENT}: ${LIBEVENT_INCLUDE_DIR} : ${LIBEVENT_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 65902fda0be..6c1e1437066 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -299,10 +299,6 @@ if (USE_AMQPCPP) add_subdirectory (amqpcpp-cmake) endif() -if (USE_LIBEVENT) - add_subdirectory(libevent-cmake) -endif() - if (USE_CASSANDRA) add_subdirectory (libuv) add_subdirectory (cassandra) diff --git a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt index eae3122e216..452a5f7f6aa 100644 --- a/contrib/amqpcpp-cmake/CMakeLists.txt +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -40,5 +40,5 @@ target_compile_options (amqp-cpp target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include) -target_link_libraries (amqp-cpp PUBLIC libevent ssl) +target_link_libraries (amqp-cpp PUBLIC ssl) diff --git a/contrib/libevent b/contrib/libevent deleted file mode 160000 index eee26deed38..00000000000 --- a/contrib/libevent +++ /dev/null @@ -1 +0,0 @@ -Subproject commit eee26deed38fc7a6b6780b54628b007a2810efcd diff --git a/contrib/libevent-cmake/CMakeLists.txt b/contrib/libevent-cmake/CMakeLists.txt deleted file mode 100644 index f99bc221482..00000000000 --- a/contrib/libevent-cmake/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libevent) - -set(SRCS - ${LIBRARY_DIR}/buffer.c - ${LIBRARY_DIR}/bufferevent_filter.c - ${LIBRARY_DIR}/bufferevent_pair.c - ${LIBRARY_DIR}/bufferevent_ratelim.c - ${LIBRARY_DIR}/bufferevent_sock.c - ${LIBRARY_DIR}/bufferevent.c - ${LIBRARY_DIR}/event.c - ${LIBRARY_DIR}/evmap.c - ${LIBRARY_DIR}/evthread.c - ${LIBRARY_DIR}/evutil_rand.c - ${LIBRARY_DIR}/evutil_time.c - ${LIBRARY_DIR}/evutil.c - ${LIBRARY_DIR}/listener.c - ${LIBRARY_DIR}/log.c - ${LIBRARY_DIR}/signal.c - ${LIBRARY_DIR}/strlcpy.c - ${LIBRARY_DIR}/watch.c -) - -if (OS_LINUX) - list (APPEND SRCS - ${LIBRARY_DIR}/epoll.c - ${LIBRARY_DIR}/poll.c - ${LIBRARY_DIR}/select.c - ) - -elseif (OS_DARWIN) - list (APPEND SRCS ${LIBRARY_DIR}/kqueue.c) -endif () - -add_library(libevent ${SRCS}) - -target_compile_options (libevent PUBLIC -Wno-reserved-id-macro) - -if (OS_LINUX) - target_include_directories (libevent PUBLIC linux) -endif () - -target_include_directories (libevent PUBLIC ${LIBRARY_DIR}/include) diff --git 
a/contrib/libevent-cmake/linux/evconfig-private.h b/contrib/libevent-cmake/linux/evconfig-private.h deleted file mode 100644 index a39d2b71fbc..00000000000 --- a/contrib/libevent-cmake/linux/evconfig-private.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef EVCONFIG_PRIVATE_H_INCLUDED_ -#define EVCONFIG_PRIVATE_H_INCLUDED_ - -/* Enable extensions on AIX 3, Interix. */ -/* #undef _ALL_SOURCE */ - -/* Enable GNU extensions on systems that have them. */ -#define _GNU_SOURCE 1 - -/* Enable threading extensions on Solaris. */ -/* #undef _POSIX_PTHREAD_SEMANTICS */ - -/* Enable extensions on HP NonStop. */ -/* #undef _TANDEM_SOURCE */ - -/* Enable general extensions on Solaris. */ -/* #undef __EXTENSIONS__ */ - -/* Number of bits in a file offset, on hosts where this is settable. */ -/* #undef _FILE_OFFSET_BITS */ -/* Define for large files, on AIX-style hosts. */ -/* #undef _LARGE_FILES */ - -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ - -/* Enable POSIX.2 extensions on QNX for getopt */ -#ifdef __QNX__ -/* #undef __EXT_POSIX2 */ -#endif - -#endif diff --git a/contrib/libevent-cmake/linux/event2/event-config.h b/contrib/libevent-cmake/linux/event2/event-config.h deleted file mode 100644 index 09067412490..00000000000 --- a/contrib/libevent-cmake/linux/event2/event-config.h +++ /dev/null @@ -1,516 +0,0 @@ -/* event-config.h - * - * This file was generated by cmake when the makefiles were generated. - * - * DO NOT EDIT THIS FILE. - * - * Do not rely on macros in this file existing in later versions. - */ -#ifndef EVENT2_EVENT_CONFIG_H_INCLUDED_ -#define EVENT2_EVENT_CONFIG_H_INCLUDED_ - -/* Numeric representation of the version */ -#define EVENT__NUMERIC_VERSION 0x02020001 -#define EVENT__PACKAGE_VERSION "2.2.0" - -#define EVENT__VERSION_MAJOR 2 -#define EVENT__VERSION_MINOR 2 -#define EVENT__VERSION_PATCH 0 - -/* Version number of package */ -#define EVENT__VERSION "2.2.0-alpha-dev" - -/* Name of package */ -#define EVENT__PACKAGE "libevent" - -/* Define to the address where bug reports for this package should be sent. */ -#define EVENT__PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define EVENT__PACKAGE_NAME "" - -/* Define to the full name and version of this package. */ -#define EVENT__PACKAGE_STRING "" - -/* Define to the one symbol short name of this package. */ -#define EVENT__PACKAGE_TARNAME "" - -/* Define if libevent should build without support for a debug mode */ -/* #undef EVENT__DISABLE_DEBUG_MODE */ - -/* Define if libevent should not allow replacing the mm functions */ -/* #undef EVENT__DISABLE_MM_REPLACEMENT */ - -/* Define if libevent should not be compiled with thread support */ -/* #undef EVENT__DISABLE_THREAD_SUPPORT */ - -/* Define to 1 if you have the `accept4' function. */ -#define EVENT__HAVE_ACCEPT4 1 - -/* Define to 1 if you have the `arc4random' function. */ -/* #undef EVENT__HAVE_ARC4RANDOM */ - -/* Define to 1 if you have the `arc4random_buf' function. */ -/* #undef EVENT__HAVE_ARC4RANDOM_BUF */ - -/* Define to 1 if you have the `arc4random_addrandom' function. 
*/ -/* #undef EVENT__HAVE_ARC4RANDOM_ADDRANDOM */ - -/* Define if clock_gettime is available in libc */ -#define EVENT__DNS_USE_CPU_CLOCK_FOR_ID 1 - -/* Define is no secure id variant is available */ -/* #undef EVENT__DNS_USE_GETTIMEOFDAY_FOR_ID */ -/* #undef EVENT__DNS_USE_FTIME_FOR_ID */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_ARPA_INET_H 1 - -/* Define to 1 if you have the `clock_gettime' function. */ -#define EVENT__HAVE_CLOCK_GETTIME 1 - -/* Define to 1 if you have the declaration of `CTL_KERN'. */ -#define EVENT__HAVE_DECL_CTL_KERN 1 - -/* Define to 1 if you have the declaration of `KERN_ARND'. */ -#define EVENT__HAVE_DECL_KERN_ARND 0 - -/* Define to 1 if you have `getrandom' function. */ -#define EVENT__HAVE_GETRANDOM 1 - -/* Define if /dev/poll is available */ -/* #undef EVENT__HAVE_DEVPOLL */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_NETDB_H 1 - -/* Define to 1 if fd_mask type is defined */ -#define EVENT__HAVE_FD_MASK 1 - -/* Define to 1 if the header file defines TAILQ_FOREACH. */ -#define EVENT__HAVE_TAILQFOREACH 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_DLFCN_H 1 - -/* Define if your system supports the epoll system calls */ -#define EVENT__HAVE_EPOLL 1 - -/* Define to 1 if you have the `epoll_create1' function. */ -#define EVENT__HAVE_EPOLL_CREATE1 1 - -/* Define to 1 if you have the `epoll_ctl' function. */ -#define EVENT__HAVE_EPOLL_CTL 1 - -/* Define to 1 if you have the `eventfd' function. */ -#define EVENT__HAVE_EVENTFD 1 - -/* Define if your system supports event ports */ -/* #undef EVENT__HAVE_EVENT_PORTS */ - -/* Define to 1 if you have the `fcntl' function. */ -#define EVENT__HAVE_FCNTL 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_FCNTL_H 1 - -/* Define to 1 if you have the `getaddrinfo' function. */ -#define EVENT__HAVE_GETADDRINFO 1 - -/* Define to 1 if you have the `getegid' function. */ -#define EVENT__HAVE_GETEGID 1 - -/* Define to 1 if you have the `geteuid' function. */ -#define EVENT__HAVE_GETEUID 1 - -/* TODO: Check for different gethostname argument counts. CheckPrototypeDefinition.cmake can be used. */ -/* Define this if you have any gethostbyname_r() */ -#define EVENT__HAVE_GETHOSTBYNAME_R 1 - -/* Define this if gethostbyname_r takes 3 arguments */ -/* #undef EVENT__HAVE_GETHOSTBYNAME_R_3_ARG */ - -/* Define this if gethostbyname_r takes 5 arguments */ -/* #undef EVENT__HAVE_GETHOSTBYNAME_R_5_ARG */ - -/* Define this if gethostbyname_r takes 6 arguments */ -#define EVENT__HAVE_GETHOSTBYNAME_R_6_ARG 1 - -/* Define to 1 if you have the `getifaddrs' function. */ -#define EVENT__HAVE_GETIFADDRS 1 - -/* Define to 1 if you have the `getnameinfo' function. */ -#define EVENT__HAVE_GETNAMEINFO 1 - -/* Define to 1 if you have the `getprotobynumber' function. */ -#define EVENT__HAVE_GETPROTOBYNUMBER 1 - -/* Define to 1 if you have the `getservbyname' function. */ -#define EVENT__HAVE_GETSERVBYNAME 1 - -/* Define to 1 if you have the `gettimeofday' function. */ -#define EVENT__HAVE_GETTIMEOFDAY 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_IFADDRS_H 1 - -/* Define to 1 if you have the `inet_ntop' function. */ -#define EVENT__HAVE_INET_NTOP 1 - -/* Define to 1 if you have the `inet_pton' function. */ -#define EVENT__HAVE_INET_PTON 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the `issetugid' function. 
*/ -/* #undef EVENT__HAVE_ISSETUGID */ - -/* Define to 1 if you have the `kqueue' function. */ -/* #undef EVENT__HAVE_KQUEUE */ - -/* Define if the system has zlib */ -#define EVENT__HAVE_LIBZ 1 - -/* Define to 1 if you have the `mach_absolute_time' function. */ -/* #undef EVENT__HAVE_MACH_ABSOLUTE_TIME */ - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_MACH_MACH_TIME_H */ - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_MACH_MACH_H */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `mmap' function. */ -#define EVENT__HAVE_MMAP 1 - -/* Define to 1 if you have the `nanosleep' function. */ -#define EVENT__HAVE_NANOSLEEP 1 - -/* Define to 1 if you have the `usleep' function. */ -#define EVENT__HAVE_USLEEP 1 - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_NETINET_IN6_H */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_NETINET_IN_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_NETINET_TCP_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_UN_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_AFUNIX_H */ - -/* Define if the system has openssl */ -#define EVENT__HAVE_OPENSSL 1 - -/* Define to 1 if you have the `pipe' function. */ -#define EVENT__HAVE_PIPE 1 - -/* Define to 1 if you have the `pipe2' function. */ -#define EVENT__HAVE_PIPE2 1 - -/* Define to 1 if you have the `poll' function. */ -#define EVENT__HAVE_POLL 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_POLL_H 1 - -/* Define to 1 if you have the `port_create' function. */ -/* #undef EVENT__HAVE_PORT_CREATE */ - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_PORT_H */ - -/* Define if we have pthreads on this system */ -#define EVENT__HAVE_PTHREADS 1 - -/* Define to 1 if you have the `putenv' function. */ -#define EVENT__HAVE_PUTENV 1 - -/* Define to 1 if the system has the type `sa_family_t'. */ -#define EVENT__HAVE_SA_FAMILY_T 1 - -/* Define to 1 if you have the `select' function. */ -#define EVENT__HAVE_SELECT 1 - -/* Define to 1 if you have the `setenv' function. */ -#define EVENT__HAVE_SETENV 1 - -/* Define if F_SETFD is defined in */ -#define EVENT__HAVE_SETFD 1 - -/* Define to 1 if you have the `setrlimit' function. */ -#define EVENT__HAVE_SETRLIMIT 1 - -/* Define to 1 if you have the `sendfile' function. */ -#define EVENT__HAVE_SENDFILE 1 - -/* Define to 1 if you have the `sigaction' function. */ -#define EVENT__HAVE_SIGACTION 1 - -/* Define to 1 if you have the `signal' function. */ -#define EVENT__HAVE_SIGNAL 1 - -/* Define to 1 if you have the `strsignal' function. */ -#define EVENT__HAVE_STRSIGNAL 1 - -/* Define to 1 if you have the `splice' function. */ -#define EVENT__HAVE_SPLICE 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_STDARG_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_STRING_H 1 - -/* Define to 1 if you have the `strlcpy' function. */ -/* #undef EVENT__HAVE_STRLCPY */ - -/* Define to 1 if you have the `strsep' function. */ -#define EVENT__HAVE_STRSEP 1 - -/* Define to 1 if you have the `strtok_r' function. 
*/ -#define EVENT__HAVE_STRTOK_R 1 - -/* Define to 1 if you have the `strtoll' function. */ -#define EVENT__HAVE_STRTOLL 1 - -/* Define to 1 if you have the `_gmtime64_s' function. */ -/* #undef EVENT__HAVE__GMTIME64_S */ - -/* Define to 1 if you have the `_gmtime64' function. */ -/* #undef EVENT__HAVE__GMTIME64 */ - -/* Define to 1 if the system has the type `struct addrinfo'. */ -#define EVENT__HAVE_STRUCT_ADDRINFO 1 - -/* Define to 1 if the system has the type `struct in6_addr'. */ -#define EVENT__HAVE_STRUCT_IN6_ADDR 1 - -/* Define to 1 if `s6_addr16' is member of `struct in6_addr'. */ -#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR16 1 - -/* Define to 1 if `s6_addr32' is member of `struct in6_addr'. */ -#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR32 1 - -/* Define to 1 if the system has the type `struct sockaddr_in6'. */ -#define EVENT__HAVE_STRUCT_SOCKADDR_IN6 1 - -/* Define to 1 if `sin6_len' is member of `struct sockaddr_in6'. */ -/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN6_SIN6_LEN */ - -/* Define to 1 if `sin_len' is member of `struct sockaddr_in'. */ -/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ - -/* Define to 1 if the system has the type `struct sockaddr_un'. */ -#define EVENT__HAVE_STRUCT_SOCKADDR_UN 1 - -/* Define to 1 if the system has the type `struct sockaddr_storage'. */ -#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE 1 - -/* Define to 1 if `ss_family' is a member of `struct sockaddr_storage'. */ -#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY 1 - -/* Define to 1 if `__ss_family' is a member of `struct sockaddr_storage'. */ -/* #undef EVENT__HAVE_STRUCT_SOCKADDR_STORAGE___SS_FAMILY */ - -/* Define to 1 if the system has the type `struct linger'. */ -#define EVENT__HAVE_STRUCT_LINGER 1 - -/* Define to 1 if you have the `sysctl' function. */ -/* #undef EVENT__HAVE_SYSCTL */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_EPOLL_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_EVENTFD_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_SYS_EVENT_H */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_IOCTL_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_MMAN_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_QUEUE_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_RESOURCE_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_SELECT_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_SENDFILE_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_SOCKET_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_RANDOM_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef EVENT__HAVE_SYS_SYSCTL_H */ - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_TIMERFD_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_TIME_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_UIO_H 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_SYS_WAIT_H 1 - -/* Define to 1 if you have the header file. 
*/ -#define EVENT__HAVE_ERRNO_H 1 - -/* Define if TAILQ_FOREACH is defined in */ -#define EVENT__HAVE_TAILQFOREACH 1 - -/* Define if timeradd is defined in */ -#define EVENT__HAVE_TIMERADD 1 - -/* Define if timerclear is defined in */ -#define EVENT__HAVE_TIMERCLEAR 1 - -/* Define if timercmp is defined in */ -#define EVENT__HAVE_TIMERCMP 1 - - -/* Define to 1 if you have the `timerfd_create' function. */ -#define EVENT__HAVE_TIMERFD_CREATE 1 - -/* Define if timerisset is defined in */ -#define EVENT__HAVE_TIMERISSET 1 - -/* Define to 1 if the system has the type `uint8_t'. */ -#define EVENT__HAVE_UINT8_T 1 - -/* Define to 1 if the system has the type `uint16_t'. */ -#define EVENT__HAVE_UINT16_T 1 - -/* Define to 1 if the system has the type `uint32_t'. */ -#define EVENT__HAVE_UINT32_T 1 - -/* Define to 1 if the system has the type `uint64_t'. */ -#define EVENT__HAVE_UINT64_T 1 - -/* Define to 1 if the system has the type `uintptr_t'. */ -#define EVENT__HAVE_UINTPTR_T 1 - -/* Define to 1 if you have the `umask' function. */ -#define EVENT__HAVE_UMASK 1 - -/* Define to 1 if you have the header file. */ -#define EVENT__HAVE_UNISTD_H 1 - -/* Define to 1 if you have the `unsetenv' function. */ -#define EVENT__HAVE_UNSETENV 1 - -/* Define to 1 if you have the `vasprintf' function. */ -#define EVENT__HAVE_VASPRINTF 1 - -/* Define if kqueue works correctly with pipes */ -/* #undef EVENT__HAVE_WORKING_KQUEUE */ - -#ifdef __USE_UNUSED_DEFINITIONS__ -/* Define to necessary symbol if this constant uses a non-standard name on your system. */ -/* XXX: Hello, this isn't even used, nor is it defined anywhere... - Ellzey */ -#define EVENT__PTHREAD_CREATE_JOINABLE -#endif - -/* The size of `pthread_t', as computed by sizeof. */ -#define EVENT__SIZEOF_PTHREAD_T 8 - -/* The size of a `int', as computed by sizeof. */ -#define EVENT__SIZEOF_INT 4 - -/* The size of a `long', as computed by sizeof. */ -#define EVENT__SIZEOF_LONG 8 - -/* The size of a `long long', as computed by sizeof. */ -#define EVENT__SIZEOF_LONG_LONG 8 - -/* The size of `off_t', as computed by sizeof. */ -#define EVENT__SIZEOF_OFF_T 8 - -#define EVENT__SIZEOF_SSIZE_T 8 - - -/* The size of a `short', as computed by sizeof. */ -#define EVENT__SIZEOF_SHORT 2 - -/* The size of `size_t', as computed by sizeof. */ -#define EVENT__SIZEOF_SIZE_T 8 - -/* Define to 1 if you can safely include both and . */ -/* #undef EVENT__TIME_WITH_SYS_TIME */ - -/* The size of `socklen_t', as computed by sizeof. */ -#define EVENT__SIZEOF_SOCKLEN_T 4 - -/* The size of 'void *', as computer by sizeof */ -#define EVENT__SIZEOF_VOID_P 8 - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* why not c++? - * - * and are we really expected to use EVENT__inline everywhere, - * shouldn't we just do: - * ifdef EVENT__inline - * define inline EVENT__inline - * - * - Ellzey - */ - -#define EVENT__inline inline -#endif - -#define EVENT__HAVE___func__ 1 -#define EVENT__HAVE___FUNCTION__ 1 - -/* Define to `unsigned' if does not define. */ -#define EVENT__size_t size_t - -/* Define to unsigned int if you dont have it */ -#define EVENT__socklen_t socklen_t - -/* Define to `int' if does not define. 
*/ -#define EVENT__ssize_t ssize_t - -#endif /* \EVENT2_EVENT_CONFIG_H_INCLUDED_ */ diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 7b3a40df852..60cfd5c4868 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 7be31c7b8f2..21f18491ca4 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -6,7 +6,6 @@ #include #include #include -#include namespace Poco { From 88ece429c9a4e9487f02b6c88329452886b0152b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 29 Jun 2020 12:32:04 +0000 Subject: [PATCH 067/330] Move writing to RabbitMQ into background task --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 4 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../WriteBufferToRabbitMQProducer.cpp | 76 +++++++++++-------- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 11 ++- 4 files changed, 59 insertions(+), 34 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index ddcde7cf24f..1a03fc4969e 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -37,13 +37,13 @@ void RabbitMQBlockOutputStream::writePrefix() if (!buffer) throw Exception("Failed to create RabbitMQ producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + buffer->activateWriting(); + child = FormatFactory::instance().getOutput( storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) { buffer->countRow(); }); - - buffer->startEventLoop(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ebf6ec2795c..8a704661882 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -273,7 +273,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { return std::make_shared( - parsed_address, login_password, routing_keys[0], local_exchange_name, + parsed_address, global_context, login_password, routing_keys[0], local_exchange_name, log, num_consumers * num_queues, bind_by_id, use_transactional_channel, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index bba1a6b693f..a4cdc09d4b9 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -13,16 +13,19 @@ namespace DB { -enum +namespace ErrorCodes { - Connection_setup_sleep = 200, - Loop_retries_max = 1000, - Loop_wait = 10, - Batch = 10000 -}; + extern const int CANNOT_CONNECT_RABBITMQ; +} + +static const auto QUEUE_SIZE = 100000; +static const auto CONNECT_SLEEP = 200; +static const auto RETRIES_MAX = 1000; +static const auto LOOP_WAIT = 10; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( std::pair & parsed_address, + Context & global_context, std::pair & login_password_, const String & routing_key_, const String exchange_, @@ -44,6 +47,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) + , payloads(QUEUE_SIZE) { loop = std::make_unique(); @@ -57,15 +61,15 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. */ size_t cnt_retries = 0; - while (!connection->ready() && ++cnt_retries != Loop_retries_max) + while (!connection->ready() && ++cnt_retries != RETRIES_MAX) { uv_run(loop.get(), UV_RUN_NOWAIT); - std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); } if (!connection->ready()) { - LOG_ERROR(log, "Cannot set up connection for producer!"); + throw Exception("Cannot set up connection for producer", ErrorCodes::CANNOT_CONNECT_RABBITMQ); } producer_channel = std::make_shared(connection.get()); @@ -76,15 +80,19 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( { producer_channel->startTransaction(); } + + writing_task = global_context.getSchedulePool().createTask("RabbitMQWritingTask", [this]{ writingFunc(); }); + writing_task->deactivate(); } WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - finilizeProducer(); - connection->close(); - event_handler->stop(); + stop_loop.store(true); + writing_task->deactivate(); + checkExchange(); + connection->close(); assert(rows == 0 && chunks.empty()); } @@ -111,26 +119,34 @@ void WriteBufferToRabbitMQProducer::countRow() chunks.clear(); set(nullptr, 0); - next_queue = next_queue % num_queues + 1; + payloads.push(payload); + } +} - if (bind_by_id) + +void WriteBufferToRabbitMQProducer::writingFunc() +{ + String payload; + while (!stop_loop || !payloads.empty()) + { + while (!payloads.empty()) { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); - } - else - { - producer_channel->publish(exchange_name, routing_key, payload); + payloads.pop(payload); + next_queue = next_queue % num_queues + 1; + + if (bind_by_id) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + + ++message_counter; } - ++message_counter; - - /* Run event loop to actually publish, checking exchange is just a point to stop the event loop. Messages are not sent - * without looping and looping after every batch is much better than processing all the messages in one time. 
- */ - if ((message_counter %= Batch) == 0) - { - checkExchange(); - } + startEventLoop(); } } @@ -182,10 +198,10 @@ void WriteBufferToRabbitMQProducer::finilizeProducer() }); size_t count_retries = 0; - while (!answer_received && ++count_retries != Loop_retries_max) + while (!answer_received && ++count_retries != RETRIES_MAX) { startEventLoop(); - std::this_thread::sleep_for(std::chrono::milliseconds(Loop_wait)); + std::this_thread::sleep_for(std::chrono::milliseconds(LOOP_WAIT)); } } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 5793a044f9e..2b16403fc44 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include namespace DB @@ -20,6 +22,7 @@ class WriteBufferToRabbitMQProducer : public WriteBuffer public: WriteBufferToRabbitMQProducer( std::pair & parsed_address, + Context & global_context, std::pair & login_password_, const String & routing_key_, const String exchange_, @@ -35,11 +38,13 @@ public: ~WriteBufferToRabbitMQProducer() override; void countRow(); - void startEventLoop(); + void activateWriting() { writing_task->activateAndSchedule(); } private: void nextImpl() override; void checkExchange(); + void startEventLoop(); + void writingFunc(); void finilizeProducer(); std::pair & login_password; @@ -49,11 +54,15 @@ private: const size_t num_queues; const bool use_transactional_channel; + BackgroundSchedulePool::TaskHolder writing_task; + std::atomic stop_loop = false; + std::unique_ptr loop; std::unique_ptr event_handler; std::unique_ptr connection; ProducerPtr producer_channel; + ConcurrentBoundedQueue payloads; size_t next_queue = 0; UInt64 message_counter = 0; From af8d62bbcb1f91b9ef2e452e7b2c49cf2722e39e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 29 Jun 2020 15:48:18 +0300 Subject: [PATCH 068/330] Add TLB misses perf counters. --- src/Common/ProfileEvents.cpp | 8 +++-- src/Common/ThreadProfileEvents.cpp | 54 ++++++++++++++++++++++++---- src/Common/ThreadProfileEvents.h | 10 ++++-- src/Interpreters/ThreadStatusExt.cpp | 4 +++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 7a7a6bc6162..5c88b2ee849 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -196,8 +196,12 @@ M(PerfCpuMigrations, "Number of times the process has migrated to a new CPU") \ M(PerfAlignmentFaults, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \ M(PerfEmulationFaults, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \ - M(PerfPageFaultsMinor, "This counts the number of minor page faults. These did not require disk I/O to handle.") \ - M(PerfPageFaultsMajor, "This counts the number of major page faults. These required disk I/O to handle.") \ + M(PerfMinEnabledTime, "For all events, minimum time that an event was enabled. Used to track event multiplexing influence") \ + M(PerfMinEnabledRunningTime, "Running time for event with minimum enabled time. 
Used to track the amount of event multiplexing") \ + M(PerfDataTLBReferences, "Data TLB references") \ + M(PerfDataTLBMisses, "Data TLB misses") \ + M(PerfInstructionTLBReferences, "Instruction TLB references") \ + M(PerfInstructionTLBMisses, "Instruction TLB misses") \ \ M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ \ diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 1d65a16ba66..f009096cfec 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -147,6 +147,19 @@ thread_local PerfEventsCounters current_thread_counters; .settings_name = #LOCAL_NAME \ } +// One event for cache accesses and one for cache misses. +// Type is ACCESS or MISS +#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \ + PerfEventInfo \ + { \ + .event_type = perf_type_id::PERF_TYPE_HW_CACHE, \ + .event_config = (PERF_NAME) \ + | (PERF_COUNT_HW_CACHE_OP_READ << 8) \ + | (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \ + .profile_event = ProfileEvents::LOCAL_NAME, \ + .settings_name = #LOCAL_NAME \ + } + // descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html static const PerfEventInfo raw_events_info[] = { HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles), @@ -167,8 +180,16 @@ static const PerfEventInfo raw_events_info[] = { SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults), - SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), - SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor) + + // Don't add them -- they are the same as SoftPageFaults and HardPageFaults, + // match well numerically. + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), + // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor), + + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfInstructionTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfInstructionTLBMisses, MISS), }; static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS); @@ -455,7 +476,12 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile } } - // actually process counters' values + // Actually process counters' values. Track the minimal time that a performance + // counter was enabled, and the corresponding running time, to give some idea + // about the amount of counter multiplexing. + UInt64 min_enabled_time = -1; + UInt64 running_time_for_min_enabled_time = 0; + for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i) { int fd = thread_events_descriptors_holder.descriptors[i]; @@ -469,14 +495,30 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile // Account for counter multiplexing. time_running and time_enabled are // not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate // deltas from old values. 
+ const auto enabled = current_value.time_enabled - previous_value.time_enabled; + const auto running = current_value.time_running - previous_value.time_running; const UInt64 delta = (current_value.value - previous_value.value) - * (current_value.time_enabled - previous_value.time_enabled) - / std::max(1.f, - float(current_value.time_running - previous_value.time_running)); + * enabled / std::max(1.f, float(running)); + + if (min_enabled_time > enabled) + { + min_enabled_time = enabled; + running_time_for_min_enabled_time = running; + } profile_events.increment(info.profile_event, delta); } + // If we had at least one enabled event, also show multiplexing-related + // statistics. + if (min_enabled_time != UInt64(-1)) + { + profile_events.increment(ProfileEvents::PerfMinEnabledTime, + min_enabled_time); + profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime, + running_time_for_min_enabled_time); + } + // Store current counter values for the next profiling period. memcpy(previous_values, current_values, sizeof(current_values)); } diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index a4ee0628629..7118e927162 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -53,8 +53,12 @@ namespace ProfileEvents extern const Event PerfCpuMigrations; extern const Event PerfAlignmentFaults; extern const Event PerfEmulationFaults; - extern const Event PerfPageFaultsMinor; - extern const Event PerfPageFaultsMajor; + extern const Event PerfMinEnabledTime; + extern const Event PerfMinEnabledRunningTime; + extern const Event PerfDataTLBReferences; + extern const Event PerfDataTLBMisses; + extern const Event PerfInstructionTLBReferences; + extern const Event PerfInstructionTLBMisses; #endif } @@ -158,7 +162,7 @@ struct PerfEventValue UInt64 time_running = 0; }; -static constexpr size_t NUMBER_OF_RAW_EVENTS = 18; +static constexpr size_t NUMBER_OF_RAW_EVENTS = 20; struct PerfDescriptorsHolder : boost::noncopyable { diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 04265734ce7..e3e695f80f9 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -191,6 +191,10 @@ void ThreadStatus::finalizePerformanceCounters() performance_counters_finalized = true; updatePerformanceCounters(); + // We want to close perf file descriptors if the perf events were enabled for + // one query. What this code does in practice is less clear -- e.g., if I run + // 'select 1 settings metrics_perf_events_enabled = 1', I still get + // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. 
bool close_perf_descriptors = true; if (query_context) close_perf_descriptors = !query_context->getSettingsRef().metrics_perf_events_enabled; From 9b9030caad1c20bc271946a0fb2ee3313223a3ee Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jun 2020 15:57:36 +0300 Subject: [PATCH 069/330] Update ThreadProfileEvents.cpp --- src/Common/ThreadProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index f009096cfec..0e5d5c78a6f 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -188,8 +188,8 @@ static const PerfEventInfo raw_events_info[] = { CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), - CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfInstructionTLBReferences, ACCESS), - CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfInstructionTLBMisses, MISS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), + CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS), }; static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS); From 0a455fc0e1fec0f38fc0001266f12281116f8a28 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Jun 2020 16:25:28 +0300 Subject: [PATCH 070/330] Fix tuple nullable comparison. --- src/Functions/FunctionsComparison.h | 60 ++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 1d4fb00cc60..e3e91453fcd 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -931,6 +931,8 @@ private: if (0 == tuple_size) throw Exception("Comparison of zero-sized tuples is not implemented.", ErrorCodes::NOT_IMPLEMENTED); + ColumnsWithTypeAndName convolution_types(tuple_size); + Block tmp_block; for (size_t i = 0; i < tuple_size; ++i) { @@ -938,9 +940,10 @@ private: tmp_block.insert(y[i]); auto impl = func_compare->build({x[i], y[i]}); + convolution_types[i].type = impl->getReturnType(); /// Comparison of the elements. - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert({ nullptr, impl->getReturnType(), "" }); impl->execute(tmp_block, {i * 3, i * 3 + 1}, i * 3 + 2, input_rows_count); } @@ -952,14 +955,13 @@ private: } /// Logical convolution. - tmp_block.insert({ nullptr, std::make_shared(), "" }); ColumnNumbers convolution_args(tuple_size); for (size_t i = 0; i < tuple_size; ++i) convolution_args[i] = i * 3 + 2; - ColumnsWithTypeAndName convolution_types(convolution_args.size(), { nullptr, std::make_shared(), "" }); auto impl = func_convolution->build(convolution_types); + tmp_block.insert({ nullptr, impl->getReturnType(), "" }); impl->execute(tmp_block, convolution_args, tuple_size * 3, input_rows_count); block.getByPosition(result).column = tmp_block.getByPosition(tuple_size * 3).column; @@ -981,46 +983,71 @@ private: ColumnsWithTypeAndName bin_args = {{ nullptr, std::make_shared(), "" }, { nullptr, std::make_shared(), "" }}; - auto func_and_adaptor = func_and->build(bin_args); - auto func_or_adaptor = func_or->build(bin_args); - Block tmp_block; /// Pairwise comparison of the inequality of all elements; on the equality of all elements except the last. 
+ /// (x[i], y[i], x[i] < y[i], x[i] == y[i]) for (size_t i = 0; i < tuple_size; ++i) { tmp_block.insert(x[i]); tmp_block.insert(y[i]); - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert(ColumnWithTypeAndName()); // pos == i * 4 + 2 if (i + 1 != tuple_size) { auto impl_head = func_compare_head->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 2).type = impl_head->getReturnType(); impl_head->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); - tmp_block.insert({ nullptr, std::make_shared(), "" }); + tmp_block.insert(ColumnWithTypeAndName()); // i * 4 + 3 auto impl_equals = func_equals->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 3).type = impl_equals->getReturnType(); impl_equals->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3, input_rows_count); } else { auto impl_tail = func_compare_tail->build({x[i], y[i]}); + tmp_block.getByPosition(i * 4 + 2).type = impl_tail->getReturnType(); impl_tail->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); } } /// Combination. Complex code - make a drawing. It can be replaced by a recursive comparison of tuples. + /// Last column contains intermediate result. + /// Code is generally equivalent to: + /// res = `x < y`[tuple_size - 1]; + /// for (int i = tuple_size - 2; i >= 0; --i) + /// res = (res && `x == y`[i]) || `x < y`[i]; size_t i = tuple_size - 1; while (i > 0) { - tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_and_adaptor->execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 3}, tmp_block.columns() - 1, input_rows_count); - tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_or_adaptor->execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 2}, tmp_block.columns() - 1, input_rows_count); --i; + + size_t and_lhs_pos = tmp_block.columns() - 1; // res + size_t and_rhs_pos = i * 4 + 3; // `x == y`[i] + tmp_block.insert(ColumnWithTypeAndName()); + + ColumnsWithTypeAndName and_args = {{ nullptr, tmp_block.getByPosition(and_lhs_pos).type, "" }, + { nullptr, tmp_block.getByPosition(and_rhs_pos).type, "" }}; + + auto func_and_adaptor = func_and->build(and_args); + tmp_block.getByPosition(tmp_block.columns() - 1).type = func_and_adaptor->getReturnType(); + func_and_adaptor->execute(tmp_block, {and_lhs_pos, and_rhs_pos}, tmp_block.columns() - 1, input_rows_count); + + size_t or_lhs_pos = tmp_block.columns() - 1; // (res && `x == y`[i]) + size_t or_rhs_pos = i * 4 + 2; // `x < y`[i] + tmp_block.insert(ColumnWithTypeAndName()); + + ColumnsWithTypeAndName or_args = {{ nullptr, tmp_block.getByPosition(or_lhs_pos).type, "" }, + { nullptr, tmp_block.getByPosition(or_rhs_pos).type, "" }}; + + auto func_or_adaptor = func_or->build(bin_args); + tmp_block.getByPosition(tmp_block.columns() - 1).type = func_or_adaptor->getReturnType(); + func_or_adaptor->execute(tmp_block, {or_lhs_pos, or_rhs_pos}, tmp_block.columns() - 1, input_rows_count); + } block.getByPosition(result).column = tmp_block.getByPosition(tmp_block.columns() - 1).column; @@ -1109,13 +1136,20 @@ public: auto adaptor = FunctionOverloadResolverAdaptor(std::make_unique( FunctionComparison::create(context))); + bool has_nullable = false; + size_t size = left_tuple->getElements().size(); for (size_t i = 0; i < size; ++i) { ColumnsWithTypeAndName args = {{nullptr, left_tuple->getElements()[i], ""}, {nullptr, right_tuple->getElements()[i], ""}}; - adaptor.build(args); + has_nullable = has_nullable || adaptor.build(args)->getReturnType()->isNullable(); } + + /// If any element comparison is 
nullable, return type will also be nullable. + /// We useDefaultImplementationForNulls, but it doesn't work for tuples. + if (has_nullable) + return std::make_shared(std::make_shared()); } return std::make_shared(); From eb27814fbe9d6e3151fcd170d2d17a168176b3dc Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 28 Jun 2020 21:39:44 +0300 Subject: [PATCH 071/330] Fix access rights: cannot grant INTROSPECTION when allow_introspection_functions=0. --- src/Access/ContextAccess.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index e7bd0f8287d..62aebfd4367 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -431,7 +431,7 @@ boost::shared_ptr ContextAccess::calculateResultAccess(bool if (!allow_ddl_) merged_access->revoke(table_and_dictionary_ddl); - if (!allow_introspection_ && !grant_option) + if (!allow_introspection_) merged_access->revoke(AccessType::INTROSPECTION); /// Anyone has access to the "system" database. From 6d67c77e68b1d4809f2583694c8e70e41b05fe49 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 29 Jun 2020 17:09:51 +0300 Subject: [PATCH 072/330] fixup --- docker/test/performance-comparison/report.py | 4 ++++ tests/performance/float_formatting.xml | 7 +++++- tests/performance/set.xml | 23 +++++++++++++++----- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 227722a8bea..826ec60c9ef 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -7,6 +7,7 @@ import csv import itertools import json import os +import os.path import pprint import sys import traceback @@ -324,6 +325,9 @@ if args.report == 'main': print_test_times() def print_benchmark_results(): + if not os.path.isfile('benchmark/website-left.json'): + return + json_reports = [json.load(open(f'benchmark/website-{x}.json')) for x in ['left', 'right']] stats = [next(iter(x.values()))["statistics"] for x in json_reports] qps = [x["QPS"] for x in stats] diff --git a/tests/performance/float_formatting.xml b/tests/performance/float_formatting.xml index 18289fe2862..4588f048d20 100644 --- a/tests/performance/float_formatting.xml +++ b/tests/performance/float_formatting.xml @@ -1,4 +1,9 @@ - + + expr diff --git a/tests/performance/set.xml b/tests/performance/set.xml index f6eeffec6e7..09301d5637c 100644 --- a/tests/performance/set.xml +++ b/tests/performance/set.xml @@ -1,24 +1,37 @@ - table + table_small zeros(10000000) zeros_mt(100000000) - size + table_large + + zeros(100000000) + zeros_mt(1000000000) + + + + size_large + + 1048576 + 10000000 + + + + size_small 1 16 1024 16384 - 1048576 - 10000000 - SELECT count() FROM {table} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size}) + SELECT count() FROM {table_large} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size_small}) + SELECT count() FROM {table_small} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size_large}) From 877c10f5fabc6a51c4b9068d4e4b049e30a50c19 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jun 2020 17:13:16 +0300 Subject: [PATCH 073/330] Update ThreadProfileEvents.cpp --- src/Common/ThreadProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 0e5d5c78a6f..c63050792c2 
100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -180,7 +180,7 @@ static const PerfEventInfo raw_events_info[] = { SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations), SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults), SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults), - + // Don't add them -- they are the same as SoftPageFaults and HardPageFaults, // match well numerically. // SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor), From 6bf9d670becaef33c7db254e6482e8d7cd6cd81e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Jun 2020 18:22:21 +0300 Subject: [PATCH 074/330] Fix test. --- src/Functions/FunctionsComparison.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index e3e91453fcd..72930be8c63 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -980,9 +980,6 @@ private: size_t tuple_size, size_t input_rows_count) { - ColumnsWithTypeAndName bin_args = {{ nullptr, std::make_shared(), "" }, - { nullptr, std::make_shared(), "" }}; - Block tmp_block; /// Pairwise comparison of the inequality of all elements; on the equality of all elements except the last. @@ -1044,7 +1041,7 @@ private: ColumnsWithTypeAndName or_args = {{ nullptr, tmp_block.getByPosition(or_lhs_pos).type, "" }, { nullptr, tmp_block.getByPosition(or_rhs_pos).type, "" }}; - auto func_or_adaptor = func_or->build(bin_args); + auto func_or_adaptor = func_or->build(or_args); tmp_block.getByPosition(tmp_block.columns() - 1).type = func_or_adaptor->getReturnType(); func_or_adaptor->execute(tmp_block, {or_lhs_pos, or_rhs_pos}, tmp_block.columns() - 1, input_rows_count); @@ -1169,7 +1166,7 @@ public: /// NOTE: Nullable types are special case. /// (BTW, this function use default implementation for Nullable, so Nullable types cannot be here. Check just in case.) /// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal sometimes not). - if (left_type->equals(*right_type) && !left_type->isNullable() && col_left_untyped == col_right_untyped) + if (left_type->equals(*right_type) && !left_type->isNullable() && !isTuple(left_type) && col_left_untyped == col_right_untyped) { /// Always true: =, <=, >= if constexpr (std::is_same_v, EqualsOp> From 840d5185b90be94abe392217f2a4ce38ede8aa28 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Jun 2020 18:32:28 +0300 Subject: [PATCH 075/330] Added test. 
--- .../01353_nullable_tuple.reference | 92 +++++++++++++++ .../0_stateless/01353_nullable_tuple.sql | 107 ++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 tests/queries/0_stateless/01353_nullable_tuple.reference create mode 100644 tests/queries/0_stateless/01353_nullable_tuple.sql diff --git a/tests/queries/0_stateless/01353_nullable_tuple.reference b/tests/queries/0_stateless/01353_nullable_tuple.reference new file mode 100644 index 00000000000..b7939182780 --- /dev/null +++ b/tests/queries/0_stateless/01353_nullable_tuple.reference @@ -0,0 +1,92 @@ +single argument +1 +0 +1 +0 +1 +0 +- 1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +1 +1 +1 +- 2 +1 +1 +1 +0 +0 +0 +0 +0 +1 +1 +1 +1 +- 3 +1 +1 +1 +1 +1 +1 +- 4 +\N +\N +\N +\N +\N +\N +two arguments +1 +1 +1 +1 +1 +1 +- 1 +0 +0 +0 +0 +0 +0 +- 2 +1 +1 +1 +1 +1 +1 +- 3 +\N +\N +\N +\N +\N +1 +\N +\N +0 +many arguments +1 +1 +0 +0 +1 +0 +1 +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/01353_nullable_tuple.sql b/tests/queries/0_stateless/01353_nullable_tuple.sql new file mode 100644 index 00000000000..f757e2c42d7 --- /dev/null +++ b/tests/queries/0_stateless/01353_nullable_tuple.sql @@ -0,0 +1,107 @@ +select 'single argument'; +select tuple(number) = tuple(number) from numbers(1); +select tuple(number) = tuple(number + 1) from numbers(1); +select tuple(toNullable(number)) = tuple(number) from numbers(1); +select tuple(toNullable(number)) = tuple(number + 1) from numbers(1); +select tuple(toNullable(number)) = tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) = tuple(toNullable(number + 1)) from numbers(1); +select '- 1'; +select tuple(toNullable(number)) < tuple(number + 1) from numbers(1); +select tuple(number) < tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) < tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number)) > tuple(number + 1) from numbers(1); +select tuple(number) > tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) > tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) < tuple(number) from numbers(1); +select tuple(number + 1) < tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) < tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) > tuple(number) from numbers(1); +select tuple(number + 1) > tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) > tuple(toNullable(number)) from numbers(1); + +select '- 2'; +select tuple(toNullable(number)) <= tuple(number + 1) from numbers(1); +select tuple(number) <= tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) <= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number)) >= tuple(number + 1) from numbers(1); +select tuple(number) > tuple(toNullable(number + 1)) from numbers(1); +select tuple(toNullable(number)) >= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) <= tuple(number) from numbers(1); +select tuple(number + 1) <= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) <= tuple(toNullable(number + 1)) from numbers(1); + +select tuple(toNullable(number + 1)) >= tuple(number) from numbers(1); +select tuple(number + 1) >= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number + 1)) >= tuple(toNullable(number)) from numbers(1); + +select '- 3'; +select tuple(toNullable(number)) <= 
tuple(number) from numbers(1); +select tuple(number) <= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) <= tuple(toNullable(number)) from numbers(1); + +select tuple(toNullable(number)) >= tuple(number) from numbers(1); +select tuple(number) >= tuple(toNullable(number)) from numbers(1); +select tuple(toNullable(number)) >= tuple(toNullable(number)) from numbers(1); + +select '- 4'; +select tuple(number) = tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) = tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(number) <= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) <= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(number) >= tuple(materialize(toUInt64OrNull(''))) from numbers(1); +select tuple(materialize(toUInt64OrNull(''))) >= tuple(materialize(toUInt64OrNull(''))) from numbers(1); + +select 'two arguments'; +select tuple(toNullable(number), number) = tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(toNullable(number), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) = tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) = tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) = tuple(toNullable(number), number) from numbers(1); + +select '- 1'; +select tuple(toNullable(number), number) < tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(number, number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), number) from numbers(1); + +select '- 2'; +select tuple(toNullable(number), number) < tuple(number, number + 1) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(number, number + 1) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number + 1), number) from numbers(1); +select tuple(toNullable(number), toNullable(number)) < tuple(toNullable(number + 1), toNullable(number)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), toNullable(number + 1)) from numbers(1); +select tuple(number, toNullable(number)) < tuple(toNullable(number), number + 1) from numbers(1); + +select '- 3'; +select tuple(materialize(toUInt64OrNull('')), number) = tuple(number, number) from numbers(1); +select tuple(materialize(toUInt64OrNull('')), number) = tuple(number, toUInt64OrNull('')) from numbers(1); +select tuple(materialize(toUInt64OrNull('')), toUInt64OrNull('')) = tuple(toUInt64OrNull(''), toUInt64OrNull('')) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) < tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) <= tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) < tuple(number + 1, number) from numbers(1); +select tuple(number, 
materialize(toUInt64OrNull(''))) > tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) >= tuple(number, number) from numbers(1); +select tuple(number, materialize(toUInt64OrNull(''))) > tuple(number + 1, number) from numbers(1); + +select 'many arguments'; +select tuple(toNullable(number), number, number) = tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) = tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) = tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, number) < tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), number, number) <= tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) < tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), number) < tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, materialize(toUInt64OrNull(''))) = tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) = tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) = tuple(number, materialize('a'), number + 1) from numbers(1); +select tuple(toNullable(number), number, materialize(toUInt64OrNull(''))) <= tuple(number, number, number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) <= tuple(number, materialize('a'), number) from numbers(1); +select tuple(toNullable(number), materialize('a'), materialize(toUInt64OrNull(''))) <= tuple(number, materialize('a'), number + 1) from numbers(1); From fd9b4168717a300000aab2c7f4eb8e15af49f6f5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 29 Jun 2020 15:41:17 +0000 Subject: [PATCH 076/330] Fix and simplify code --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 36 ++++++++----------- src/Storages/RabbitMQ/RabbitMQHandler.h | 12 +++---- .../ReadBufferFromRabbitMQConsumer.cpp | 22 +++++------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 15 ++------ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../WriteBufferToRabbitMQProducer.cpp | 28 +++++++++------ .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 5 ++- 7 files changed, 49 insertions(+), 71 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 6d3ed41cf78..2fdb142423f 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -1,10 +1,14 @@ #include +#include #include namespace DB { -static const auto Lock_timeout = 50; +namespace ErrorCodes +{ + extern const int CANNOT_CONNECT_RABBITMQ; +} /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop and handler). 
@@ -23,15 +27,14 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes if (!connection->usable() || !connection->ready()) { - LOG_ERROR(log, "Connection lost completely"); + throw Exception("Connection error", ErrorCodes::CANNOT_CONNECT_RABBITMQ); } - - stop(); } -void RabbitMQHandler::startLoop() +void RabbitMQHandler::startBackgroundLoop() { + /// stop_loop variable is updated in a separate thread while (!stop_loop) { uv_run(loop, UV_RUN_NOWAIT); @@ -39,24 +42,13 @@ void RabbitMQHandler::startLoop() } -void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) +void RabbitMQHandler::startLoop() { - std::lock_guard lock(mutex_before_event_loop); - uv_run(loop, UV_RUN_NOWAIT); -} - - -void RabbitMQHandler::startProducerLoop() -{ - uv_run(loop, UV_RUN_NOWAIT); -} - - -void RabbitMQHandler::stop() -{ - //std::lock_guard lock(mutex_before_loop_stop); - //uv_stop(loop); - stop_loop = true; + if (starting_loop.try_lock()) + { + uv_run(loop, UV_RUN_NOWAIT); + starting_loop.unlock(); + } } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 60cfd5c4868..eb358da9425 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -16,22 +16,18 @@ class RabbitMQHandler : public AMQP::LibUvHandler public: RabbitMQHandler(uv_loop_t * evbase_, Poco::Logger * log_); - void onError(AMQP::TcpConnection * connection, const char * message) override; - void startConsumerLoop(std::atomic & loop_started); - void startProducerLoop(); - void stop(); + + void stop() { stop_loop.store(true); } + void startBackgroundLoop(); void startLoop(); private: uv_loop_t * loop; Poco::Logger * log; - std::atomic stop_loop = false, running_loop = false; - + std::atomic stop_loop = false; std::timed_mutex starting_loop; - std::mutex mutex_before_event_loop; - std::mutex mutex_before_loop_stop; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index dbb91bf19c4..78a8b3c69e3 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -30,7 +30,7 @@ namespace ExchangeType static const String HEADERS = "headers"; } -static const auto QUEUE_SIZE = 50000; /// Equals capacity of single rabbitmq queue +static const auto QUEUE_SIZE = 50000; /// Equals capacity of a single rabbitmq queue ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, @@ -60,7 +60,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT) , local_hash_exchange(local_exchange + "_" + ExchangeType::HASH) , stopped(stopped_) - , messages(QUEUE_SIZE) + , messages(QUEUE_SIZE * num_queues) { exchange_type_set = exchange_type != ExchangeType::DEFAULT; @@ -125,7 +125,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() /* Declare client's exchange of the specified type and bind it to hash-exchange (if it is not already hash-exchange), which * will evenly distribute messages between all consumers. (This enables better scaling as without hash-exchange - the only - * option to avoid getting the same messages more than once - is having only one consumer with one queue, which is not good.) 
+ * option to avoid getting the same messages more than once - is having only one consumer with one queue) */ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) { @@ -243,7 +243,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) }); /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed, because - * if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise + * if moved there, it must(!) be wrapped inside a channel->onSuccess callback or any other, otherwise * consumer might fail to subscribe and no resubscription will help. */ subscribe(queues.back()); @@ -327,7 +327,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error)) { - startEventLoop(loop_started); + startEventLoop(); } } @@ -378,7 +378,7 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() /// These variables are updated in a separate thread. while (count_subscribed != wait_subscribed && !consumer_error) { - startEventLoop(loop_started); + startEventLoop(); } LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); @@ -395,15 +395,9 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() } -void ReadBufferFromRabbitMQConsumer::stopEventLoop() +void ReadBufferFromRabbitMQConsumer::startEventLoop() { - event_handler->stop(); -} - - -void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & loop_started) -{ - event_handler->startConsumerLoop(loop_started); + event_handler->startLoop(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 21f18491ca4..c9452fb249d 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -44,8 +44,6 @@ public: auto getExchange() const { return exchange_name; } private: - using Messages = std::vector; - ChannelPtr consumer_channel; HandlerPtr event_handler; @@ -70,7 +68,7 @@ private: bool local_exchange_declared = false, local_hash_exchange_declared = false; bool exchange_type_set = false, hash_exchange = false; - std::atomic loop_started = false, consumer_error = false; + std::atomic consumer_error = false; std::atomic count_subscribed = 0, wait_subscribed; ConcurrentBoundedQueue messages; @@ -78,21 +76,12 @@ private: std::vector queues; std::unordered_map subscribed_queue; - /* Note: as all consumers share the same connection => they also share the same - * event loop, which can be started by any consumer and the loop is blocking only to the thread that - * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, - * at most two threads will be present: main thread and the one that executes callbacks (1 thread if - * main thread is the one that started the loop). 
- */ - std::mutex mutex; - bool nextImpl() override; void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(std::atomic & loop_started); - void stopEventLoop(); + void startEventLoop(); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 8a704661882..7083da56f9a 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -138,7 +138,7 @@ void StorageRabbitMQ::heartbeatFunc() void StorageRabbitMQ::loopingFunc() { LOG_DEBUG(log, "Starting event looping iterations"); - event_handler->startLoop(); + event_handler->startBackgroundLoop(); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index a4cdc09d4b9..9b6bf39e6cd 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes extern const int CANNOT_CONNECT_RABBITMQ; } -static const auto QUEUE_SIZE = 100000; +static const auto QUEUE_SIZE = 50000; static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 1000; static const auto LOOP_WAIT = 10; @@ -47,7 +47,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) - , payloads(QUEUE_SIZE) + , payloads(QUEUE_SIZE * num_queues) { loop = std::make_unique(); @@ -127,6 +127,7 @@ void WriteBufferToRabbitMQProducer::countRow() void WriteBufferToRabbitMQProducer::writingFunc() { String payload; + while (!stop_loop || !payloads.empty()) { while (!payloads.empty()) @@ -142,10 +143,7 @@ void WriteBufferToRabbitMQProducer::writingFunc() { producer_channel->publish(exchange_name, routing_key, payload); } - - ++message_counter; } - startEventLoop(); } } @@ -183,7 +181,7 @@ void WriteBufferToRabbitMQProducer::finilizeProducer() if (use_transactional_channel) { - std::atomic answer_received = false; + std::atomic answer_received = false, wait_rollback = false; producer_channel->commitTransaction() .onSuccess([&]() { @@ -193,12 +191,22 @@ void WriteBufferToRabbitMQProducer::finilizeProducer() .onError([&](const char * message) { answer_received = true; - LOG_TRACE(log, "None of messages were publishd: {}", message); - /// Probably should do something here + wait_rollback = true; + LOG_TRACE(log, "Publishing not successful: {}", message); + producer_channel->rollbackTransaction() + .onSuccess([&]() + { + wait_rollback = false; + }) + .onError([&](const char * message) + { + LOG_ERROR(log, "Failed to rollback transaction: {}", message); + wait_rollback = false; + }); }); size_t count_retries = 0; - while (!answer_received && ++count_retries != RETRIES_MAX) + while ((!answer_received || wait_rollback) && ++count_retries != RETRIES_MAX) { startEventLoop(); std::this_thread::sleep_for(std::chrono::milliseconds(LOOP_WAIT)); @@ -217,7 +225,7 @@ void WriteBufferToRabbitMQProducer::nextImpl() void WriteBufferToRabbitMQProducer::startEventLoop() { - event_handler->startProducerLoop(); + event_handler->startLoop(); } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 2b16403fc44..8339fc0abb3 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -14,8 +14,7 @@ namespace DB { -using ProducerPtr = std::shared_ptr; 
-using Messages = std::vector; +using ChannelPtr = std::shared_ptr; class WriteBufferToRabbitMQProducer : public WriteBuffer { @@ -60,7 +59,7 @@ private: std::unique_ptr loop; std::unique_ptr event_handler; std::unique_ptr connection; - ProducerPtr producer_channel; + ChannelPtr producer_channel; ConcurrentBoundedQueue payloads; size_t next_queue = 0; From c97d071b4beba23e4e6716e4f8565cffeee34cde Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Jun 2020 19:28:14 +0300 Subject: [PATCH 077/330] Fix test. --- src/Functions/FunctionsComparison.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 72930be8c63..577ea19fe8a 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include From 690f5431f3be132ffe12a88d5cbd215e287512f6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jun 2020 20:48:48 +0300 Subject: [PATCH 078/330] trigger From bfc815339f5714c889d3898e80eadec6926f9b54 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jun 2020 20:54:35 +0300 Subject: [PATCH 079/330] fix yandex sync --- src/Processors/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 4e6ec2372da..93caf9b756d 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -38,6 +38,7 @@ SRCS( Formats/Impl/NullFormat.cpp Formats/Impl/ODBCDriver2BlockOutputFormat.cpp Formats/Impl/ODBCDriverBlockOutputFormat.cpp + Formats/Impl/ORCBlockOutputFormat.cpp Formats/Impl/PostgreSQLOutputFormat.cpp Formats/Impl/PrettyBlockOutputFormat.cpp Formats/Impl/PrettyCompactBlockOutputFormat.cpp From 9c94993f1457336a9afd3e86834e3e584021c837 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 09:47:39 +0300 Subject: [PATCH 080/330] Fix "Sharding key is not deterministic" message --- src/Storages/StorageDistributed.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a9d2d6cfdfd..37703f9a719 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -722,7 +722,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons std::stringstream exception_message; if (!has_sharding_key) exception_message << "No sharding key"; - else if (sharding_key_is_deterministic) + else if (!sharding_key_is_deterministic) exception_message << "Sharding key is not deterministic"; else exception_message << "Sharding key " << sharding_key_column_name << " is not used"; From 177d699cde9f042784010daad095134ea34cc6f8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 29 Jun 2020 22:58:05 +0300 Subject: [PATCH 081/330] Do not check *optimize_skip_unused_shards_nesting w/o *optimize_skip_unused_shards This will avoid supperior log message in case of *optimize_skip_unused_shards_nesting already disables it. And also it is logical. 
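The nesting variants appear to control how deep the shard-pruning settings propagate through nested Distributed queries, so consulting them while the parent setting is disabled only produces the superfluous log message mentioned above. A minimal usage sketch of the pair of settings (illustrative only: dist and shard_key stand for a hypothetical Distributed table and its sharding-key column; the setting names are the ones touched by this patch):

-- dist: hypothetical Distributed table sharded by shard_key (not part of the patch).
-- With the parent setting enabled, the nesting setting limits how deep the
-- shard-pruning optimisation propagates into nested Distributed sub-queries.
SELECT count()
FROM dist
WHERE shard_key = 42
SETTINGS optimize_skip_unused_shards = 1,
         optimize_skip_unused_shards_nesting = 1;
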
--- src/Interpreters/ClusterProxy/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index aac78b755da..451a8873f41 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -28,7 +28,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.max_concurrent_queries_for_user.changed = false; new_settings.max_memory_usage_for_user.changed = false; - if (settings.force_optimize_skip_unused_shards_nesting) + if (settings.force_optimize_skip_unused_shards_nesting && settings.force_optimize_skip_unused_shards) { if (new_settings.force_optimize_skip_unused_shards_nesting == 1) { @@ -48,7 +48,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } } - if (settings.optimize_skip_unused_shards_nesting) + if (settings.optimize_skip_unused_shards_nesting && settings.optimize_skip_unused_shards) { if (new_settings.optimize_skip_unused_shards_nesting == 1) { From e547f6b6d7dd6c85c88a72db58c5968d2c6126bd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 09:47:02 +0300 Subject: [PATCH 082/330] Add example in into dist config --- programs/server/config.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/server/config.xml b/programs/server/config.xml index ca2e6072cd2..a0e6b18df3e 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -262,6 +262,8 @@ + + localhost 9000 From 03def348c8b8eff38fc9c847ce52eadcca3fcb52 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 09:55:01 +0300 Subject: [PATCH 083/330] Sync comments for / in Cluster.h --- src/Interpreters/Cluster.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 517083d2606..cf4b77177ff 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -44,7 +44,7 @@ public: * * example01-01-1 * 9000 - * + * * * ... 
* or in and inside in elements: @@ -52,7 +52,7 @@ public: * * example01-01-1 * 9000 - * + * * * */ From ebff4eae7d50d50070c1789afa820928c8a36a01 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 09:52:10 +0300 Subject: [PATCH 084/330] Add replica priority for load_balancing Make default 1, to match with --- programs/server/config.xml | 2 ++ src/Client/ConnectionPool.h | 18 ++++++++++++++---- src/Client/ConnectionPoolWithFailover.cpp | 8 ++++++++ src/Client/ConnectionPoolWithFailover.h | 2 ++ src/Common/PoolWithFailoverBase.h | 9 +++++++-- src/Interpreters/Cluster.cpp | 23 +++++++++++++++-------- src/Interpreters/Cluster.h | 12 ++++++++---- 7 files changed, 56 insertions(+), 18 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index a0e6b18df3e..dc016136290 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -267,6 +267,8 @@ localhost 9000 + + diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index bbdcae894e7..95cb81c8052 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -33,6 +33,8 @@ public: virtual Entry get(const ConnectionTimeouts & timeouts, const Settings * settings = nullptr, bool force_connected = true) = 0; + + virtual Int64 getPriority() const { return 1; } }; using ConnectionPoolPtr = std::shared_ptr; @@ -54,7 +56,8 @@ public: const String & password_, const String & client_name_ = "client", Protocol::Compression compression_ = Protocol::Compression::Enable, - Protocol::Secure secure_ = Protocol::Secure::Disable) + Protocol::Secure secure_ = Protocol::Secure::Disable, + Int64 priority_ = 1) : Base(max_connections_, &Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), host(host_), @@ -64,7 +67,8 @@ public: password(password_), client_name(client_name_), compression(compression_), - secure{secure_} + secure(secure_), + priority(priority_) { } @@ -93,6 +97,11 @@ public: return host + ":" + toString(port); } + Int64 getPriority() const override + { + return priority; + } + protected: /** Creates a new object to put in the pool. */ ConnectionPtr allocObject() override @@ -111,8 +120,9 @@ private: String password; String client_name; - Protocol::Compression compression; /// Whether to compress data when interacting with the server. - Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. + Protocol::Compression compression; /// Whether to compress data when interacting with the server. + Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. 
+ Int64 priority; /// priority from }; diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 128a4836a42..5d01a3dd196 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -90,6 +90,14 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority); } +Int64 ConnectionPoolWithFailover::getPriority() const +{ + return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto &a, const auto &b) + { + return a->getPriority() - b->getPriority(); + }))->getPriority(); +} + ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const { const Base::PoolStates states = getPoolStates(); diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 10dea98c8f7..d9baa3aff66 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -47,6 +47,8 @@ public: const Settings * settings, bool force_connected) override; /// From IConnectionPool + Int64 getPriority() const override; /// From IConnectionPool + /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. */ diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 5b4e736a907..cee747d106f 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -64,6 +64,8 @@ public: , shared_pool_states(nested_pools.size()) , log(log_) { + for (size_t i = 0;i < nested_pools.size(); ++i) + shared_pool_states[i].config_priority = nested_pools[i]->getPriority(); } struct TryResult @@ -304,6 +306,9 @@ template struct PoolWithFailoverBase::PoolState { UInt64 error_count = 0; + /// Priority from the configuration. + Int64 config_priority = 1; + /// Priority from the GetPriorityFunc. Int64 priority = 0; UInt32 random = 0; @@ -314,8 +319,8 @@ struct PoolWithFailoverBase::PoolState static bool compare(const PoolState & lhs, const PoolState & rhs) { - return std::forward_as_tuple(lhs.error_count, lhs.priority, lhs.random) - < std::forward_as_tuple(rhs.error_count, rhs.priority, rhs.random); + return std::forward_as_tuple(lhs.error_count, lhs.config_priority, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.error_count, rhs.config_priority, rhs.priority, rhs.random); } private: diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index c01d0188e5c..1a12c596f31 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -85,18 +85,20 @@ Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, cons default_database = config.getString(config_prefix + ".default_database", ""); secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable; compression = config.getBool(config_prefix + ".compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable; + priority = config.getInt(config_prefix + ".priority", 1); const char * port_type = secure == Protocol::Secure::Enable ? 
"tcp_port_secure" : "tcp_port"; is_local = isLocal(config.getInt(port_type, 0)); } -Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_) +Cluster::Address::Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_, Int64 priority_) : user(user_), password(password_) { auto parsed_host_port = parseAddress(host_port_, clickhouse_port); host_name = parsed_host_port.first; port = parsed_host_port.second; secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable; + priority = priority_; is_local = isLocal(clickhouse_port); } @@ -208,6 +210,7 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) address.user = unescapeForFileName(std::string(address_begin, has_pw ? colon : user_pw_end)); address.password = has_pw ? unescapeForFileName(std::string(colon + 1, user_pw_end)) : std::string(); address.default_database = has_db ? unescapeForFileName(std::string(has_db + 1, address_end)) : std::string(); + // address.priority ignored return address; } } @@ -301,7 +304,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, address.host_name, address.port, address.default_database, address.user, address.password, - "server", address.compression, address.secure); + "server", address.compression, + address.secure, address.priority); info.pool = std::make_shared( ConnectionPoolPtrs{pool}, settings.load_balancing); @@ -374,7 +378,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, replica.host_name, replica.port, replica.default_database, replica.user, replica.password, - "server", replica.compression, replica.secure); + "server", replica.compression, + replica.secure, replica.priority); all_replicas_pools.emplace_back(replica_pool); if (replica.is_local) @@ -413,7 +418,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting Cluster::Cluster(const Settings & settings, const std::vector> & names, - const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, bool secure) + const String & username, const String & password, UInt16 clickhouse_port, bool treat_local_as_remote, + bool secure, Int64 priority) { UInt32 current_shard_num = 1; @@ -421,7 +427,7 @@ Cluster::Cluster(const Settings & settings, const std::vector(ConnectionPoolPtrs{pool}, settings.load_balancing); info.per_replica_pools = {std::move(pool)}; diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index cf4b77177ff..9633577bf6a 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -28,7 +28,8 @@ public: /// This parameter is needed only to check that some address is local (points to ourself). Cluster(const Settings & settings, const std::vector> & names, const String & username, const String & password, - UInt16 clickhouse_port, bool treat_local_as_remote, bool secure = false); + UInt16 clickhouse_port, bool treat_local_as_remote, + bool secure = false, Int64 priority = 1); Cluster(const Cluster &)= delete; Cluster & operator=(const Cluster &) = delete; @@ -44,7 +45,7 @@ public: * * example01-01-1 * 9000 - * + * * * ... 
* or in and inside in elements: @@ -52,7 +53,7 @@ public: * * example01-01-1 * 9000 - * + * * * */ @@ -73,6 +74,8 @@ public: Protocol::Compression compression = Protocol::Compression::Enable; Protocol::Secure secure = Protocol::Secure::Disable; + Int64 priority = 1; + Address() = default; Address( const Poco::Util::AbstractConfiguration & config, @@ -84,7 +87,8 @@ public: const String & user_, const String & password_, UInt16 clickhouse_port, - bool secure_ = false); + bool secure_ = false, + Int64 priority_ = 1); /// Returns 'escaped_host_name:port' String toString() const; From 72db50fe22754ea4be2993774eef789514f570c6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 10:27:00 +0300 Subject: [PATCH 085/330] Drop TODO about possible failures from test_distributed_load_balancing Fixed in: #11669 bd45592539a2351c1547c5e3ef8951bcb13dd1a0 ("Fix test_distributed_load_balancing flaps (due to config reload)") --- tests/integration/test_distributed_load_balancing/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index 07986de6a85..5c6c1026423 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -120,7 +120,6 @@ def test_load_balancing_first_or_random(): assert len(unique_nodes) == 1, unique_nodes assert unique_nodes == set(['n1']) -# TODO: last_used will be reset on config reload, hence may fail def test_load_balancing_round_robin(): unique_nodes = set() for _ in range(0, nodes): From beb341275cc68ecbf6c80a315d29dc792db4587b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 10:29:47 +0300 Subject: [PATCH 086/330] Cover in test_distributed_load_balancing --- .../configs/remote_servers.xml | 21 +++++++++++++++++++ .../test_distributed_load_balancing/test.py | 19 +++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml index 9efd681e74e..3e3dd00bc9f 100644 --- a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml +++ b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml @@ -17,6 +17,27 @@ + + + + n1 + 9000 + 0 + + + n2 + 9000 + + + + n3 + 9000 + 0 + + + + + n1 diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index 5c6c1026423..5ca5fae9a71 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -43,6 +43,13 @@ def bootstrap(): currentDatabase(), data) """.format()) + n.query(""" + CREATE TABLE dist_priority AS data + Engine=Distributed( + replicas_priority_cluster, + currentDatabase(), + data) + """.format()) def make_uuid(): return uuid.uuid4().hex @@ -56,7 +63,7 @@ def start_cluster(): finally: cluster.shutdown() -def get_node(query_node, *args, **kwargs): +def get_node(query_node, table='dist', *args, **kwargs): query_id = make_uuid() settings = { @@ -70,7 +77,7 @@ def get_node(query_node, *args, **kwargs): else: kwargs['settings'].update(settings) - query_node.query('SELECT * FROM dist', *args, **kwargs) + query_node.query('SELECT * FROM ' + table, *args, **kwargs) for n in cluster.instances.values(): n.query('SYSTEM FLUSH LOGS') @@ -127,6 +134,14 @@ def 
test_load_balancing_round_robin(): assert len(unique_nodes) == nodes, unique_nodes assert unique_nodes == set(['n1', 'n2', 'n3']) +def test_load_balancing_priority_round_robin(): + unique_nodes = set() + for _ in range(0, nodes): + unique_nodes.add(get_node(n1, 'dist_priority', settings={'load_balancing': 'round_robin'})) + assert len(unique_nodes) == 2, unique_nodes + # n2 has bigger priority in config + assert unique_nodes == set(['n1', 'n3']) + def test_distributed_replica_max_ignored_errors(): settings = { 'load_balancing': 'in_order', From 3395276748c5046e77b38e5197a6f19a3d33dc60 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Jun 2020 10:47:17 +0300 Subject: [PATCH 087/330] Add replica priority into documentation --- docs/en/engines/table-engines/special/distributed.md | 2 ++ docs/ru/engines/table-engines/special/distributed.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 3446f820a71..12e60512e47 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -50,6 +50,8 @@ Clusters are set like this: false + + 1 example01-01-1 9000 diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index cc88bab6264..53c5a02e752 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -44,6 +44,8 @@ logs - имя кластера в конфигурационном файле с false + + 1 example01-01-1 9000 From b2712aa24224be910811a03e012d8b4ba5f3c16e Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jun 2020 23:48:18 +0300 Subject: [PATCH 088/330] disable orc in arcadia at all --- src/Processors/Formats/Impl/ORCBlockOutputFormat.h | 3 +++ src/Processors/ya.make | 1 - tests/queries/0_stateless/arcadia_skip_list.txt | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 8d2f5d76d53..ce599dabe23 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -1,5 +1,8 @@ #pragma once + +#if !defined(ARCADIA_BUILD) #include "config_formats.h" +#endif #if USE_ORC #include diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 93caf9b756d..4e6ec2372da 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -38,7 +38,6 @@ SRCS( Formats/Impl/NullFormat.cpp Formats/Impl/ODBCDriver2BlockOutputFormat.cpp Formats/Impl/ODBCDriverBlockOutputFormat.cpp - Formats/Impl/ORCBlockOutputFormat.cpp Formats/Impl/PostgreSQLOutputFormat.cpp Formats/Impl/PrettyBlockOutputFormat.cpp Formats/Impl/PrettyCompactBlockOutputFormat.cpp diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 2b61d384b00..a4b69391cdb 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -57,6 +57,7 @@ 01044_h3_edge_angle 01046_materialized_view_with_join_over_distributed 01050_clickhouse_dict_source_with_subquery +01053_ssd_dictionary 01059_storage_file_brotli 01070_h3_get_base_cell 01070_h3_hex_area_m2 @@ -114,6 +115,7 @@ 01273_h3EdgeAngle_range_check 01274_alter_rename_column_distributed 01276_system_licenses +01280_ssd_complex_key_dictionary 01291_distributed_low_cardinality_memory_efficient 
01292_create_user 01293_show_clusters @@ -121,7 +123,7 @@ 01294_system_distributed_on_cluster 01297_alter_distributed 01303_aggregate_function_nothing_serde +01307_orc_output_format +01308_orc_output_format_arrays 01319_query_formatting_in_server_log 01326_build_id -01053_ssd_dictionary -01280_ssd_complex_key_dictionary From 6fa1204698bfaed1d0f407a09c41bdff8ae46fbd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2020 00:10:34 +0300 Subject: [PATCH 089/330] Fix function if with FixedString arguments of different sizes --- src/Functions/if.cpp | 26 ++++++++++++++----- .../01355_if_fixed_string.reference | 20 ++++++++++++++ .../0_stateless/01355_if_fixed_string.sql | 2 ++ 3 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01355_if_fixed_string.reference create mode 100644 tests/queries/0_stateless/01355_if_fixed_string.sql diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 593cf56caf8..ecee5590b35 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -432,8 +432,7 @@ private: const PaddedPODArray & cond_data = cond_col->getData(); size_t rows = cond_data.size(); - if ((col_then_fixed || col_then_const_fixed) - && (col_else_fixed || col_else_const_fixed)) + if (isFixedString(block.getByPosition(result).type)) { /// The result is FixedString. @@ -448,16 +447,19 @@ private: else if (col_then_const_fixed && col_else_fixed) conditional(ConstSource(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); else if (col_then_const_fixed && col_else_const_fixed) - conditional(ConstSource(*col_then_const_fixed), ConstSource(*col_else_const_fixed), sink, cond_data); + conditional(ConstSource(*col_then_const_fixed), + ConstSource(*col_else_const_fixed), sink, cond_data); + else + return false; block.getByPosition(result).column = std::move(col_res_untyped); return true; } - if ((col_then || col_then_const || col_then_fixed || col_then_const_fixed) - && (col_else || col_else_const || col_else_fixed || col_else_const_fixed)) + if (isString(block.getByPosition(result).type)) { /// The result is String. 
+ auto col_res = ColumnString::create(); auto sink = StringSink(*col_res, rows); @@ -485,6 +487,17 @@ private: conditional(ConstSource(*col_then_const), ConstSource(*col_else_const_fixed), sink, cond_data); else if (col_then_const_fixed && col_else_const) conditional(ConstSource(*col_then_const_fixed), ConstSource(*col_else_const), sink, cond_data); + if (col_then_fixed && col_else_fixed) + conditional(FixedStringSource(*col_then_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); + else if (col_then_fixed && col_else_const_fixed) + conditional(FixedStringSource(*col_then_fixed), ConstSource(*col_else_const_fixed), sink, cond_data); + else if (col_then_const_fixed && col_else_fixed) + conditional(ConstSource(*col_then_const_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); + else if (col_then_const_fixed && col_else_const_fixed) + conditional(ConstSource(*col_then_const_fixed), + ConstSource(*col_else_const_fixed), sink, cond_data); + else + return false; block.getByPosition(result).column = std::move(col_res); return true; @@ -590,7 +603,8 @@ private: return true; } - static void executeGeneric(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) + static void executeGeneric( + const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// Convert both columns to the common type (if needed). diff --git a/tests/queries/0_stateless/01355_if_fixed_string.reference b/tests/queries/0_stateless/01355_if_fixed_string.reference new file mode 100644 index 00000000000..b638fb63d8f --- /dev/null +++ b/tests/queries/0_stateless/01355_if_fixed_string.reference @@ -0,0 +1,20 @@ +0\0\0\0\0 String +1\0 String +-2\0\0\0 String +3\0 String +-4\0\0\0 String +5\0 String +-6\0\0\0 String +7\0 String +-8\0\0\0 String +9\0 String +0\0 FixedString(2) +1\0 FixedString(2) +-2 FixedString(2) +3\0 FixedString(2) +-4 FixedString(2) +5\0 FixedString(2) +-6 FixedString(2) +7\0 FixedString(2) +-8 FixedString(2) +9\0 FixedString(2) diff --git a/tests/queries/0_stateless/01355_if_fixed_string.sql b/tests/queries/0_stateless/01355_if_fixed_string.sql new file mode 100644 index 00000000000..237d2d84daf --- /dev/null +++ b/tests/queries/0_stateless/01355_if_fixed_string.sql @@ -0,0 +1,2 @@ +SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 5)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; +SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 2)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; From 0bf7a8c554cd8b8b0b2664f1845e0a3881a75add Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2020 00:14:38 +0300 Subject: [PATCH 090/330] Add multiIf --- src/Functions/multiIf.cpp | 2 +- .../01355_if_fixed_string.reference | 20 +++++++++++++++++++ .../0_stateless/01355_if_fixed_string.sql | 3 +++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index b57c9f6316a..5f3f62fe6cb 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -39,6 +39,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override { ColumnNumbers args; @@ -70,7 +71,6 @@ public: throw Exception{"Invalid 
number of arguments for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - for_conditions([&](const DataTypePtr & arg) { const IDataType * nested_type; diff --git a/tests/queries/0_stateless/01355_if_fixed_string.reference b/tests/queries/0_stateless/01355_if_fixed_string.reference index b638fb63d8f..43c8af518b4 100644 --- a/tests/queries/0_stateless/01355_if_fixed_string.reference +++ b/tests/queries/0_stateless/01355_if_fixed_string.reference @@ -18,3 +18,23 @@ 7\0 FixedString(2) -8 FixedString(2) 9\0 FixedString(2) +0 String +1 String +-2 String +3 String +-4 String +5 String +-6 String +7 String +-8 String +9 String +0\0 FixedString(2) +1\0 FixedString(2) +-2 FixedString(2) +3\0 FixedString(2) +-4 FixedString(2) +5\0 FixedString(2) +-6 FixedString(2) +7\0 FixedString(2) +-8 FixedString(2) +9\0 FixedString(2) diff --git a/tests/queries/0_stateless/01355_if_fixed_string.sql b/tests/queries/0_stateless/01355_if_fixed_string.sql index 237d2d84daf..a0afcc5f197 100644 --- a/tests/queries/0_stateless/01355_if_fixed_string.sql +++ b/tests/queries/0_stateless/01355_if_fixed_string.sql @@ -1,2 +1,5 @@ SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 5)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; SELECT if(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 2)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; + +SELECT multiIf(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 5)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; +SELECT multiIf(number % 2, toFixedString(toString(number), 2), toFixedString(toString(-number), 2)) AS x, toTypeName(x) FROM system.numbers LIMIT 10; From 1194863b3d238b565c24ff3cf4f07713bfe0986a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2020 01:49:23 +0300 Subject: [PATCH 091/330] Fix error --- src/Functions/if.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index ecee5590b35..6e46a03c69a 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -487,7 +487,7 @@ private: conditional(ConstSource(*col_then_const), ConstSource(*col_else_const_fixed), sink, cond_data); else if (col_then_const_fixed && col_else_const) conditional(ConstSource(*col_then_const_fixed), ConstSource(*col_else_const), sink, cond_data); - if (col_then_fixed && col_else_fixed) + else if (col_then_fixed && col_else_fixed) conditional(FixedStringSource(*col_then_fixed), FixedStringSource(*col_else_fixed), sink, cond_data); else if (col_then_fixed && col_else_const_fixed) conditional(FixedStringSource(*col_then_fixed), ConstSource(*col_else_const_fixed), sink, cond_data); From 84fa7fa3d89f8420b3cf7044165838d8f04fcf45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2020 01:51:49 +0300 Subject: [PATCH 092/330] Remove test that is not supported by "Arcadia" build system --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 2b61d384b00..3995f905332 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -125,3 +125,4 @@ 01326_build_id 01053_ssd_dictionary 01280_ssd_complex_key_dictionary +01354_order_by_tuple_collate_const From d97cb1e7b4c918a871326280ed6a06cb8e6b8378 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2020 01:54:20 +0300 
Subject: [PATCH 093/330] Fix constraints check for constant columns --- .../CheckConstraintsBlockOutputStream.cpp | 88 +++++++++++++------ src/Interpreters/InterpreterCreateQuery.cpp | 1 - 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 8e075e5bf08..542cecf4dd2 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int VIOLATED_CONSTRAINT; + extern const int LOGICAL_ERROR; } @@ -40,46 +42,74 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) for (size_t i = 0; i < expressions.size(); ++i) { auto constraint_expr = expressions[i]; - constraint_expr->execute(block_to_calculate); + + auto * constraint_ptr = constraints.constraints[i]->as(); + ColumnWithTypeAndName res_column = block_to_calculate.getByPosition(block_to_calculate.columns() - 1); - const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); - const UInt8 * data = res_column_uint8.getData().data(); - size_t size = res_column_uint8.size(); + if (!isUInt8(res_column.type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", + backQuote(constraint_ptr->name)); - /// Is violated. - if (!memoryIsByte(data, size, 1)) + if (const ColumnConst * res_const = typeid_cast(res_column.column.get())) { - size_t row_idx = 0; - for (; row_idx < size; ++row_idx) - if (data[row_idx] != 1) - break; + UInt8 value = res_const->getValue(); - Names related_columns = constraint_expr->getRequiredColumns(); - - std::stringstream exception_message; - - auto * constraint_ptr = constraints.constraints[i]->as(); - exception_message << "Constraint " << backQuote(constraint_ptr->name) - << " for table " << table_id.getNameForLogs() - << " is violated at row " << (rows_written + row_idx + 1) - << ". Expression: (" << serializeAST(*(constraint_ptr->expr), true) << ")" - << ". Column values"; - - bool first = true; - for (const auto & name : related_columns) + /// Is violated. + if (!value) { - const IColumn & column = *block.getByName(name).column; - assert(row_idx < column.size()); + Names related_columns = constraint_expr->getRequiredColumns(); - exception_message << (first ? ": " : ", ") - << backQuoteIfNeed(name) << " = " << applyVisitor(FieldVisitorToString(), column[row_idx]); + std::stringstream exception_message; - first = false; + exception_message << "Constraint " << backQuote(constraint_ptr->name) + << " for table " << table_id.getNameForLogs() + << " is violated, because it is a constant expression returning 0." + << " It is most likely an error in table definition."; + + throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; } + } + else + { + const ColumnUInt8 & res_column_uint8 = assert_cast(*res_column.column); - throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; + const UInt8 * data = res_column_uint8.getData().data(); + size_t size = res_column_uint8.size(); + + /// Is violated. 
+ if (!memoryIsByte(data, size, 1)) + { + size_t row_idx = 0; + for (; row_idx < size; ++row_idx) + if (data[row_idx] != 1) + break; + + Names related_columns = constraint_expr->getRequiredColumns(); + + std::stringstream exception_message; + + exception_message << "Constraint " << backQuote(constraint_ptr->name) + << " for table " << table_id.getNameForLogs() + << " is violated at row " << (rows_written + row_idx + 1) + << ". Expression: (" << serializeAST(*(constraint_ptr->expr), true) << ")" + << ". Column values"; + + bool first = true; + for (const auto & name : related_columns) + { + const IColumn & column = *block.getByName(name).column; + assert(row_idx < column.size()); + + exception_message << (first ? ": " : ", ") + << backQuoteIfNeed(name) << " = " << applyVisitor(FieldVisitorToString(), column[row_idx]); + + first = false; + } + + throw Exception{exception_message.str(), ErrorCodes::VIOLATED_CONSTRAINT}; + } } } } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 3e09d728c4c..503807be0a7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -325,7 +325,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( const auto tmp_column_name = final_column_name + "_tmp"; const auto * data_type_ptr = column_names_and_types.back().type.get(); - default_expr_list->children.emplace_back( setAlias(addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), final_column_name)); From 15cddc3c6e659cec18f4b16f7fc4cd3b3b36a1cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2020 02:17:20 +0300 Subject: [PATCH 094/330] Make topK return Enum for Enum types --- src/AggregateFunctions/AggregateFunctionTopK.cpp | 3 ++- src/AggregateFunctions/AggregateFunctionTopK.h | 2 +- tests/queries/0_stateless/01353_topk_enum.reference | 1 + tests/queries/0_stateless/01353_topk_enum.sql | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01353_topk_enum.reference create mode 100644 tests/queries/0_stateless/01353_topk_enum.sql diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 7f2da260c2d..344ab340d62 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -100,7 +100,8 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const threshold = k; } - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold, load_factor, argument_types, params)); + AggregateFunctionPtr res(createWithNumericType( + *argument_types[0], threshold, load_factor, argument_types, params)); if (!res) res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold, load_factor, params)); diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 68317d0bdf0..f77fc482685 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -47,7 +47,7 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(std::make_shared>()); + return std::make_shared(this->argument_types[0]); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/tests/queries/0_stateless/01353_topk_enum.reference b/tests/queries/0_stateless/01353_topk_enum.reference new file mode 100644 
index 00000000000..d650850c434 --- /dev/null +++ b/tests/queries/0_stateless/01353_topk_enum.reference @@ -0,0 +1 @@ +['test','world','hello',''] diff --git a/tests/queries/0_stateless/01353_topk_enum.sql b/tests/queries/0_stateless/01353_topk_enum.sql new file mode 100644 index 00000000000..ba048401b23 --- /dev/null +++ b/tests/queries/0_stateless/01353_topk_enum.sql @@ -0,0 +1 @@ +WITH CAST(round(sqrt(number)) % 4 AS Enum('' = 0, 'hello' = 1, 'world' = 2, 'test' = 3)) AS x SELECT topK(10)(x) FROM numbers(1000); From 987e64acfe789689b8710add5d04371b8f21b604 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 30 Jun 2020 02:28:25 +0300 Subject: [PATCH 095/330] parse metadata in parallel when loading tables --- src/Databases/DatabaseOnDisk.cpp | 21 ++++++- src/Databases/DatabaseOrdinary.cpp | 4 +- .../01193_metadata_loading.reference | 5 ++ .../0_stateless/01193_metadata_loading.sh | 59 +++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01193_metadata_loading.reference create mode 100755 tests/queries/0_stateless/01193_metadata_loading.sh diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 0a16b6eacff..2ad7af9c703 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -389,6 +389,9 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati } }; + /// Metadata files to load: name and flag for .tmp_drop files + std::set> metadata_files; + Poco::DirectoryIterator dir_end; for (Poco::DirectoryIterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) { @@ -404,7 +407,7 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati if (endsWith(dir_it.name(), tmp_drop_ext)) { /// There are files that we tried to delete previously - process_tmp_drop_metadata_file(dir_it.name()); + metadata_files.emplace(dir_it.name(), false); } else if (endsWith(dir_it.name(), ".sql.tmp")) { @@ -415,12 +418,26 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati else if (endsWith(dir_it.name(), ".sql")) { /// The required files have names like `table_name.sql` - process_metadata_file(dir_it.name()); + metadata_files.emplace(dir_it.name(), true); } else throw Exception("Incorrect file extension: " + dir_it.name() + " in metadata directory " + getMetadataPath(), ErrorCodes::INCORRECT_FILE_NAME); } + + /// Read and parse metadata in parallel + ThreadPool pool(SettingMaxThreads().getAutoValue()); + for (const auto & file : metadata_files) + { + pool.scheduleOrThrowOnError([&]() + { + if (file.second) + process_metadata_file(file.first); + else + process_tmp_drop_metadata_file(file.first); + }); + } + pool.wait(); } ASTPtr DatabaseOnDisk::parseQueryFromMetadata(Poco::Logger * loger, const Context & context, const String & metadata_file_path, bool throw_on_error /*= true*/, bool remove_empty /*= false*/) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index eec58ed9b33..277588b59f0 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -121,11 +121,12 @@ void DatabaseOrdinary::loadStoredObjects( * which does not correspond to order tables creation and does not correspond to order of their location on disk. 
*/ using FileNames = std::map; + std::mutex file_names_mutex; FileNames file_names; size_t total_dictionaries = 0; - auto process_metadata = [&context, &file_names, &total_dictionaries, this](const String & file_name) + auto process_metadata = [&context, &file_names, &total_dictionaries, &file_names_mutex, this](const String & file_name) { String full_path = getMetadataPath() + file_name; try @@ -134,6 +135,7 @@ void DatabaseOrdinary::loadStoredObjects( if (ast) { auto * create_query = ast->as(); + std::lock_guard lock{file_names_mutex}; file_names[file_name] = ast; total_dictionaries += create_query->is_dictionary; } diff --git a/tests/queries/0_stateless/01193_metadata_loading.reference b/tests/queries/0_stateless/01193_metadata_loading.reference new file mode 100644 index 00000000000..8ff246325ac --- /dev/null +++ b/tests/queries/0_stateless/01193_metadata_loading.reference @@ -0,0 +1,5 @@ +10000 0 2020-06-25 hello [1,2] [3,4] +10000 1 2020-06-26 word [10,20] [30,40] +ok +8000 0 2020-06-25 hello [1,2] [3,4] +8000 1 2020-06-26 word [10,20] [30,40] diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh new file mode 100755 index 00000000000..de74b3ec1af --- /dev/null +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +# it is the worst way of making performance test, nevertheless it can detect significant slowdown and some other issues, that usually found by stress test + +db="test_01193_$RANDOM" + +declare -A engines +engines[0]="Memory" +engines[1]="File(CSV)" +engines[2]="Log" +engines[3]="StripeLog" +engines[4]="MergeTree ORDER BY i" + +tables=1000 +threads=10 +count_multiplier=1 +max_time_ms=1000 + +debug_or_sanitizer_build=`$CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%'"` + +if [[ debug_or_sanitizer_build -eq 1 ]]; then tables=100; count_multiplier=10; max_time_ms=1500; fi + +create_tables() { + for i in $(seq 1 $tables); do + engine=${engines[$((i % ${#engines[@]}))]} + $CLICKHOUSE_CLIENT -q "CREATE TABLE $db.table_$1_$i (i UInt64, d Date, s String, n Nested(i UInt8, f Float32)) ENGINE=$engine" + $CLICKHOUSE_CLIENT -q "INSERT INTO $db.table_$1_$i VALUES (0, '2020-06-25', 'hello', [1, 2], [3, 4]), (1, '2020-06-26', 'word', [10, 20], [30, 40])" + done +} + +$CLICKHOUSE_CLIENT -q "CREATE DATABASE $db" + +for i in $(seq 1 $threads); do + create_tables $i & +done +wait + +$CLICKHOUSE_CLIENT -q "CREATE TABLE $db.table_merge (i UInt64, d Date, s String, n Nested(i UInt8, f Float32)) ENGINE=Merge('$db', '^table_')" +#FIXME the following query leads to segfault +#$CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" +$CLICKHOUSE_CLIENT -q "SELECT 10000, i, d, s, n.i, n.f FROM $db.table_1_1 GROUP BY i, d, s, n.i, n.f ORDER BY i" + +db_engine=`$CLICKHOUSE_CLIENT -q "SELECT engine FROM system.databases WHERE name='$db'"` + +$CLICKHOUSE_CLIENT -q "DETACH DATABASE $db" + +# get real time, grep seconds, remove point, remove leading zeros +elapsed_ms=`{ time $CLICKHOUSE_CLIENT -q "ATTACH DATABASE $db ENGINE=$db_engine"; } 2>&1 | grep real | grep -Po "0m\K[0-9\.]*" | tr -d '.' 
| sed "s/^0*//"` +$CLICKHOUSE_CLIENT -q "SELECT '01193_metadata_loading', $elapsed_ms FORMAT Null" # it will be printed to server log + +if [[ $elapsed_ms -le $max_time_ms ]]; then echo ok; fi + +#$CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" +$CLICKHOUSE_CLIENT -q "SELECT 8000, i, d, s, n.i, n.f FROM $db.table_1_1 GROUP BY i, d, s, n.i, n.f ORDER BY i" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" From a23df81dd5572e8efdf40adbb2edbaa35e09fd71 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2020 03:53:30 +0300 Subject: [PATCH 096/330] Fix error --- src/DataStreams/CheckConstraintsBlockOutputStream.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 542cecf4dd2..7a67074dbdf 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -59,8 +59,6 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) /// Is violated. if (!value) { - Names related_columns = constraint_expr->getRequiredColumns(); - std::stringstream exception_message; exception_message << "Constraint " << backQuote(constraint_ptr->name) From f797efb04c089635aed5a65bffdfa6ef7f909aa6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 30 Jun 2020 01:48:11 +0000 Subject: [PATCH 097/330] Fix build --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 4 ++-- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 ---- src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 5 ++--- src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h | 1 - 6 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 2fdb142423f..0af4918762b 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -35,7 +35,7 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes void RabbitMQHandler::startBackgroundLoop() { /// stop_loop variable is updated in a separate thread - while (!stop_loop) + while (!stop_loop.load()) { uv_run(loop, UV_RUN_NOWAIT); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index eb358da9425..0ffcd028e1b 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -15,7 +15,7 @@ class RabbitMQHandler : public AMQP::LibUvHandler { public: - RabbitMQHandler(uv_loop_t * evbase_, Poco::Logger * log_); + RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; void stop() { stop_loop.store(true); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 78a8b3c69e3..6ae2e6afeed 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -34,7 +34,7 @@ static const auto QUEUE_SIZE = 50000; /// Equals capacity of a single rabbitmq q ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - HandlerPtr eventHandler_, + HandlerPtr event_handler_, const String & exchange_name_, const Names & routing_keys_, const size_t 
channel_id_, @@ -47,7 +47,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) - , event_handler(eventHandler_) + , event_handler(event_handler_) , exchange_name(exchange_name_) , routing_keys(routing_keys_) , channel_id(channel_id_) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7083da56f9a..7426e939bec 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -37,10 +37,6 @@ namespace DB { -enum -{ -}; - static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 1000; static const auto RESCHEDULE_MS = 500; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 9b6bf39e6cd..2539728aab3 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -153,9 +153,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() { std::atomic exchange_declared = false, exchange_error = false; - /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name - * and makes it declared on the current producer_channel. - */ + /// The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name. producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) .onSuccess([&]() { @@ -177,6 +175,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() void WriteBufferToRabbitMQProducer::finilizeProducer() { + /// This will make sure everything is published checkExchange(); if (use_transactional_channel) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 8339fc0abb3..7d7f571ab3e 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -63,7 +63,6 @@ private: ConcurrentBoundedQueue payloads; size_t next_queue = 0; - UInt64 message_counter = 0; Poco::Logger * log; const std::optional delim; From 8128ca10f436871c674c51b0a0ae74e9c88f0485 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 23 Jun 2020 19:00:15 +0300 Subject: [PATCH 098/330] Switched paths in S3 metadata to relative. --- src/Disks/S3/DiskS3.cpp | 66 +++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 71b5991f770..873b54353ad 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -20,6 +20,9 @@ #include #include +#include + + namespace DB { @@ -60,10 +63,14 @@ namespace struct Metadata { /// Metadata file version. - static constexpr UInt32 VERSION = 1; + static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; + static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; using PathAndSize = std::pair; + /// S3 root path. + const String & s3_root_path; + /// Disk path. const String & disk_path; /// Relative path to metadata file on local FS. @@ -76,8 +83,8 @@ namespace UInt32 ref_count; /// Load metadata by path or create empty if `create` flag is set. 
- explicit Metadata(const String & disk_path_, const String & metadata_file_path_, bool create = false) - : disk_path(disk_path_), metadata_file_path(metadata_file_path_), total_size(0), s3_objects(0), ref_count(0) + explicit Metadata(const String & s3_root_path_, const String & disk_path_, const String & metadata_file_path_, bool create = false) + : s3_root_path(s3_root_path_), disk_path(disk_path_), metadata_file_path(metadata_file_path_), total_size(0), s3_objects(0), ref_count(0) { if (create) return; @@ -87,10 +94,10 @@ namespace UInt32 version; readIntText(version, buf); - if (version != VERSION) + if (version != VERSION_RELATIVE_PATHS && version != VERSION_ABSOLUTE_PATHS) throw Exception( "Unknown metadata file version. Path: " + disk_path + metadata_file_path - + " Version: " + std::to_string(version) + ", Expected version: " + std::to_string(VERSION), + + " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_RELATIVE_PATHS), ErrorCodes::UNKNOWN_FORMAT); assertChar('\n', buf); @@ -108,6 +115,15 @@ namespace readIntText(s3_object_size, buf); assertChar('\t', buf); readEscapedString(s3_object_path, buf); + if (version == VERSION_ABSOLUTE_PATHS) + { + if (!boost::algorithm::starts_with(s3_object_path, s3_root_path)) + throw Exception( + "Path in metadata does not correspond S3 root path. Path: " + s3_object_path + + ", root path: " + s3_root_path + ", disk path: " + disk_path_, + ErrorCodes::UNKNOWN_FORMAT); + s3_object_path = s3_object_path.substr(s3_root_path.size()); + } assertChar('\n', buf); s3_objects[i] = {s3_object_path, s3_object_size}; } @@ -127,7 +143,7 @@ namespace { WriteBufferFromFile buf(disk_path + metadata_file_path, 1024); - writeIntText(VERSION, buf); + writeIntText(VERSION_RELATIVE_PATHS, buf); writeChar('\n', buf); writeIntText(s3_objects.size(), buf); @@ -213,7 +229,7 @@ namespace const auto & [path, size] = metadata.s3_objects[i]; if (size > offset) { - auto buf = std::make_unique(client_ptr, bucket, path, buf_size); + auto buf = std::make_unique(client_ptr, bucket, metadata.s3_root_path + path, buf_size); buf->seek(offset, SEEK_SET); return buf; } @@ -242,7 +258,7 @@ namespace ++current_buf_idx; const auto & path = metadata.s3_objects[current_buf_idx].first; - current_buf = std::make_unique(client_ptr, bucket, path, buf_size); + current_buf = std::make_unique(client_ptr, bucket, metadata.s3_root_path + path, buf_size); current_buf->next(); working_buffer = current_buf->buffer(); absolute_position += working_buffer.size(); @@ -272,7 +288,7 @@ namespace size_t min_upload_part_size, size_t buf_size_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) - , impl(WriteBufferFromS3(client_ptr_, bucket_, s3_path_, min_upload_part_size, buf_size_)) + , impl(WriteBufferFromS3(client_ptr_, bucket_, metadata_.s3_root_path + s3_path_, min_upload_part_size, buf_size_)) , metadata(std::move(metadata_)) , s3_path(s3_path_) { @@ -440,7 +456,7 @@ bool DiskS3::isDirectory(const String & path) const size_t DiskS3::getFileSize(const String & path) const { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); return metadata.total_size; } @@ -493,16 +509,16 @@ void DiskS3::copyFile(const String & from_path, const String & to_path) if (exists(to_path)) remove(to_path); - Metadata from(metadata_path, from_path); - Metadata to(metadata_path, to_path, true); + Metadata from(s3_root_path, metadata_path, from_path); + Metadata to(s3_root_path, metadata_path, to_path, true); for (const auto & [path, size] : 
from.s3_objects) { - auto new_path = s3_root_path + getRandomName(); + auto new_path = getRandomName(); Aws::S3::Model::CopyObjectRequest req; - req.SetCopySource(bucket + "/" + path); + req.SetCopySource(bucket + "/" + s3_root_path + path); req.SetBucket(bucket); - req.SetKey(new_path); + req.SetKey(s3_root_path + new_path); throwIfError(client->CopyObject(req)); to.addObject(new_path, size); @@ -513,7 +529,7 @@ void DiskS3::copyFile(const String & from_path, const String & to_path) std::unique_ptr DiskS3::readFile(const String & path, size_t buf_size, size_t, size_t, size_t) const { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_path + path), metadata.s3_objects.size()); @@ -525,27 +541,27 @@ std::unique_ptr DiskS3::writeFile(const String & path, { bool exist = exists(path); /// Path to store new S3 object. - auto s3_path = s3_root_path + getRandomName(); + auto s3_path = getRandomName(); if (!exist || mode == WriteMode::Rewrite) { /// If metadata file exists - remove and create new. if (exist) remove(path); - Metadata metadata(metadata_path, path, true); + Metadata metadata(s3_root_path, metadata_path, path, true); /// Save empty metadata to disk to have ability to get file size while buffer is not finalized. metadata.save(); - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write to file by path: {} New S3 path: {}", backQuote(metadata_path + path), s3_path); + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write to file by path: {} New S3 path: {}", backQuote(metadata_path + path), s3_root_path + s3_path); return std::make_unique(client, bucket, metadata, s3_path, min_upload_part_size, buf_size); } else { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Append to file by path: {}. New S3 path: {}. Existing S3 objects: {}.", - backQuote(metadata_path + path), s3_path, metadata.s3_objects.size()); + backQuote(metadata_path + path), s3_root_path + s3_path, metadata.s3_objects.size()); return std::make_unique(client, bucket, metadata, s3_path, min_upload_part_size, buf_size); } @@ -558,7 +574,7 @@ void DiskS3::remove(const String & path) Poco::File file(metadata_path + path); if (file.isFile()) { - Metadata metadata(metadata_path, path); + Metadata metadata(s3_root_path, metadata_path, path); /// If there is no references - delete content from S3. if (metadata.ref_count == 0) @@ -569,7 +585,7 @@ void DiskS3::remove(const String & path) /// TODO: Make operation idempotent. Do not throw exception if key is already deleted. Aws::S3::Model::DeleteObjectRequest request; request.SetBucket(bucket); - request.SetKey(s3_object_path); + request.SetKey(s3_root_path + s3_object_path); throwIfError(client->DeleteObject(request)); } } @@ -644,7 +660,7 @@ Poco::Timestamp DiskS3::getLastModified(const String & path) void DiskS3::createHardLink(const String & src_path, const String & dst_path) { /// Increment number of references. - Metadata src(metadata_path, src_path); + Metadata src(s3_root_path, metadata_path, src_path); ++src.ref_count; src.save(); @@ -655,7 +671,7 @@ void DiskS3::createHardLink(const String & src_path, const String & dst_path) void DiskS3::createFile(const String & path) { /// Create empty metadata file. 
- Metadata metadata(metadata_path, path, true); + Metadata metadata(s3_root_path, metadata_path, path, true); metadata.save(); } From bfa9a8efb2e0bb476112e7278150bdc6fa185982 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Jun 2020 08:55:37 +0300 Subject: [PATCH 099/330] fixup --- tests/performance/sum_map.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index cfad530652c..29ef169c25e 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -22,7 +22,7 @@ - CREATE TEMPORARY TABLE sum_map_{scale} AS + CREATE TABLE sum_map_{scale} AS SELECT arrayMap(x -> (x % 23), range(50)) AS key, arrayMap(x -> intDiv(number, x + 1), range(50)) AS val From 1d838b7b3ac6b80ca7ce47f43202ffbd710b1de8 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jun 2020 09:56:11 +0300 Subject: [PATCH 100/330] Update ThreadProfileEvents.cpp --- src/Common/ThreadProfileEvents.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index c63050792c2..0a0b89aa4bf 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -188,6 +188,9 @@ static const PerfEventInfo raw_events_info[] = { CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), + + // Apparently it doesn't make sense to treat these values as relative: + // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS), }; From 072d0f9c1f4b556d56bde550644b55dc3bc17fc0 Mon Sep 17 00:00:00 2001 From: chengy8934 <67622393+chengy8934@users.noreply.github.com> Date: Tue, 30 Jun 2020 15:00:30 +0800 Subject: [PATCH 101/330] Update browse-code.md (#12047) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update browse-code.md 纠正了一些翻译错误。 * Update browse-code.md Co-authored-by: Ivan Blinkov --- docs/zh/development/browse-code.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md index 222c773c774..49da72a63aa 100644 --- a/docs/zh/development/browse-code.md +++ b/docs/zh/development/browse-code.md @@ -1,14 +1,12 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 63 toc_title: "\u6D4F\u89C8\u6E90\u4EE3\u7801" --- # 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器可用 [这里](https://clickhouse.tech/codebrowser/html_report/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示,搜索和索引。 代码快照每天更新。 +您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.tech/codebrowser/html_report/ClickHouse/src/index.html). 
它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 -此外,您还可以浏览源 [GitHub](https://github.com/ClickHouse/ClickHouse) 像往常一样 +此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) -如果你有兴趣使用什么样的IDE,我们建议CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何喜欢的IDE。 Vim和Emacs也算数。 +如果你希望了解哪种IDE较好,我们推荐使用CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何您喜欢的IDE。 Vim和Emacs也可以。 From e8921c9c818022b9d7e1d0ddbd3f48bcc3569c44 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Tue, 30 Jun 2020 07:22:15 +0000 Subject: [PATCH 102/330] Bump python-slugify from 1.2.6 to 4.0.1 in /docs/tools Bumps [python-slugify](https://github.com/un33k/python-slugify) from 1.2.6 to 4.0.1. - [Release notes](https://github.com/un33k/python-slugify/releases) - [Changelog](https://github.com/un33k/python-slugify/blob/master/CHANGELOG.md) - [Commits](https://github.com/un33k/python-slugify/compare/1.2.6...4.0.1) Signed-off-by: dependabot-preview[bot] --- docs/tools/requirements.txt | 2 +- docs/tools/translate/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index c7186ac1e6a..1b172199839 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -25,7 +25,7 @@ protobuf==3.12.2 numpy==1.18.5 Pygments==2.5.2 pymdown-extensions==7.1 -python-slugify==1.2.6 +python-slugify==4.0.1 PyYAML==5.3.1 repackage==0.7.3 requests==2.24.0 diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 289cf749b36..24a4f343c08 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -5,7 +5,7 @@ googletrans==3.0.0 idna==2.10 Jinja2==2.11.2 pandocfilters==1.4.2 -python-slugify==4.0.0 +python-slugify==4.0.1 PyYAML==5.3.1 requests==2.24.0 text-unidecode==1.3 From aa8461e99667bd26950d52cc71c672a34b10ff7d Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jun 2020 10:47:19 +0300 Subject: [PATCH 103/330] Update ThreadProfileEvents.cpp --- src/Common/ThreadProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 0a0b89aa4bf..04c29841e23 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -188,7 +188,7 @@ static const PerfEventInfo raw_events_info[] = { CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS), CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS), - + // Apparently it doesn't make sense to treat these values as relative: // https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS), From ffcfcfa257faa3d07a83fb74a130cb4aef550d39 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 30 Jun 2020 11:00:45 +0300 Subject: [PATCH 104/330] Update README.md --- website/blog/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/website/blog/README.md b/website/blog/README.md index 4b2e122608d..c99f293f669 100644 --- a/website/blog/README.md +++ b/website/blog/README.md @@ -38,10 +38,14 @@ tags: ['meetup','Beijing','China','events'] ![ClickHouse branded Beijing duck](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg) ``` +## How To Preview My Post? 
+ +Use [deploy-to-test.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/deploy-to-test.sh) script. Note that on the first use you'll need to follow the steps in its first comment, and [install prerequisites for build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/README.md#use-buildpy-use-build-py). Alternatively, you can use `--livereload=N` argument of [build.py](https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/build.py). + ## How To Add a New Blog Language? If you want to write a guest post, you are welcome to use your native language or make multiple posts in multiple languages - Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts and it's ok. Most posts are written only in one language because they are not relevant to audiences of other languages. +Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts and it's ok. Most posts are written only in one language because they are not relevant to audiences of other languages. At the moment it's not so straightforward to set up a new language for blog and it won't be documented for now, but you can just create a language directory with the first post as described above and we'll configure the website infrastructure to include it during/after merging the pull-request. From 0eb9c8e4309e8f652a3022a53236a278942069be Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 30 Jun 2020 11:09:06 +0300 Subject: [PATCH 105/330] Add a test with negative priority in test_distributed_load_balancing --- .../configs/remote_servers.xml | 22 +++++++++++++++++++ .../test_distributed_load_balancing/test.py | 15 +++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml index 3e3dd00bc9f..b424e975bfe 100644 --- a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml +++ b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml @@ -27,6 +27,7 @@ n2 9000 + @@ -37,6 +38,27 @@ + + + + n1 + 9000 + -1 + + + n2 + 9000 + + 0 + + + n3 + 9000 + -1 + + + + diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index 5ca5fae9a71..c538dc7fb3a 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -50,6 +50,13 @@ def bootstrap(): currentDatabase(), data) """.format()) + n.query(""" + CREATE TABLE dist_priority_negative AS data + Engine=Distributed( + replicas_priority_negative_cluster, + currentDatabase(), + data) + """.format()) def make_uuid(): return uuid.uuid4().hex @@ -134,10 +141,14 @@ def test_load_balancing_round_robin(): assert len(unique_nodes) == nodes, unique_nodes assert unique_nodes == set(['n1', 'n2', 'n3']) -def test_load_balancing_priority_round_robin(): +@pytest.mark.parametrize('dist_table', [ + ('dist_priority'), + ('dist_priority_negative'), +]) +def test_load_balancing_priority_round_robin(dist_table): unique_nodes = set() for _ in range(0, nodes): - unique_nodes.add(get_node(n1, 'dist_priority', settings={'load_balancing': 'round_robin'})) + unique_nodes.add(get_node(n1, dist_table, settings={'load_balancing': 'round_robin'})) assert len(unique_nodes) == 2, unique_nodes # n2 has bigger priority in config assert unique_nodes == 
set(['n1', 'n3']) From dd9b02c2fb2fb8e19de489da60c8c409b6185737 Mon Sep 17 00:00:00 2001 From: MicrochipQ Date: Tue, 30 Jun 2020 11:30:06 +0300 Subject: [PATCH 106/330] Fix typo (#12046) --- docs/ru/engines/table-engines/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index 580ee10250f..6a954313c60 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -53,7 +53,7 @@ - [Distributed](special/distributed.md#distributed) - [MaterializedView](special/materializedview.md#materializedview) - [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge +- [Merge](special/merge.md#merge) - [File](special/file.md#file) - [Null](special/null.md#null) - [Set](special/set.md#set) From ace9c99e14668a00147a1bb51f9dd888baa0dfcd Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 28 Jun 2020 21:41:41 +0300 Subject: [PATCH 107/330] Fix test_allow_introspection. --- .../configs/{ => config.d}/remote_servers.xml | 0 .../users.d/allow_introspection_functions.xml | 7 +++++++ .../test_settings_constraints_distributed/test.py | 6 +++--- tests/integration/test_settings_profile/test.py | 15 +++++++++++---- 4 files changed, 21 insertions(+), 7 deletions(-) rename tests/integration/test_settings_constraints_distributed/configs/{ => config.d}/remote_servers.xml (100%) create mode 100644 tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml diff --git a/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml b/tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml similarity index 100% rename from tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml rename to tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml diff --git a/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml b/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml new file mode 100644 index 00000000000..ccfdf6a63f6 --- /dev/null +++ b/tests/integration/test_settings_constraints_distributed/configs/users.d/allow_introspection_functions.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index 86456f8a099..7f0f8868bcf 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -8,9 +8,9 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1') -node2 = cluster.add_instance('node2') -distributed = cluster.add_instance('distributed', main_configs=["configs/remote_servers.xml"], stay_alive=True) +node1 = cluster.add_instance('node1', config_dir="configs") +node2 = cluster.add_instance('node2', config_dir="configs") +distributed = cluster.add_instance('distributed', config_dir="configs", stay_alive=True) @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 752aa2da75d..21fdac9da7a 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -177,11 
+177,18 @@ def test_allow_introspection(): + assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')") assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin") - - instance.query("GRANT ALL ON *.* TO robin") - assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") + assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin", settings={"allow_introspection_functions":1}) + assert "Introspection functions are disabled" in instance.query_and_get_error("GRANT demangle ON *.* TO robin") + assert "Not enough privileges" in instance.query_and_get_error("GRANT demangle ON *.* TO robin", user="robin") + assert "Not enough privileges" in instance.query_and_get_error("GRANT demangle ON *.* TO robin", user="robin", settings={"allow_introspection_functions":1}) + + assert instance.query("SELECT demangle('a')", settings={"allow_introspection_functions":1}) == "signed char\n" + instance.query("GRANT demangle ON *.* TO robin", settings={"allow_introspection_functions":1}) + + assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") instance.query("ALTER USER robin SETTINGS allow_introspection_functions=1") assert instance.query("SELECT demangle('a')", user="robin") == "signed char\n" @@ -194,5 +201,5 @@ def test_allow_introspection(): instance.query("DROP SETTINGS PROFILE xyz") assert "Introspection functions are disabled" in instance.query_and_get_error("SELECT demangle('a')", user="robin") - instance.query("REVOKE ALL ON *.* FROM robin") + instance.query("REVOKE demangle ON *.* FROM robin", settings={"allow_introspection_functions":1}) assert "Not enough privileges" in instance.query_and_get_error("SELECT demangle('a')", user="robin") From 7c828861dab4f0356e7f049e09cd5a9405e62da0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 30 Jun 2020 13:18:38 +0300 Subject: [PATCH 108/330] fix arcadia --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 90c23d87288..02bd61dbaf9 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -1,6 +1,6 @@ #include -#if USE_ORC +#if USE_ORC && !defined(ARCADIA_BUILD) #include #include From be51741566335cd65c2e3976c664e0f729859a0a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 13:19:06 +0300 Subject: [PATCH 109/330] Try fix lambda tuple(LC) argument. --- src/Functions/array/FunctionArrayMapped.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 346f2e35555..20c57ed0ba4 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -73,7 +73,7 @@ public: if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array.
Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = removeLowCardinality(array_type->getNestedType()); + nested_types[i] = array_type->getNestedType(); } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); @@ -191,8 +191,6 @@ public: if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = column_const_array->convertToFullColumn(); - if (column_array_ptr->lowCardinality()) - column_array_ptr = column_array_ptr->convertToFullColumnIfLowCardinality(); column_array = checkAndGetColumn(column_array_ptr.get()); } @@ -218,7 +216,7 @@ public: } arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - removeLowCardinality(array_type->getNestedType()), + array_type->getNestedType(), array_with_type_and_name.name)); } From 41142c5e95ebb048eb33a94a44d7aabbcdecf035 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Jun 2020 13:34:20 +0300 Subject: [PATCH 110/330] Test image changes --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 5a6aea5d320..23a435efee7 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -25,7 +25,7 @@ ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb" RUN wget "https://files.viva64.com/$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION" -CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD echo "Hello world" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . && ninja re2_st && \ pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ From 820d2c63dfe20c5c5bc5fa7767408532930beec0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 13:51:17 +0300 Subject: [PATCH 111/330] Try fix labda tuple(LC) argument. --- src/Functions/array/FunctionArrayMapped.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 20c57ed0ba4..2a040f80efe 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -73,7 +73,7 @@ public: if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. 
Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = array_type->getNestedType(); + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); @@ -190,7 +190,7 @@ public: const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); - column_array_ptr = column_const_array->convertToFullColumn(); + column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); column_array = checkAndGetColumn(column_array_ptr.get()); } @@ -216,7 +216,7 @@ public: } arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - array_type->getNestedType(), + recursiveRemoveLowCardinality(array_type->getNestedType()), array_with_type_and_name.name)); } From 4262fec1bbe5b0a074e9eabe98367739dee8b663 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Jun 2020 13:52:02 +0300 Subject: [PATCH 112/330] Add pvs studio to images --- docker/images.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/images.json b/docker/images.json index c1174327097..0df7e53a2db 100644 --- a/docker/images.json +++ b/docker/images.json @@ -11,9 +11,14 @@ "docker/packager/binary": { "name": "yandex/clickhouse-binary-builder", "dependent": [ - "docker/test/split_build_smoke_test" + "docker/test/split_build_smoke_test", + "docker/test/pvs" ] }, + "docker/test/pvs": { + "name": "yandex/clickhouse-pvs-test", + "dependent": [] + }, "docker/test/coverage": { "name": "yandex/clickhouse-coverage", "dependent": [] From b5fbd62a6f3c8cc426c82b77702d19e936315059 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 13:56:23 +0300 Subject: [PATCH 113/330] Added test. 
--- ...01354_tuple_low_cardinality_array_mapped_bug.reference | 2 ++ .../01354_tuple_low_cardinality_array_mapped_bug.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference create mode 100644 tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql diff --git a/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql new file mode 100644 index 00000000000..80a1a7c46eb --- /dev/null +++ b/tests/queries/0_stateless/01354_tuple_low_cardinality_array_mapped_bug.sql @@ -0,0 +1,8 @@ +SELECT arrayExists(x -> ((x.1) = 'pattern'), cast([tuple('a', 1)] as Array(Tuple(LowCardinality(String), UInt8)))); + +DROP TABLE IF EXISTS table; +CREATE TABLE table (id Int32, values Array(Tuple(LowCardinality(String), Int32)), date Date) ENGINE MergeTree() PARTITION BY toYYYYMM(date) ORDER BY (id, date); + +SELECT count(*) FROM table WHERE (arrayExists(x -> ((x.1) = toLowCardinality('pattern')), values) = 1); + +DROP TABLE IF EXISTS table; From 29178e26daa4cb88690058145ae3a5ab5d724c7f Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Tue, 30 Jun 2020 14:13:43 +0300 Subject: [PATCH 114/330] fix low card types in merge join (#12035) --- src/Interpreters/MergeJoin.cpp | 5 +- src/Interpreters/join_common.cpp | 10 +++ src/Interpreters/join_common.h | 1 + ...00800_low_cardinality_merge_join.reference | 41 ++++++++++ .../00800_low_cardinality_merge_join.sql | 30 ++++++++ ...01353_low_cardinality_join_types.reference | 36 +++++++++ .../01353_low_cardinality_join_types.sql | 75 +++++++++++++++++++ 7 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/00800_low_cardinality_merge_join.reference create mode 100644 tests/queries/0_stateless/00800_low_cardinality_merge_join.sql create mode 100644 tests/queries/0_stateless/01353_low_cardinality_join_types.reference create mode 100644 tests/queries/0_stateless/01353_low_cardinality_join_types.sql diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index bb054169e71..1478c36dd23 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -396,7 +396,6 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right if (required_right_keys.count(column.name)) right_columns_to_add.insert(ColumnWithTypeAndName{nullptr, column.type, column.name}); - JoinCommon::removeLowCardinalityInplace(right_columns_to_add); JoinCommon::createMissedColumns(right_columns_to_add); if (nullable_right_side) @@ -513,7 +512,7 @@ bool MergeJoin::saveRightBlock(Block && block) bool MergeJoin::addJoinedBlock(const Block & src_block, bool) { Block block = materializeBlock(src_block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesRight()); sortBlock(block, right_sort_description); return saveRightBlock(std::move(block)); @@ -525,7 +524,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) { JoinCommon::checkTypesOfKeys(block, table_join->keyNamesLeft(), 
right_table_keys, table_join->keyNamesRight()); materializeBlockInplace(block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesLeft()); sortBlock(block, left_sort_description); } diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index 6dd3a202d4d..a17d3b43e69 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -104,6 +104,16 @@ void removeLowCardinalityInplace(Block & block) } } +void removeLowCardinalityInplace(Block & block, const Names & names) +{ + for (const String & column_name : names) + { + auto & col = block.getByName(column_name); + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); + } +} + void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others) { block_others = materializeBlock(sample_block); diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 47fa082e700..81eb0dfa688 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -20,6 +20,7 @@ Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); void removeLowCardinalityInplace(Block & block); +void removeLowCardinalityInplace(Block & block, const Names & names); /// Split key and other columns by keys name list void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others); diff --git a/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference b/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference new file mode 100644 index 00000000000..8e032c0a542 --- /dev/null +++ b/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference @@ -0,0 +1,41 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +- +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 \N +0 1 +1 2 +2 \N diff --git a/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql b/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql new file mode 100644 index 00000000000..1181ee453a6 --- /dev/null +++ b/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql @@ -0,0 +1,30 @@ +set join_algorithm = 'partial_merge'; + +select * from (select dummy as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select 
* from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select '-'; +select * from (select dummy as val from system.one) s1 any left join (select dummy as val from system.one) s2 on val + 0 = val * 1; -- { serverError 352 } +select * from (select dummy as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select '-'; +select * from (select number as l from system.numbers limit 3) s1 any left join (select number as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select number as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select number as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) s2 on l + 1 = r * 1; diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.reference b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference new file mode 100644 index 00000000000..85d3f3d598b --- /dev/null +++ 
b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference @@ -0,0 +1,36 @@ +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.sql b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql new file mode 100644 index 00000000000..91ebe97fa48 --- /dev/null +++ b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql @@ -0,0 +1,75 @@ +set join_algorithm = 'hash'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, 
toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +set join_algorithm = 'prefer_partial_merge'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; From 8b2a5d81df73cc32453fb7a7d5c53876dfbc20b9 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 30 Jun 2020 14:27:40 +0300 Subject: [PATCH 115/330] Update contrib/poco to ClickHouse-Extras/poco #22 (#12037) * Updated ClickHouse-Extras/poco #22 --- .gitmodules | 2 +- contrib/poco | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 2fed57a519d..c767d4f41fe 100644 --- 
a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "contrib/poco"] path = contrib/poco - url = https://github.com/ClickHouse-Extras/poco + url = https://github.com/ClickHouse-Extras/poco.git branch = clickhouse [submodule "contrib/zstd"] path = contrib/zstd diff --git a/contrib/poco b/contrib/poco index be2ab90ba5d..74c93443342 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit be2ab90ba5dccd46919a116e3fe4fa77bb85063b +Subproject commit 74c93443342f6028fa6402057684733b316aa737 From e8ee5176cdbc86e0d1a87516ae2f14d90f999136 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 14:29:50 +0300 Subject: [PATCH 116/330] Fix defaultValueOfArgumentType --- src/Functions/defaultValueOfArgumentType.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/defaultValueOfArgumentType.cpp b/src/Functions/defaultValueOfArgumentType.cpp index 85da76ce694..b07a92bd677 100644 --- a/src/Functions/defaultValueOfArgumentType.cpp +++ b/src/Functions/defaultValueOfArgumentType.cpp @@ -22,6 +22,7 @@ public: } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } size_t getNumberOfArguments() const override { From aeb0cdd5445f6f8c47302784805e1ce80497fcb2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 14:36:55 +0300 Subject: [PATCH 117/330] Added test. --- .../01355_defaultValueOfArgumentType_bug.reference | 1 + .../0_stateless/01355_defaultValueOfArgumentType_bug.sql | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference create mode 100644 tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql diff --git a/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference new file mode 100644 index 00000000000..4165503c4b5 --- /dev/null +++ b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.reference @@ -0,0 +1 @@ + LowCardinality(String) diff --git a/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql new file mode 100644 index 00000000000..e3168eb09a0 --- /dev/null +++ b/tests/queries/0_stateless/01355_defaultValueOfArgumentType_bug.sql @@ -0,0 +1,4 @@ +SELECT + materialize(toLowCardinality('')) AS lc, + toTypeName(lc) +WHERE lc = defaultValueOfArgumentType(lc) From b74b27854a14e62063a1498f33eb52922561cb65 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 30 Jun 2020 14:38:09 +0300 Subject: [PATCH 118/330] yet another try --- src/Formats/FormatFactory.cpp | 2 +- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a8b75baaaa8..f09a13d51ba 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -362,6 +362,7 @@ FormatFactory::FormatFactory() #if !defined(ARCADIA_BUILD) registerInputFormatProcessorCapnProto(*this); registerInputFormatProcessorORC(*this); + registerOutputFormatProcessorORC(*this); registerInputFormatProcessorParquet(*this); registerOutputFormatProcessorParquet(*this); registerInputFormatProcessorArrow(*this); @@ -396,7 +397,6 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorNull(*this); registerOutputFormatProcessorMySQLWire(*this); 
registerOutputFormatProcessorMarkdown(*this); - registerOutputFormatProcessorORC(*this); registerOutputFormatProcessorPostgreSQLWire(*this); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 02bd61dbaf9..90c23d87288 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -1,6 +1,6 @@ #include -#if USE_ORC && !defined(ARCADIA_BUILD) +#if USE_ORC #include #include From f4869eca41d0c737fd507216a20230cb3d208850 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jun 2020 15:14:18 +0300 Subject: [PATCH 119/330] Update cpu_synthetic.xml --- tests/performance/cpu_synthetic.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/cpu_synthetic.xml b/tests/performance/cpu_synthetic.xml index 2888f7bbbd6..e08e06f9833 100644 --- a/tests/performance/cpu_synthetic.xml +++ b/tests/performance/cpu_synthetic.xml @@ -21,7 +21,7 @@ PageCharset тоже почти всегда непуст, но его сред SELECT count() FROM hits_10m_single WHERE NOT ignore(sipHash64(SearchPhrase)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(sipHash64(SearchPhrase)) -SELECT count() FROM hits_10m_single WHERE NOT ignore(MD5(SearchPhrase)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(MD5(SearchPhrase)) From bb8da71effb1db4a6b6d19e80edf5ee6cbe862aa Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 23 Jun 2020 19:40:58 +0300 Subject: [PATCH 120/330] Moves task shall be started if new storage policy needs them. --- src/Storages/MergeTree/MergeTreeData.cpp | 52 ++++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 4 +- src/Storages/StorageMergeTree.cpp | 19 +++++--- src/Storages/StorageMergeTree.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 19 +++++--- src/Storages/StorageReplicatedMergeTree.h | 2 + 6 files changed, 63 insertions(+), 35 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e35aa4b1181..1adb245d9e1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1495,6 +1495,8 @@ void MergeTreeData::changeSettings( { if (new_settings) { + bool has_storage_policy_changed = false; + const auto & new_changes = new_settings->as().changes; for (const auto & change : new_changes) @@ -1503,28 +1505,34 @@ void MergeTreeData::changeSettings( StoragePolicyPtr new_storage_policy = global_context.getStoragePolicy(change.value.safeGet()); StoragePolicyPtr old_storage_policy = getStoragePolicy(); - checkStoragePolicy(new_storage_policy); - - std::unordered_set all_diff_disk_names; - for (const auto & disk : new_storage_policy->getDisks()) - all_diff_disk_names.insert(disk->getName()); - for (const auto & disk : old_storage_policy->getDisks()) - all_diff_disk_names.erase(disk->getName()); - - for (const String & disk_name : all_diff_disk_names) + /// StoragePolicy of different version or name is guaranteed to have different pointer + if (new_storage_policy != old_storage_policy) { - auto disk = new_storage_policy->getDiskByName(disk_name); - if (disk->exists(relative_data_path)) - throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); - } + checkStoragePolicy(new_storage_policy); - for (const String & disk_name : all_diff_disk_names) - { - 
auto disk = new_storage_policy->getDiskByName(disk_name); - disk->createDirectories(relative_data_path); - disk->createDirectories(relative_data_path + "detached"); + std::unordered_set all_diff_disk_names; + for (const auto & disk : new_storage_policy->getDisks()) + all_diff_disk_names.insert(disk->getName()); + for (const auto & disk : old_storage_policy->getDisks()) + all_diff_disk_names.erase(disk->getName()); + + for (const String & disk_name : all_diff_disk_names) + { + auto disk = new_storage_policy->getDiskByName(disk_name); + if (disk->exists(relative_data_path)) + throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); + } + + for (const String & disk_name : all_diff_disk_names) + { + auto disk = new_storage_policy->getDiskByName(disk_name); + disk->createDirectories(relative_data_path); + disk->createDirectories(relative_data_path + "detached"); + } + /// FIXME how would that be done while reloading configuration??? + + has_storage_policy_changed = true; } - /// FIXME how would that be done while reloading configuration??? } MergeTreeSettings copy = *getSettings(); @@ -1533,6 +1541,9 @@ void MergeTreeData::changeSettings( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); new_metadata.setSettingsChanges(new_settings); setInMemoryMetadata(new_metadata); + + if (has_storage_policy_changed) + startBackgroundMovesIfNeeded(); } } @@ -3291,12 +3302,11 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { auto policy = getStoragePolicy(); - auto metadata_snapshot = getInMemoryMetadataPtr(); if (policy->getVolumes().size() > 1) return true; - return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && metadata_snapshot->hasAnyMoveTTL(); + return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1; } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e46e8b3a646..8fcb879b3ff 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -794,8 +794,6 @@ protected: void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; - void setStoragePolicy(const String & new_storage_policy_name, bool only_check = false); - /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. void calculateColumnSizesImpl(); /// Adds or subtracts the contribution of the part to compressed column sizes. @@ -873,6 +871,8 @@ private: CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, SpacePtr space); bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const; + + virtual void startBackgroundMovesIfNeeded() = 0; }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c3b5758c7a5..69da73653eb 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -102,12 +102,7 @@ void StorageMergeTree::startup() /// Ensure that thread started only after assignment to 'merging_mutating_task_handle' is done. 
merge_pool.startTask(merging_mutating_task_handle); - if (areBackgroundMovesNeeded()) - { - auto & move_pool = global_context.getBackgroundMovePool(); - moving_task_handle = move_pool.createTask([this] { return movePartsTask(); }); - move_pool.startTask(moving_task_handle); - } + startBackgroundMovesIfNeeded(); } catch (...) { @@ -464,6 +459,18 @@ bool StorageMergeTree::isMutationDone(Int64 mutation_version) const return true; } + +void StorageMergeTree::startBackgroundMovesIfNeeded() +{ + if (areBackgroundMovesNeeded() && !moving_task_handle) + { + auto & move_pool = global_context.getBackgroundMovePool(); + moving_task_handle = move_pool.createTask([this] { return movePartsTask(); }); + move_pool.startTask(moving_task_handle); + } +} + + std::vector StorageMergeTree::getMutationsStatus() const { std::lock_guard lock(currently_processing_in_background_mutex); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 9a45fd285dc..9418f1a073c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -159,6 +159,8 @@ private: /// Just checks versions of each active data part bool isMutationDone(Int64 mutation_version) const; + void startBackgroundMovesIfNeeded() override; + friend class MergeTreeBlockOutputStream; friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 96e376f85fe..fb21a567572 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3263,12 +3263,7 @@ void StorageReplicatedMergeTree::startup() pool.startTask(queue_task_handle); } - if (areBackgroundMovesNeeded()) - { - auto & pool = global_context.getBackgroundMovePool(); - move_parts_task_handle = pool.createTask([this] { return movePartsTask(); }); - pool.startTask(move_parts_task_handle); - } + startBackgroundMovesIfNeeded(); } catch (...) { @@ -5702,4 +5697,16 @@ MutationCommands StorageReplicatedMergeTree::getFirtsAlterMutationCommandsForPar { return queue.getFirstAlterMutationCommandsForPart(part); } + + +void StorageReplicatedMergeTree::startBackgroundMovesIfNeeded() +{ + if (areBackgroundMovesNeeded() && !move_parts_task_handle) + { + auto & pool = global_context.getBackgroundMovePool(); + move_parts_task_handle = pool.createTask([this] { return movePartsTask(); }); + pool.startTask(move_parts_task_handle); + } +} + } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 712d997b26c..078b8d90458 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -551,6 +551,8 @@ private: MutationCommands getFirtsAlterMutationCommandsForPart(const DataPartPtr & part) const override; + void startBackgroundMovesIfNeeded() override; + protected: /** If not 'attach', either creates a new table in ZK, or adds a replica to an existing table. */ From 288b407d325f0bdf955af51a269552f98d4ab278 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 30 Jun 2020 08:38:40 +0300 Subject: [PATCH 121/330] Added test for automatic start of background move task. 
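Before the previous patch the background move task was only created during table startup, so a table whose storage policy was switched at runtime to one with several disks kept its parts in place until the server was restarted. A minimal SQL sketch of the scenario the new test covers; the policy and disk names come from the test below, while the table name and the simplified TTL are illustrative only:

CREATE TABLE moves_after_policy_change (s1 String, d1 DateTime) ENGINE = MergeTree() ORDER BY tuple();
ALTER TABLE moves_after_policy_change MODIFY SETTING storage_policy = 'default_with_small_jbod_with_external';
ALTER TABLE moves_after_policy_change MODIFY TTL d1 TO DISK 'external';
-- with the previous patch applied, the background move task starts as soon as the
-- policy changes, so expired parts reach the 'external' disk without a restart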
--- .../config.d/storage_configuration.xml | 14 ++++++ tests/integration/test_ttl_move/test.py | 47 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml b/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml index b48de85007a..47bf9f56cdd 100644 --- a/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_ttl_move/configs/config.d/storage_configuration.xml @@ -40,6 +40,20 @@ + + + + default + +
+ jbod1 +
+ + external + +
+
+
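The hunk above adds a single storage policy, default_with_small_jbod_with_external, whose volumes cover the default, jbod1 and external disks; it is the policy the new test switches to at runtime. A sketch of the shape of such a policy follows; the volume names are assumptions and are not taken from the patch:

<default_with_small_jbod_with_external>
    <volumes>
        <default>
            <disk>default</disk>
        </default>
        <main>
            <disk>jbod1</disk>
        </main>
        <external>
            <disk>external</disk>
        </external>
    </volumes>
</default_with_small_jbod_with_external>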
diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 26bd36b8cb6..1894f88029e 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -160,6 +160,53 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): pass +@pytest.mark.parametrize("name,engine", [ + ("mt_test_moves_work_after_storage_policy_change","MergeTree()"), + ("replicated_mt_test_moves_work_after_storage_policy_change","ReplicatedMergeTree('/clickhouse/test_moves_work_after_storage_policy_change', '1')"), +]) +def test_moves_work_after_storage_policy_change(started_cluster, name, engine): + try: + node1.query(""" + CREATE TABLE {name} ( + s1 String, + d1 DateTime + ) ENGINE = {engine} + ORDER BY tuple() + """.format(name=name, engine=engine)) + + node1.query("""ALTER TABLE {name} MODIFY SETTING storage_policy='default_with_small_jbod_with_external'""".format(name=name)) + + # Second expression is preferred because d1 > now()-3600. + node1.query("""ALTER TABLE {name} MODIFY TTL now()-3600 TO DISK 'jbod1', d1 TO DISK 'external'""".format(name=name)) + + wait_expire_1 = 12 + wait_expire_2 = 4 + time_1 = time.time() + wait_expire_1 + time_2 = time.time() + wait_expire_1 + wait_expire_2 + + wait_expire_1_thread = threading.Thread(target=time.sleep, args=(wait_expire_1,)) + wait_expire_1_thread.start() + + data = [] # 10MB in total + for i in range(10): + data.append(("'{}'".format(get_random_string(1024 * 1024)), "toDateTime({})".format(time_1))) # 1MB row + + node1.query("INSERT INTO {} (s1, d1) VALUES {}".format(name, ",".join(["(" + ",".join(x) + ")" for x in data]))) + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {"jbod1"} + + wait_expire_1_thread.join() + time.sleep(wait_expire_2/2) + + used_disks = get_used_disks_for_table(node1, name) + assert set(used_disks) == {"external"} + + assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" + + finally: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + + @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",0), From c45a054f65645fbc065be472e64b9bd65b7e6e72 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 30 Jun 2020 16:06:35 +0300 Subject: [PATCH 122/330] fix clang-tidy --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 90c23d87288..3bf2a9dbf59 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -244,53 +244,54 @@ void ORCBlockOutputFormat::writeColumn( { case TypeIndex::Int8: { - writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return value ;}); + /// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first. 
+ writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast(value); }); break; } case TypeIndex::UInt8: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return uint8_t(value) ;}); + writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; }); break; } case TypeIndex::Int16: { - writeNumbers(orc_column, column, null_bytemap, [](const Int16 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const Int16 & value){ return value; }); break; } case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; }); break; } case TypeIndex::Int32: { - writeNumbers(orc_column, column, null_bytemap, [](const Int32 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const Int32 & value){ return value; }); break; } case TypeIndex::UInt32: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; }); break; } case TypeIndex::Int64: { - writeNumbers(orc_column, column, null_bytemap, [](const Int64 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const Int64 & value){ return value; }); break; } case TypeIndex::UInt64: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt64 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const UInt64 & value){ return value; }); break; } case TypeIndex::Float32: { - writeNumbers(orc_column, column, null_bytemap, [](const Float32 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const Float32 & value){ return value; }); break; } case TypeIndex::Float64: { - writeNumbers(orc_column, column, null_bytemap, [](const Float64 & value){ return value ;}); + writeNumbers(orc_column, column, null_bytemap, [](const Float64 & value){ return value; }); break; } case TypeIndex::FixedString: From 29c528235e0ddae0ac7373b069d8176622759634 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Jun 2020 16:08:12 +0300 Subject: [PATCH 123/330] Add ability to run any image version from packager and runner --- docker/packager/packager | 8 +++++--- tests/integration/runner | 8 +++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index ccb01a4df92..fb076d17b50 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -31,7 +31,7 @@ def pull_image(image_name): def build_image(image_name, filepath): subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True) -def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir): +def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version): env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -46,7 +46,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache ch_root=ch_root, ccache_dir=ccache_dir, env=env_part, - img_name=image_name, + img_name=image_name + ":" + docker_image_version, interactive=interactive ) @@ -189,6 +189,7 @@ if __name__ == "__main__": parser.add_argument("--alien-pkgs", nargs='+', default=[]) parser.add_argument("--with-coverage", 
action="store_true") parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="") + parser.add_argument("--docker-image-version", default="latest") args = parser.parse_args() if not os.path.isabs(args.output_dir): @@ -219,5 +220,6 @@ if __name__ == "__main__": args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries) - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) + + run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir, args.docker_image_version) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/tests/integration/runner b/tests/integration/runner index 399c87dcf06..a009a1ce647 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -76,6 +76,12 @@ if __name__ == "__main__": default=False, help="Don't use net host in parent docker container") + parser.add_argument( + "--docker-image-version", + default="latest", + help="Version of docker image which runner will use to run tests") + + parser.add_argument('pytest_args', nargs='*', help="args for pytest command") args = parser.parse_args() @@ -106,7 +112,7 @@ if __name__ == "__main__": cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), - img=DIND_INTEGRATION_TESTS_IMAGE_NAME, + img=DIND_INTEGRATION_TESTS_IMAGE_NAME + ":" + args.docker_image_version, name=CONTAINER_NAME, command=args.command ) From 66da0733abfc8c5a5ef6b0810f0d095179a7f817 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 17:20:27 +0300 Subject: [PATCH 124/330] Check type of filter for prewhere. --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 0e2dca76f58..2f9230a4067 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -316,6 +316,12 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P prewhere_info->alias_actions->execute(block); prewhere_info->prewhere_actions->execute(block); + auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); + + if (!isInteger(prewhere_column.type)) + throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), + ErrorCodes::LOGICAL_ERROR); + if (prewhere_info->remove_prewhere_column) block.erase(prewhere_info->prewhere_column_name); else From 18059e92892a26a8cd9ec7d3cd93c3cdfb13d880 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 17:44:49 +0300 Subject: [PATCH 125/330] Check type of filter for prewhere. 
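Patches 124 through 128 validate that the filter expression used in PREWHERE (and then in WHERE) has an integer type, reporting ILLEGAL_TYPE_OF_ARGUMENT (error 43) instead of tripping a logical error deeper in the pipeline. A sketch of a query that now fails cleanly, following the regression test added a few commits below:

CREATE TABLE t0 (c0 String, c1 Int32, c2 Int32) ENGINE = MergeTree() ORDER BY tuple();
SELECT * FROM t0 PREWHERE c0;  -- c0 is a String, so the filter type check throws error 43
SELECT * FROM t0 WHERE c0 SETTINGS optimize_move_to_prewhere = 0;  -- same check applies to WHERE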
--- src/Interpreters/ExpressionAnalyzer.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 44aa70b1697..bdfa2754643 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -636,6 +636,11 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.required_output.push_back(prewhere_column_name); step.can_remove_required_output.push_back(true); + auto filter_type = step.actions->getSampleBlock().getByName(prewhere_column_name).type; + if (!isInteger(filter_type)) + throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), + ErrorCodes::LOGICAL_ERROR); + { /// Remove unused source_columns from prewhere actions. auto tmp_actions = std::make_shared(sourceColumns(), context); @@ -716,11 +721,17 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - step.required_output.push_back(select_query->where()->getColumnName()); + auto where_column_name = select_query->where()->getColumnName(); + step.required_output.push_back(where_column_name); step.can_remove_required_output = {true}; getRootActions(select_query->where(), only_types, step.actions); + auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; + if (!isInteger(filter_type)) + throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), + ErrorCodes::LOGICAL_ERROR); + return true; } From e3aede57c6a5052539393ad73ced4e9f8c2528f2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 17:48:00 +0300 Subject: [PATCH 126/330] Check type of filter for prewhere. --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index bdfa2754643..717b7cd333b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -729,7 +729,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; if (!isInteger(filter_type)) - throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), + throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), ErrorCodes::LOGICAL_ERROR); return true; From ddc231b088bf679edc78c6ea8e29a3935c223582 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 17:58:11 +0300 Subject: [PATCH 127/330] Check type of filter for prewhere. --- src/Interpreters/ExpressionAnalyzer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 717b7cd333b..d8a330de9e1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -639,7 +639,7 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( auto filter_type = step.actions->getSampleBlock().getByName(prewhere_column_name).type; if (!isInteger(filter_type)) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); { /// Remove unused source_columns from prewhere actions. 
@@ -730,7 +730,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; if (!isInteger(filter_type)) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return true; } From 8bfc67655e01859d5edc87fd95965de1b4fbd329 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 18:06:14 +0300 Subject: [PATCH 128/330] Added test. --- .../0_stateless/01356_wrong_filter-type_bug.reference | 0 .../queries/0_stateless/01356_wrong_filter-type_bug.sql | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/01356_wrong_filter-type_bug.reference create mode 100644 tests/queries/0_stateless/01356_wrong_filter-type_bug.sql diff --git a/tests/queries/0_stateless/01356_wrong_filter-type_bug.reference b/tests/queries/0_stateless/01356_wrong_filter-type_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql new file mode 100644 index 00000000000..43b464f95f0 --- /dev/null +++ b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql @@ -0,0 +1,9 @@ +drop table if exists t0; + +CREATE TABLE t0 (`c0` String, `c1` Int32 CODEC(NONE), `c2` Int32) ENGINE = MergeTree() ORDER BY tuple(); +insert into t0 values ('a', 1, 2); + +SELECT t0.c2, t0.c1, t0.c0 FROM t0 PREWHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes; -- {serverError 43} +SELECT t0.c2, t0.c1, t0.c0 FROM t0 WHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes settings optimize_move_to_prewhere=0; -- {serverError 43} + +drop table if exists t0; From 8f1845185e30ada8df2758d374cba1a273a781b4 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Tue, 30 Jun 2020 16:33:16 +0100 Subject: [PATCH 129/330] Try fix pk in tuple performance Possible approach for fixing #10574 The problem is that prepared sets are built correctly, it is a hash map of key -> set where key is a hash of AST and list of data types (when we a list of tuples of literals). However, when the key is built from the index to try and find if there exists a prepared set that would match it looks for data types of the primary key (see how data_types is populated) because the primary key has only one field (v in my example) it can not find the prepared set. The patch looks for any prepared indexes where data types match for the subset of fields found in primary key, we are not interested in other fields anyway for the purpose of primary key pruning. 
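A sketch of the situation described above, keeping the single primary-key column v from the example in the message; the schema and row counts are illustrative, not those of the new test:

CREATE TABLE pk_in_tuple (v UInt64, u UInt64) ENGINE = MergeTree() ORDER BY v;
INSERT INTO pk_in_tuple SELECT number, number FROM numbers(1000);
-- The set literal is a list of tuples, so the prepared set is keyed by the types
-- (UInt64, UInt64). Before this patch KeyCondition rebuilt the lookup key from the
-- primary key types alone (just UInt64 for v), never found the prepared set, and
-- read the whole table instead of pruning by v.
SELECT count() FROM pk_in_tuple WHERE (v, u) IN ((1, 1), (2, 2));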
--- src/Storages/MergeTree/KeyCondition.cpp | 38 ++++++++++++++++--- .../00612_pk_in_tuple_perf.reference | 2 + .../0_stateless/00612_pk_in_tuple_perf.sh | 24 ++++++++++++ 3 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/00612_pk_in_tuple_perf.reference create mode 100755 tests/queries/0_stateless/00612_pk_in_tuple_perf.sh diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 281f8511a59..4db931b35c3 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -618,16 +618,44 @@ bool KeyCondition::tryPrepareSetIndex( const ASTPtr & right_arg = args[1]; PreparedSetKey set_key; + SetPtr prepared_set; if (right_arg->as() || right_arg->as()) + { set_key = PreparedSetKey::forSubquery(*right_arg); + + auto set_it = prepared_sets.find(set_key); + if (set_it == prepared_sets.end()) + return false; + + prepared_set = set_it->second; + } else - set_key = PreparedSetKey::forLiteral(*right_arg, data_types); + { + auto set_it = std::find_if( + prepared_sets.begin(), + prepared_sets.end(), + [&](const auto &e) + { + if (e.first.ast_hash == right_arg->getTreeHash()) + { + for (size_t i = 0; i < data_types.size(); i++) + { + if (!recursiveRemoveLowCardinality(data_types[i])->equals(*e.first.types[indexes_mapping[i].tuple_index])) + { + return false; + } + } - auto set_it = prepared_sets.find(set_key); - if (set_it == prepared_sets.end()) - return false; + return true; + } - const SetPtr & prepared_set = set_it->second; + return false; + }); + if (set_it == prepared_sets.end()) + return false; + + prepared_set = set_it->second; + } /// The index can be prepared if the elements of the set were saved in advance. if (!prepared_set->hasExplicitSetElements()) diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference new file mode 100644 index 00000000000..cb18472feb3 --- /dev/null +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference @@ -0,0 +1,2 @@ +1 + "rows_read": 2, diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh new file mode 100755 index 00000000000..5a77dde34a8 --- /dev/null +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + + +$CLICKHOUSE_CLIENT --multiquery < Date: Sun, 21 Jun 2020 01:44:52 +0300 Subject: [PATCH 130/330] Fix partial revokes (complex cases). 
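A partial revoke is a broad grant narrowed by a later, more specific revoke; the complex cases combine this with grant options at different levels. A hedged sketch of the kind of sequence the new 01074_partial_revokes test exercises, with user, database and table names assumed:

CREATE USER test_user;
GRANT SELECT ON *.* TO test_user;
REVOKE SELECT ON db.table1 FROM test_user;                  -- partial revoke under a global grant
GRANT SELECT ON db.* TO test_user WITH GRANT OPTION;
REVOKE GRANT OPTION FOR SELECT ON db.table2 FROM test_user; -- drop only the grant option, keep the access
SHOW GRANTS FOR test_user;  -- must round-trip to exactly these grants and partial revokes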
--- src/Access/AccessControlManager.cpp | 41 +- src/Access/AccessControlManager.h | 3 + src/Access/AccessRights.cpp | 696 ++++++++++++------ src/Access/AccessRights.h | 91 ++- src/Access/AccessRightsElement.cpp | 288 +++----- src/Access/AccessRightsElement.h | 85 ++- src/Access/ContextAccess.cpp | 657 ++++++++--------- src/Access/ContextAccess.h | 132 ++-- src/Access/EnabledRolesInfo.cpp | 3 +- src/Access/EnabledRolesInfo.h | 1 - src/Access/GrantedAccess.cpp | 22 - src/Access/GrantedAccess.h | 55 -- src/Access/Role.h | 4 +- src/Access/RoleCache.cpp | 3 +- src/Access/User.h | 4 +- src/Access/ya.make | 1 - src/Interpreters/DDLWorker.cpp | 37 +- src/Interpreters/DDLWorker.h | 3 +- src/Interpreters/InterpreterGrantQuery.cpp | 31 +- .../InterpreterKillQueryQuery.cpp | 22 +- .../InterpreterShowGrantsQuery.cpp | 57 +- src/Interpreters/InterpreterSystemQuery.cpp | 10 +- src/Interpreters/tests/users.cpp | 2 +- src/Parsers/ASTGrantQuery.cpp | 6 + src/Parsers/ASTGrantQuery.h | 7 +- src/Storages/System/StorageSystemGrants.cpp | 26 +- .../test_create_user_and_login/test.py | 1 - tests/integration/test_role/test.py | 1 - .../01073_grant_and_revoke.reference | 4 +- .../01074_partial_revokes.reference | 59 ++ .../0_stateless/01074_partial_revokes.sql | 98 +++ 31 files changed, 1378 insertions(+), 1072 deletions(-) delete mode 100644 src/Access/GrantedAccess.cpp delete mode 100644 src/Access/GrantedAccess.h diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 1c1215a0e28..94a45e3e1c1 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -40,27 +40,8 @@ class AccessControlManager::ContextAccessCache public: explicit ContextAccessCache(const AccessControlManager & manager_) : manager(manager_) {} - std::shared_ptr getContextAccess( - const UUID & user_id, - const boost::container::flat_set & current_roles, - bool use_default_roles, - const Settings & settings, - const String & current_database, - const ClientInfo & client_info) + std::shared_ptr getContextAccess(const ContextAccessParams & params) { - ContextAccess::Params params; - params.user_id = user_id; - params.current_roles = current_roles; - params.use_default_roles = use_default_roles; - params.current_database = current_database; - params.readonly = settings.readonly; - params.allow_ddl = settings.allow_ddl; - params.allow_introspection = settings.allow_introspection_functions; - params.interface = client_info.interface; - params.http_method = client_info.http_method; - params.address = client_info.current_address.host(); - params.quota_key = client_info.quota_key; - std::lock_guard lock{mutex}; auto x = cache.get(params); if (x) @@ -119,7 +100,25 @@ std::shared_ptr AccessControlManager::getContextAccess( const String & current_database, const ClientInfo & client_info) const { - return context_access_cache->getContextAccess(user_id, current_roles, use_default_roles, settings, current_database, client_info); + ContextAccessParams params; + params.user_id = user_id; + params.current_roles = current_roles; + params.use_default_roles = use_default_roles; + params.current_database = current_database; + params.readonly = settings.readonly; + params.allow_ddl = settings.allow_ddl; + params.allow_introspection = settings.allow_introspection_functions; + params.interface = client_info.interface; + params.http_method = client_info.http_method; + params.address = client_info.current_address.host(); + params.quota_key = client_info.quota_key; + return getContextAccess(params); +} + 
+ +std::shared_ptr AccessControlManager::getContextAccess(const ContextAccessParams & params) const +{ + return context_access_cache->getContextAccess(params); } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 6bcf8d7c504..d244ecd07d2 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -21,6 +21,7 @@ namespace Poco namespace DB { class ContextAccess; +struct ContextAccessParams; struct User; using UserPtr = std::shared_ptr; class EnabledRoles; @@ -58,6 +59,8 @@ public: const String & current_database, const ClientInfo & client_info) const; + std::shared_ptr getContextAccess(const ContextAccessParams & params) const; + std::shared_ptr getEnabledRoles( const boost::container::flat_set & current_roles, const boost::container::flat_set & current_roles_with_admin_option) const; diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index a4e446750a7..82ff3aaba98 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include namespace DB @@ -9,7 +11,6 @@ namespace DB namespace ErrorCodes { extern const int INVALID_GRANT; - extern const int LOGICAL_ERROR; } @@ -58,12 +59,194 @@ namespace const AccessFlags system_reload_embedded_dictionaries = AccessType::SYSTEM_RELOAD_EMBEDDED_DICTIONARIES; }; - std::string_view checkCurrentDatabase(const std::string_view & current_database) + using Kind = AccessRightsElementWithOptions::Kind; + + struct ProtoElement { - if (current_database.empty()) - throw Exception("No current database", ErrorCodes::LOGICAL_ERROR); - return current_database; - } + AccessFlags access_flags; + boost::container::small_vector full_name; + bool grant_option = false; + Kind kind = Kind::GRANT; + + friend bool operator<(const ProtoElement & left, const ProtoElement & right) + { + static constexpr auto compare_name = [](const boost::container::small_vector & left_name, + const boost::container::small_vector & right_name, + size_t i) + { + if (i < left_name.size()) + { + if (i < right_name.size()) + return left_name[i].compare(right_name[i]); + else + return 1; /// left_name is longer => left_name > right_name + } + else if (i < right_name.size()) + return 1; /// right_name is longer => left < right + else + return 0; /// left_name == right_name + }; + + if (int cmp = compare_name(left.full_name, right.full_name, 0)) + return cmp < 0; + + if (int cmp = compare_name(left.full_name, right.full_name, 1)) + return cmp < 0; + + if (left.kind != right.kind) + return (left.kind == Kind::GRANT); + + if (left.grant_option != right.grant_option) + return right.grant_option; + + if (int cmp = compare_name(left.full_name, right.full_name, 2)) + return cmp < 0; + + return (left.access_flags < right.access_flags); + } + + AccessRightsElementWithOptions getResult() const + { + AccessRightsElementWithOptions res; + res.access_flags = access_flags; + res.grant_option = grant_option; + res.kind = kind; + switch (full_name.size()) + { + case 0: + { + res.any_database = true; + res.any_table = true; + res.any_column = true; + break; + } + case 1: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = true; + res.any_column = true; + break; + } + case 2: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = false; + res.table = full_name[1]; + res.any_column = true; + break; + } + case 3: + { + res.any_database = false; + res.database = full_name[0]; + res.any_table = 
false; + res.table = full_name[1]; + res.any_column = false; + res.columns.emplace_back(full_name[2]); + break; + } + } + return res; + } + }; + + class ProtoElements : public std::vector + { + public: + AccessRightsElementsWithOptions getResult() const + { + ProtoElements sorted = *this; + boost::range::sort(sorted); + AccessRightsElementsWithOptions res; + res.reserve(sorted.size()); + + for (size_t i = 0; i != sorted.size();) + { + size_t count_elements_with_diff_columns = sorted.countElementsWithDifferenceInColumnOnly(i); + if (count_elements_with_diff_columns == 1) + { + /// Easy case: one Element is converted to one AccessRightsElement. + const auto & element = sorted[i]; + if (element.access_flags) + res.emplace_back(element.getResult()); + ++i; + } + else + { + /// Difficult case: multiple Elements are converted to one or multiple AccessRightsElements. + sorted.appendResultWithElementsWithDifferenceInColumnOnly(i, count_elements_with_diff_columns, res); + i += count_elements_with_diff_columns; + } + } + return res; + } + + private: + size_t countElementsWithDifferenceInColumnOnly(size_t start) const + { + const auto & start_element = (*this)[start]; + if ((start_element.full_name.size() != 3) || (start == size() - 1)) + return 1; + + auto it = std::find_if(begin() + start + 1, end(), [&](const ProtoElement & element) + { + return (element.full_name.size() != 3) || (element.full_name[0] != start_element.full_name[0]) + || (element.full_name[1] != start_element.full_name[1]) || (element.grant_option != start_element.grant_option) + || (element.kind != start_element.kind); + }); + + return it - (begin() + start); + } + + /// Collects columns together to write multiple columns into one AccessRightsElement. + /// That procedure allows to output access rights in more compact way, + /// e.g. "SELECT(x, y)" instead of "SELECT(x), SELECT(y)". + void appendResultWithElementsWithDifferenceInColumnOnly(size_t start, size_t count, AccessRightsElementsWithOptions & res) const + { + const auto * pbegin = data() + start; + const auto * pend = pbegin + count; + AccessFlags handled_flags; + + while (pbegin < pend) + { + while (pbegin < pend && !(pbegin->access_flags - handled_flags)) + ++pbegin; + + while (pbegin < pend && !((pend - 1)->access_flags - handled_flags)) + --pend; + + if (pbegin >= pend) + break; + + AccessFlags common_flags = (pbegin->access_flags - handled_flags); + for (const auto * element = pbegin + 1; element != pend; ++element) + { + if (auto new_common_flags = (element->access_flags - handled_flags) & common_flags) + common_flags = new_common_flags; + } + + res.emplace_back(); + auto & back = res.back(); + back.grant_option = pbegin->grant_option; + back.kind = pbegin->kind; + back.any_database = false; + back.database = pbegin->full_name[0]; + back.any_table = false; + back.table = pbegin->full_name[1]; + back.any_column = false; + back.access_flags = common_flags; + for (const auto * element = pbegin; element != pend; ++element) + { + if (((element->access_flags - handled_flags) & common_flags) == common_flags) + back.columns.emplace_back(element->full_name[2]); + } + + handled_flags |= common_flags; + } + } + }; } @@ -249,17 +432,32 @@ public: calculateFinalAccessRec(helper); } - void logTree(Poco::Logger * log) const + + ProtoElements getElements() const { - LOG_TRACE(log, "Tree({}): name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}", - level, node_name ? 
*node_name : "NULL", access.toString(), + ProtoElements res; + getElementsRec(res, {}, *this, {}); + return res; + } + + static ProtoElements getElements(const Node * node, const Node * node_with_grant_option) + { + ProtoElements res; + getElementsRec(res, {}, node, {}, node_with_grant_option, {}); + return res; + } + + void logTree(Poco::Logger * log, const String & title) const + { + LOG_TRACE(log, "Tree({}): level={}, name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}", + title, level, node_name ? *node_name : "NULL", access.toString(), final_access.toString(), min_access.toString(), max_access.toString(), (children ? children->size() : 0)); if (children) { for (auto & child : *children | boost::adaptors::map_values) - child.logTree(log); + child.logTree(log, title); } } @@ -342,6 +540,93 @@ private: } } + static void getElementsRec( + ProtoElements & res, + const boost::container::small_vector & full_name, + const Node & node, + const AccessFlags & parent_access) + { + auto access = node.access; + auto revokes = parent_access - access; + auto grants = access - parent_access; + + if (revokes) + res.push_back(ProtoElement{revokes, full_name, false, Kind::REVOKE}); + + if (grants) + res.push_back(ProtoElement{grants, full_name, false, Kind::GRANT}); + + if (node.children) + { + for (const auto & [child_name, child] : *node.children) + { + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + getElementsRec(res, child_full_name, child, access); + } + } + } + + static void getElementsRec( + ProtoElements & res, + const boost::container::small_vector & full_name, + const Node * node, + const AccessFlags & parent_access, + const Node * node_go, + const AccessFlags & parent_access_go) + { + auto access = node ? node->access : parent_access; + auto access_go = node_go ? 
node_go->access : parent_access_go; + auto revokes = parent_access - access; + auto revokes_go = parent_access_go - access_go - revokes; + auto grants_go = access_go - parent_access_go; + auto grants = access - parent_access - grants_go; + + if (revokes) + res.push_back(ProtoElement{revokes, full_name, false, Kind::REVOKE}); + + if (revokes_go) + res.push_back(ProtoElement{revokes_go, full_name, true, Kind::REVOKE}); + + if (grants) + res.push_back(ProtoElement{grants, full_name, false, Kind::GRANT}); + + if (grants_go) + res.push_back(ProtoElement{grants_go, full_name, true, Kind::GRANT}); + + if (node && node->children) + { + for (const auto & [child_name, child] : *node->children) + { + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + const Node * child_node = &child; + const Node * child_node_go = nullptr; + if (node_go && node_go->children) + { + auto it = node_go->children->find(child_name); + if (it != node_go->children->end()) + child_node_go = &it->second; + } + getElementsRec(res, child_full_name, child_node, access, child_node_go, access_go); + } + + } + if (node_go && node_go->children) + { + for (const auto & [child_name, child] : *node_go->children) + { + if (node && node->children && node->children->count(child_name)) + continue; /// already processed + boost::container::small_vector child_full_name = full_name; + child_full_name.push_back(child_name); + const Node * child_node = nullptr; + const Node * child_node_go = &child; + getElementsRec(res, child_full_name, child_node, access, child_node_go, access_go); + } + } + } + void calculateFinalAccessRec(const Helper & helper) { /// Traverse tree. @@ -476,6 +761,10 @@ AccessRights & AccessRights::operator =(const AccessRights & src) root = std::make_unique(*src.root); else root = nullptr; + if (src.root_with_grant_option) + root_with_grant_option = std::make_unique(*src.root_with_grant_option); + else + root_with_grant_option = nullptr; return *this; } @@ -488,302 +777,245 @@ AccessRights::AccessRights(const AccessFlags & access) bool AccessRights::isEmpty() const { - return !root; + return !root && !root_with_grant_option; } void AccessRights::clear() { root = nullptr; + root_with_grant_option = nullptr; } -template +template void AccessRights::grantImpl(const AccessFlags & flags, const Args &... 
args) { - if (!root) - root = std::make_unique(); - root->grant(flags, Helper::instance(), args...); - if (!root->access && !root->children) - root = nullptr; + auto helper = [&](std::unique_ptr & root_node) + { + if (!root_node) + root_node = std::make_unique(); + root_node->grant(flags, Helper::instance(), args...); + if (!root_node->access && !root_node->children) + root_node = nullptr; + }; + helper(root); + + if constexpr (with_grant_option) + helper(root_with_grant_option); } -void AccessRights::grant(const AccessFlags & flags) { grantImpl(flags); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { grantImpl(flags, database, table, columns); } -void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } - -void AccessRights::grant(const AccessRightsElement & element, std::string_view current_database) +template +void AccessRights::grantImpl(const AccessRightsElement & element) { if (element.any_database) - { - grant(element.access_flags); - } + grantImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - grant(element.access_flags, checkCurrentDatabase(current_database)); - else - grant(element.access_flags, element.database); - } + grantImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - grant(element.access_flags, checkCurrentDatabase(current_database), element.table); - else - grant(element.access_flags, element.database, element.table); - } + grantImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - grant(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - grant(element.access_flags, element.database, element.table, element.columns); - } + grantImpl(element.access_flags, element.database, element.table, element.columns); } -void AccessRights::grant(const AccessRightsElements & elements, std::string_view current_database) +template +void AccessRights::grantImpl(const AccessRightsElements & elements) { for (const auto & element : elements) - grant(element, current_database); + grantImpl(element); } +void AccessRights::grant(const AccessFlags & flags) { grantImpl(flags); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & 
columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grant(const AccessRightsElement & element) { grantImpl(element); } +void AccessRights::grant(const AccessRightsElements & elements) { grantImpl(elements); } -template +void AccessRights::grantWithGrantOption(const AccessFlags & flags) { grantImpl(flags); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database) { grantImpl(flags, database); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { grantImpl(flags, database, table); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { grantImpl(flags, database, table, column); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { grantImpl(flags, database, table, columns); } +void AccessRights::grantWithGrantOption(const AccessRightsElement & element) { grantImpl(element); } +void AccessRights::grantWithGrantOption(const AccessRightsElements & elements) { grantImpl(elements); } + + +template void AccessRights::revokeImpl(const AccessFlags & flags, const Args &... args) { - if (!root) - return; - root->revoke(flags, Helper::instance(), args...); - if (!root->access && !root->children) - root = nullptr; + auto helper = [&](std::unique_ptr & root_node) + { + if (!root_node) + return; + root_node->revoke(flags, Helper::instance(), args...); + if (!root_node->access && !root_node->children) + root_node = nullptr; + }; + helper(root_with_grant_option); + + if constexpr (!grant_option) + helper(root); } -void AccessRights::revoke(const AccessFlags & flags) { revokeImpl(flags); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } -void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } - - -void AccessRights::revoke(const AccessRightsElement & element, std::string_view current_database) +template +void AccessRights::revokeImpl(const AccessRightsElement & element) { if (element.any_database) - { - revoke(element.access_flags); - } + revokeImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - revoke(element.access_flags, 
checkCurrentDatabase(current_database)); - else - revoke(element.access_flags, element.database); - } + revokeImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - revoke(element.access_flags, checkCurrentDatabase(current_database), element.table); - else - revoke(element.access_flags, element.database, element.table); - } + revokeImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - revoke(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - revoke(element.access_flags, element.database, element.table, element.columns); - } + revokeImpl(element.access_flags, element.database, element.table, element.columns); } -void AccessRights::revoke(const AccessRightsElements & elements, std::string_view current_database) +template +void AccessRights::revokeImpl(const AccessRightsElements & elements) { for (const auto & element : elements) - revoke(element, current_database); + revokeImpl(element); } +void AccessRights::revoke(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revoke(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRights::revoke(const AccessRightsElements & elements) { revokeImpl(elements); } -AccessRightsElements AccessRights::getGrants() const -{ - AccessRightsElements grants; - getGrantsAndPartialRevokesImpl(&grants, nullptr); - return grants; -} - -AccessRightsElements AccessRights::getPartialRevokes() const -{ - AccessRightsElements partial_revokes; - getGrantsAndPartialRevokesImpl(nullptr, &partial_revokes); - return partial_revokes; -} - -AccessRights::GrantsAndPartialRevokes AccessRights::getGrantsAndPartialRevokes() const -{ - GrantsAndPartialRevokes res; - getGrantsAndPartialRevokesImpl(&res.grants, &res.revokes); - return res; -} - - -void AccessRights::getGrantsAndPartialRevokesImpl(AccessRightsElements * out_grants, AccessRightsElements * out_partial_revokes) const +void AccessRights::revokeGrantOption(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRights::revokeGrantOption(const 
AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRights::revokeGrantOption(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRights::revokeGrantOption(const AccessRightsElements & elements) { revokeImpl(elements); } + + +AccessRightsElementsWithOptions AccessRights::getElements() const { +#if 0 + logTree(); +#endif if (!root) - return; - auto global_access = root->access; - if (out_grants && global_access) - out_grants->push_back({global_access}); - if (root->children) - { - for (const auto & [db_name, db_node] : *root->children) - { - if (out_grants) - { - if (auto db_grants = db_node.access - global_access) - out_grants->push_back({db_grants, db_name}); - } - if (out_partial_revokes) - { - if (auto db_partial_revokes = global_access - db_node.access) - out_partial_revokes->push_back({db_partial_revokes, db_name}); - } - if (db_node.children) - { - for (const auto & [table_name, table_node] : *db_node.children) - { - if (out_grants) - { - if (auto table_grants = table_node.access - db_node.access) - out_grants->push_back({table_grants, db_name, table_name}); - } - if (out_partial_revokes) - { - if (auto table_partial_revokes = db_node.access - table_node.access) - out_partial_revokes->push_back({table_partial_revokes, db_name, table_name}); - } - if (table_node.children) - { - for (const auto & [column_name, column_node] : *table_node.children) - { - if (out_grants) - { - if (auto column_grants = column_node.access - table_node.access) - out_grants->push_back({column_grants, db_name, table_name, column_name}); - } - if (out_partial_revokes) - { - if (auto column_partial_revokes = table_node.access - column_node.access) - out_partial_revokes->push_back({column_partial_revokes, db_name, table_name, column_name}); - } - } - - } - } - } - } - } + return {}; + if (!root_with_grant_option) + return root->getElements().getResult(); + return Node::getElements(root.get(), root_with_grant_option.get()).getResult(); } String AccessRights::toString() const { - String res; - auto gr = getGrantsAndPartialRevokes(); - if (!gr.grants.empty()) - { - res += "GRANT "; - res += gr.grants.toString(); - } - if (!gr.revokes.empty()) - { - if (!res.empty()) - res += ", "; - res += "REVOKE "; - res += gr.revokes.toString(); - } - if (res.empty()) - res = "GRANT USAGE ON *.*"; - return res; + return getElements().toString(); } -template +template bool AccessRights::isGrantedImpl(const AccessFlags & flags, const Args &... 
args) const { - if (!root) - return flags.isEmpty(); - return root->isGranted(flags, args...); + auto helper = [&](const std::unique_ptr & root_node) -> bool + { + if (!root_node) + return flags.isEmpty(); + return root_node->isGranted(flags, args...); + }; + if constexpr (grant_option) + return helper(root_with_grant_option); + else + return helper(root); } -bool AccessRights::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } -bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } - -bool AccessRights::isGranted(const AccessRightsElement & element, std::string_view current_database) const +template +bool AccessRights::isGrantedImpl(const AccessRightsElement & element) const { if (element.any_database) - { - return isGranted(element.access_flags); - } + return isGrantedImpl(element.access_flags); else if (element.any_table) - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database)); - else - return isGranted(element.access_flags, element.database); - } + return isGrantedImpl(element.access_flags, element.database); else if (element.any_column) - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database), element.table); - else - return isGranted(element.access_flags, element.database, element.table); - } + return isGrantedImpl(element.access_flags, element.database, element.table); else - { - if (element.database.empty()) - return isGranted(element.access_flags, checkCurrentDatabase(current_database), element.table, element.columns); - else - return isGranted(element.access_flags, element.database, element.table, element.columns); - } + return isGrantedImpl(element.access_flags, element.database, element.table, element.columns); } -bool AccessRights::isGranted(const AccessRightsElements & elements, std::string_view current_database) const +template +bool AccessRights::isGrantedImpl(const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!isGranted(element, current_database)) + if (!isGrantedImpl(element)) return false; return true; } +bool AccessRights::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool AccessRights::isGranted(const AccessFlags & flags, const 
std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::isGranted(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool AccessRights::isGranted(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + +bool AccessRights::hasGrantOption(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool AccessRights::hasGrantOption(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool AccessRights::hasGrantOption(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + bool operator ==(const AccessRights & left, const AccessRights & right) { - if (!left.root) - return !right.root; - if (!right.root) - return false; - return *left.root == *right.root; + auto helper = [](const std::unique_ptr & left_node, const std::unique_ptr & right_node) + { + if (!left_node) + return !right_node; + if (!right_node) + return false; + return *left_node == *right_node; + }; + return helper(left.root, right.root) && helper(left.root_with_grant_option, right.root_with_grant_option); } void AccessRights::merge(const AccessRights & other) { - if (!root) + auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { - *this = other; - return; - } - if (other.root) - { - root->merge(*other.root, Helper::instance()); - if (!root->access && !root->children) - root = nullptr; - } + if (!root_node) + { + if (other_root_node) + root_node = std::make_unique(*other_root_node); + return; + } + if (other_root_node) + { + root_node->merge(*other_root_node, Helper::instance()); + if (!root_node->access && !root_node->children) + root_node = nullptr; + } + }; + helper(root, other.root); + helper(root_with_grant_option, other.root_with_grant_option); +} + + +AccessRights AccessRights::getFullAccess() +{ + AccessRights res; + res.grantWithGrantOption(AccessType::ALL); + return res; } @@ -791,7 +1023,11 @@ void AccessRights::logTree() const { auto * log = 
&Poco::Logger::get("AccessRights"); if (root) - root->logTree(log); + { + root->logTree(log, ""); + if (root_with_grant_option) + root->logTree(log, "go"); + } else LOG_TRACE(log, "Tree: NULL"); } diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index c32514e8feb..7706edcb40a 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -26,6 +26,12 @@ public: /// Revokes everything. It's the same as revoke(AccessType::ALL). void clear(); + /// Returns the information about all the access granted as a string. + String toString() const; + + /// Returns the information about all the access granted. + AccessRightsElementsWithOptions getElements() const; + /// Grants access on a specified database/table/column. /// Does nothing if the specified access has been already granted. void grant(const AccessFlags & flags); @@ -34,8 +40,17 @@ public: void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); void grant(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); - void grant(const AccessRightsElement & element, std::string_view current_database = {}); - void grant(const AccessRightsElements & elements, std::string_view current_database = {}); + void grant(const AccessRightsElement & element); + void grant(const AccessRightsElements & elements); + + void grantWithGrantOption(const AccessFlags & flags); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); + void grantWithGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); + void grantWithGrantOption(const AccessRightsElement & element); + void grantWithGrantOption(const AccessRightsElements & elements); /// Revokes a specified access granted earlier on a specified database/table/column. /// For example, revoke(AccessType::ALL) revokes all grants at all, just like clear(); @@ -45,21 +60,17 @@ public: void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); void revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); - void revoke(const AccessRightsElement & element, std::string_view current_database = {}); - void revoke(const AccessRightsElements & elements, std::string_view current_database = {}); + void revoke(const AccessRightsElement & element); + void revoke(const AccessRightsElements & elements); - /// Returns the information about all the access granted. 
-    struct GrantsAndPartialRevokes
-    {
-        AccessRightsElements grants;
-        AccessRightsElements revokes;
-    };
-    AccessRightsElements getGrants() const;
-    AccessRightsElements getPartialRevokes() const;
-    GrantsAndPartialRevokes getGrantsAndPartialRevokes() const;
-
-    /// Returns the information about all the access granted as a string.
-    String toString() const;
+    void revokeGrantOption(const AccessFlags & flags);
+    void revokeGrantOption(const AccessFlags & flags, const std::string_view & database);
+    void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table);
+    void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column);
+    void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns);
+    void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns);
+    void revokeGrantOption(const AccessRightsElement & element);
+    void revokeGrantOption(const AccessRightsElements & elements);
     /// Whether a specified access granted.
     bool isGranted(const AccessFlags & flags) const;
@@ -68,38 +79,60 @@ public:
     bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const;
     bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const;
     bool isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const;
-    bool isGranted(const AccessRightsElement & element, std::string_view current_database = {}) const;
-    bool isGranted(const AccessRightsElements & elements, std::string_view current_database = {}) const;
+    bool isGranted(const AccessRightsElement & element) const;
+    bool isGranted(const AccessRightsElements & elements) const;
-    friend bool operator ==(const AccessRights & left, const AccessRights & right);
-    friend bool operator !=(const AccessRights & left, const AccessRights & right) { return !(left == right); }
+    bool hasGrantOption(const AccessFlags & flags) const;
+    bool hasGrantOption(const AccessFlags & flags, const std::string_view & database) const;
+    bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const;
+    bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const;
+    bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const;
+    bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const;
+    bool hasGrantOption(const AccessRightsElement & element) const;
+    bool hasGrantOption(const AccessRightsElements & elements) const;
     /// Merges two sets of access rights together.
     /// It's used to combine access rights from multiple roles.
void merge(const AccessRights & other); + friend bool operator ==(const AccessRights & left, const AccessRights & right); + friend bool operator !=(const AccessRights & left, const AccessRights & right) { return !(left == right); } + + static AccessRights getFullAccess(); + private: - template + template void grantImpl(const AccessFlags & flags, const Args &... args); - template + template + void grantImpl(const AccessRightsElement & element); + + template + void grantImpl(const AccessRightsElements & elements); + + template void revokeImpl(const AccessFlags & flags, const Args &... args); - template + template + void revokeImpl(const AccessRightsElement & element); + + template + void revokeImpl(const AccessRightsElements & elements); + + template bool isGrantedImpl(const AccessFlags & flags, const Args &... args) const; - bool isGrantedImpl(const AccessRightsElement & element, std::string_view current_database) const; - bool isGrantedImpl(const AccessRightsElements & elements, std::string_view current_database) const; + template + bool isGrantedImpl(const AccessRightsElement & element) const; - template - AccessFlags getAccessImpl(const Args &... args) const; - - void getGrantsAndPartialRevokesImpl(AccessRightsElements * grants, AccessRightsElements * partial_revokes) const; + template + bool isGrantedImpl(const AccessRightsElements & elements) const; void logTree() const; struct Node; std::unique_ptr root; + std::unique_ptr root_with_grant_option; }; } diff --git a/src/Access/AccessRightsElement.cpp b/src/Access/AccessRightsElement.cpp index db1ea5d3d5c..e69fb6d3b74 100644 --- a/src/Access/AccessRightsElement.cpp +++ b/src/Access/AccessRightsElement.cpp @@ -12,222 +12,158 @@ namespace DB { namespace { - size_t groupElements(AccessRightsElements & elements, size_t start) + using Kind = AccessRightsElementWithOptions::Kind; + + String formatOptions(bool grant_option, Kind kind, const String & inner_part) { - auto & start_element = elements[start]; - auto it = std::find_if(elements.begin() + start + 1, elements.end(), - [&](const AccessRightsElement & element) + if (kind == Kind::REVOKE) { - return (element.database != start_element.database) || - (element.any_database != start_element.any_database) || - (element.table != start_element.table) || - (element.any_table != start_element.any_table) || - (element.any_column != start_element.any_column); - }); - size_t end = it - elements.begin(); - - /// All the elements at indices from start to end here specify - /// the same database and table. - - if (start_element.any_column) - { - /// Easy case: the elements don't specify columns. - /// All we need is to combine the access flags. - for (size_t i = start + 1; i != end; ++i) - { - start_element.access_flags |= elements[i].access_flags; - elements[i].access_flags = {}; - } - return end; + if (grant_option) + return "REVOKE GRANT OPTION " + inner_part; + else + return "REVOKE " + inner_part; } - - /// Difficult case: the elements specify columns. - /// We have to find groups of columns with common access flags. 
- for (size_t i = start; i != end; ++i) + else { - if (!elements[i].access_flags) - continue; - - AccessFlags common_flags = elements[i].access_flags; - size_t num_elements_with_common_flags = 1; - for (size_t j = i + 1; j != end; ++j) - { - auto new_common_flags = common_flags & elements[j].access_flags; - if (new_common_flags) - { - common_flags = new_common_flags; - ++num_elements_with_common_flags; - } - } - - if (num_elements_with_common_flags == 1) - continue; - - if (elements[i].access_flags != common_flags) - { - elements.insert(elements.begin() + i + 1, elements[i]); - elements[i].access_flags = common_flags; - elements[i].columns.clear(); - ++end; - } - - for (size_t j = i + 1; j != end; ++j) - { - if ((elements[j].access_flags & common_flags) == common_flags) - { - boost::range::push_back(elements[i].columns, elements[j].columns); - elements[j].access_flags -= common_flags; - } - } + if (grant_option) + return "GRANT " + inner_part + " WITH GRANT OPTION"; + else + return "GRANT " + inner_part; } - - return end; } - /// Tries to combine elements to decrease their number. - void groupElements(AccessRightsElements & elements) + + String formatONClause(const String & database, bool any_database, const String & table, bool any_table) { - if (!boost::range::is_sorted(elements)) - boost::range::sort(elements); /// Algorithm in groupElement() requires elements to be sorted. - for (size_t start = 0; start != elements.size();) - start = groupElements(elements, start); + String msg = "ON "; + + if (any_database) + msg += "*."; + else if (!database.empty()) + msg += backQuoteIfNeed(database) + "."; + + if (any_table) + msg += "*"; + else + msg += backQuoteIfNeed(table); + return msg; } - /// Removes unnecessary elements, sorts elements and makes them unique. - void sortElementsAndMakeUnique(AccessRightsElements & elements) + + String formatAccessFlagsWithColumns(const AccessFlags & access_flags, const Strings & columns, bool any_column) { - /// Remove empty elements. - boost::range::remove_erase_if(elements, [](const AccessRightsElement & element) + String columns_in_parentheses; + if (!any_column) { - return !element.access_flags || (!element.any_column && element.columns.empty()); - }); - - /// Sort columns and make them unique. - for (auto & element : elements) - { - if (element.any_column) - continue; - - if (!boost::range::is_sorted(element.columns)) - boost::range::sort(element.columns); - element.columns.erase(std::unique(element.columns.begin(), element.columns.end()), element.columns.end()); + if (columns.empty()) + return "USAGE"; + for (const auto & column : columns) + { + columns_in_parentheses += columns_in_parentheses.empty() ? "(" : ", "; + columns_in_parentheses += backQuoteIfNeed(column); + } + columns_in_parentheses += ")"; } - /// Sort elements themselves. 
- boost::range::sort(elements); - elements.erase(std::unique(elements.begin(), elements.end()), elements.end()); + auto keywords = access_flags.toKeywords(); + if (keywords.empty()) + return "USAGE"; + + String msg; + for (const std::string_view & keyword : keywords) + { + if (!msg.empty()) + msg += ", "; + msg += String{keyword} + columns_in_parentheses; + } + return msg; } } -void AccessRightsElement::setDatabase(const String & new_database) -{ - database = new_database; - any_database = false; -} - - -void AccessRightsElement::replaceEmptyDatabase(const String & new_database) -{ - if (isEmptyDatabase()) - setDatabase(new_database); -} - - -bool AccessRightsElement::isEmptyDatabase() const -{ - return !any_database && database.empty(); -} - String AccessRightsElement::toString() const { - String msg = toStringWithoutON(); - msg += " ON "; - - if (any_database) - msg += "*."; - else if (!database.empty()) - msg += backQuoteIfNeed(database) + "."; - - if (any_table) - msg += "*"; - else - msg += backQuoteIfNeed(table); - return msg; + return formatAccessFlagsWithColumns(access_flags, columns, any_column) + " " + formatONClause(database, any_database, table, any_table); } -String AccessRightsElement::toStringWithoutON() const +String AccessRightsElementWithOptions::toString() const { - String columns_in_parentheses; - if (!any_column) - { - if (columns.empty()) - return "USAGE"; - for (const auto & column : columns) - { - columns_in_parentheses += columns_in_parentheses.empty() ? "(" : ", "; - columns_in_parentheses += backQuoteIfNeed(column); - } - columns_in_parentheses += ")"; - } - - auto keywords = access_flags.toKeywords(); - if (keywords.empty()) - return "USAGE"; - - String msg; - for (const std::string_view & keyword : keywords) - { - if (!msg.empty()) - msg += ", "; - msg += String{keyword} + columns_in_parentheses; - } - return msg; + return formatOptions(grant_option, kind, AccessRightsElement::toString()); } - -void AccessRightsElements::replaceEmptyDatabase(const String & new_database) +String AccessRightsElements::toString() const { - for (auto & element : *this) - element.replaceEmptyDatabase(new_database); -} - - -String AccessRightsElements::toString() -{ - normalize(); - if (empty()) return "USAGE ON *.*"; - String msg; - bool need_comma = false; + String res; + String inner_part; + for (size_t i = 0; i != size(); ++i) { const auto & element = (*this)[i]; - if (std::exchange(need_comma, true)) - msg += ", "; - bool next_element_on_same_db_and_table = false; + + if (!inner_part.empty()) + inner_part += ", "; + inner_part += formatAccessFlagsWithColumns(element.access_flags, element.columns, element.any_column); + + bool next_element_uses_same_table = false; if (i != size() - 1) { const auto & next_element = (*this)[i + 1]; - if ((element.database == next_element.database) && (element.any_database == next_element.any_database) - && (element.table == next_element.table) && (element.any_table == next_element.any_table)) - next_element_on_same_db_and_table = true; + if (element.sameDatabaseAndTable(next_element)) + next_element_uses_same_table = true; + } + + if (!next_element_uses_same_table) + { + if (!res.empty()) + res += ", "; + res += inner_part + " " + formatONClause(element.database, element.any_database, element.table, element.any_table); + inner_part.clear(); } - if (next_element_on_same_db_and_table) - msg += element.toStringWithoutON(); - else - msg += element.toString(); } - return msg; + + return res; } - -void AccessRightsElements::normalize() +String 
AccessRightsElementsWithOptions::toString() const { - groupElements(*this); - sortElementsAndMakeUnique(*this); + if (empty()) + return "GRANT USAGE ON *.*"; + + String res; + String inner_part; + + for (size_t i = 0; i != size(); ++i) + { + const auto & element = (*this)[i]; + + if (!inner_part.empty()) + inner_part += ", "; + inner_part += formatAccessFlagsWithColumns(element.access_flags, element.columns, element.any_column); + + bool next_element_uses_same_mode_and_table = false; + if (i != size() - 1) + { + const auto & next_element = (*this)[i + 1]; + if (element.sameDatabaseAndTable(next_element) && element.sameOptions(next_element)) + next_element_uses_same_mode_and_table = true; + } + + if (!next_element_uses_same_mode_and_table) + { + if (!res.empty()) + res += ", "; + res += formatOptions( + element.grant_option, + element.kind, + inner_part + " " + formatONClause(element.database, element.any_database, element.table, element.any_table)); + inner_part.clear(); + } + } + + return res; } } diff --git a/src/Access/AccessRightsElement.h b/src/Access/AccessRightsElement.h index 70eb95c2d17..f9f7c433308 100644 --- a/src/Access/AccessRightsElement.h +++ b/src/Access/AccessRightsElement.h @@ -71,26 +71,48 @@ struct AccessRightsElement { } - auto toTuple() const { return std::tie(access_flags, database, any_database, table, any_table, columns, any_column); } + auto toTuple() const { return std::tie(access_flags, any_database, database, any_table, table, any_column, columns); } friend bool operator==(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() == right.toTuple(); } - friend bool operator!=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() != right.toTuple(); } - friend bool operator<(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() < right.toTuple(); } - friend bool operator>(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() > right.toTuple(); } - friend bool operator<=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() <= right.toTuple(); } - friend bool operator>=(const AccessRightsElement & left, const AccessRightsElement & right) { return left.toTuple() >= right.toTuple(); } + friend bool operator!=(const AccessRightsElement & left, const AccessRightsElement & right) { return !(left == right); } - /// Sets the database. - void setDatabase(const String & new_database); + bool sameDatabaseAndTable(const AccessRightsElement & other) const + { + return (database == other.database) && (any_database == other.any_database) && (table == other.table) + && (any_table == other.any_table); + } + + bool isEmptyDatabase() const { return !any_database && database.empty(); } /// If the database is empty, replaces it with `new_database`. Otherwise does nothing. void replaceEmptyDatabase(const String & new_database); - bool isEmptyDatabase() const; - /// Returns a human-readable representation like "SELECT, UPDATE(x, y) ON db.table". - /// The returned string isn't prefixed with the "GRANT" keyword. 
String toString() const; - String toStringWithoutON() const; +}; + + +struct AccessRightsElementWithOptions : public AccessRightsElement +{ + bool grant_option = false; + + enum class Kind + { + GRANT, + REVOKE, + }; + Kind kind = Kind::GRANT; + + bool sameOptions(const AccessRightsElementWithOptions & other) const + { + return (grant_option == other.grant_option) && (kind == other.kind); + } + + auto toTuple() const { return std::tie(access_flags, any_database, database, any_table, table, any_column, columns, grant_option, kind); } + friend bool operator==(const AccessRightsElementWithOptions & left, const AccessRightsElementWithOptions & right) { return left.toTuple() == right.toTuple(); } + friend bool operator!=(const AccessRightsElementWithOptions & left, const AccessRightsElementWithOptions & right) { return !(left == right); } + + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; }; @@ -101,13 +123,38 @@ public: /// Replaces the empty database with `new_database`. void replaceEmptyDatabase(const String & new_database); - /// Returns a human-readable representation like "SELECT, UPDATE(x, y) ON db.table". - /// The returned string isn't prefixed with the "GRANT" keyword. - String toString() const { return AccessRightsElements(*this).toString(); } - String toString(); - - /// Reorder and group elements to show them in more readable form. - void normalize(); + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; }; + +class AccessRightsElementsWithOptions : public std::vector +{ +public: + /// Replaces the empty database with `new_database`. + void replaceEmptyDatabase(const String & new_database); + + /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". + String toString() const; +}; + + +inline void AccessRightsElement::replaceEmptyDatabase(const String & new_database) +{ + if (isEmptyDatabase()) + database = new_database; +} + +inline void AccessRightsElements::replaceEmptyDatabase(const String & new_database) +{ + for (auto & element : *this) + element.replaceEmptyDatabase(new_database); +} + +inline void AccessRightsElementsWithOptions::replaceEmptyDatabase(const String & new_database) +{ + for (auto & element : *this) + element.replaceEmptyDatabase(new_database); +} + } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 62aebfd4367..4a156c5972d 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include @@ -32,68 +30,6 @@ namespace ErrorCodes extern const int UNKNOWN_USER; } - -namespace -{ - enum CheckAccessRightsMode - { - RETURN_FALSE_IF_ACCESS_DENIED, - LOG_WARNING_IF_ACCESS_DENIED, - THROW_IF_ACCESS_DENIED, - }; - - - String formatSkippedMessage() - { - return ""; - } - - String formatSkippedMessage(const std::string_view & database) - { - return ". Skipped database " + backQuoteIfNeed(database); - } - - String formatSkippedMessage(const std::string_view & database, const std::string_view & table) - { - String str = ". Skipped table "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; - } - - String formatSkippedMessage(const std::string_view & database, const std::string_view & table, const std::string_view & column) - { - String str = ". 
Skipped column " + backQuoteIfNeed(column) + " ON "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; - } - - template - String formatSkippedMessage(const std::string_view & database, const std::string_view & table, const std::vector & columns) - { - if (columns.size() == 1) - return formatSkippedMessage(database, table, columns[0]); - - String str = ". Skipped columns "; - bool need_comma = false; - for (const auto & column : columns) - { - if (std::exchange(need_comma, true)) - str += ", "; - str += backQuoteIfNeed(column); - } - str += " ON "; - if (!database.empty()) - str += backQuoteIfNeed(database) + "."; - str += backQuoteIfNeed(table); - return str; - } -} - - ContextAccess::ContextAccess(const AccessControlManager & manager_, const Params & params_) : manager(&manager_) , params(params_) @@ -116,8 +52,8 @@ void ContextAccess::setUser(const UserPtr & user_) const if (!user) { /// User has been dropped. - auto nothing_granted = boost::make_shared(); - boost::range::fill(result_access, nothing_granted); + auto nothing_granted = std::make_shared(); + access = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; enabled_roles = nullptr; @@ -169,10 +105,73 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & { assert(roles_info_); roles_info = roles_info_; - boost::range::fill(result_access, nullptr /* need recalculate */); enabled_row_policies = manager->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles); enabled_quota = manager->getEnabledQuota(*params.user_id, user_name, roles_info->enabled_roles, params.address, params.quota_key); enabled_settings = manager->getEnabledSettings(*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles); + setFinalAccess(); +} + + +void ContextAccess::setFinalAccess() const +{ + auto final_access = std::make_shared(); + *final_access = user->access; + if (roles_info) + final_access->merge(roles_info->access); + + static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW + | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW + | AccessType::TRUNCATE; + + static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; + static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; + static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; + static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; + + if (params.readonly) + final_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); + + if (params.readonly == 1) + { + /// Table functions are forbidden in readonly mode. + /// For example, for readonly = 2 - allowed. + final_access->revoke(AccessType::CREATE_TEMPORARY_TABLE); + } + + if (!params.allow_ddl) + final_access->revoke(table_and_dictionary_ddl); + + if (!params.allow_introspection) + final_access->revoke(AccessType::INTROSPECTION); + + /// Anyone has access to the "system" database. 
+ final_access->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); + + if (params.readonly != 1) + { + /// User has access to temporary or external table if such table was resolved in session or query context + final_access->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); + } + + if (params.readonly) + { + /// No grant option in readonly mode. + final_access->revokeGrantOption(AccessType::ALL); + } + + if (trace_log) + { + if (roles_info && !roles_info->getCurrentRolesNames().empty()) + { + LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}", + boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "), + boost::algorithm::join(roles_info->getEnabledRolesNames(), ", ")); + } + LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection); + LOG_TRACE(trace_log, "List of all grants: {}", final_access->toString()); + } + + access = final_access; } @@ -193,284 +192,6 @@ bool ContextAccess::isClientHostAllowed() const } -template -bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const -{ - auto access = calculateResultAccess(grant_option); - bool is_granted = access->isGranted(flags, args...); - - if (trace_log) - LOG_TRACE(trace_log, "Access {}: {}", (is_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString())); - - if (is_granted) - return true; - - if constexpr (mode == RETURN_FALSE_IF_ACCESS_DENIED) - return false; - - if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED) - { - if (!log_) - return false; - } - - auto show_error = [&](const String & msg, [[maybe_unused]] int error_code) - { - if constexpr (mode == THROW_IF_ACCESS_DENIED) - throw Exception(user_name + ": " + msg, error_code); - else if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED) - LOG_WARNING(log_, "{}: {}{}", user_name, msg, formatSkippedMessage(args...)); - }; - - if (!user) - { - show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); - } - else if (grant_option && calculateResultAccess(false, params.readonly, params.allow_ddl, params.allow_introspection)->isGranted(flags, args...)) - { - show_error( - "Not enough privileges. " - "The required privileges have been granted, but without grant option. " - "To execute this query it's necessary to have the grant " - + AccessRightsElement{flags, args...}.toString() + " WITH GRANT OPTION", - ErrorCodes::ACCESS_DENIED); - } - else if (params.readonly && calculateResultAccess(false, false, params.allow_ddl, params.allow_introspection)->isGranted(flags, args...)) - { - if (params.interface == ClientInfo::Interface::HTTP && params.http_method == ClientInfo::HTTPMethod::GET) - show_error( - "Cannot execute query in readonly mode. " - "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries", - ErrorCodes::READONLY); - else - show_error("Cannot execute query in readonly mode", ErrorCodes::READONLY); - } - else if (!params.allow_ddl && calculateResultAccess(false, params.readonly, true, params.allow_introspection)->isGranted(flags, args...)) - { - show_error("Cannot execute query. 
DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); - } - else if (!params.allow_introspection && calculateResultAccess(false, params.readonly, params.allow_ddl, true)->isGranted(flags, args...)) - { - show_error("Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); - } - else - { - show_error( - "Not enough privileges. To execute this query it's necessary to have the grant " - + AccessRightsElement{flags, args...}.toString() + (grant_option ? " WITH GRANT OPTION" : ""), - ErrorCodes::ACCESS_DENIED); - } - - return false; -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const -{ - return calculateResultAccessAndCheck(log_, flags); -} - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... args) const -{ - if (database.empty()) - return calculateResultAccessAndCheck(log_, flags, params.current_database, args...); - else - return calculateResultAccessAndCheck(log_, flags, database, args...); -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const -{ - if (element.any_database) - { - return checkAccessImpl(log_, element.access_flags); - } - else if (element.any_table) - { - return checkAccessImpl(log_, element.access_flags, element.database); - } - else if (element.any_column) - { - return checkAccessImpl(log_, element.access_flags, element.database, element.table); - } - else - { - return checkAccessImpl(log_, element.access_flags, element.database, element.table, element.columns); - } -} - - -template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsElements & elements) const -{ - for (const auto & element : elements) - if (!checkAccessImpl(log_, element)) - return false; - return true; -} - - -void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(nullptr, flags); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(nullptr, flags, database); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(nullptr, flags, database, table); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(nullptr, flags, database, table, column); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(nullptr, element); } -void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(nullptr, elements); } - -bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(nullptr, flags); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database) const { return checkAccessImpl(nullptr, flags, 
database); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return checkAccessImpl(nullptr, flags, database, table); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return checkAccessImpl(nullptr, flags, database, table, column); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return checkAccessImpl(nullptr, flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return checkAccessImpl(nullptr, flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessRightsElement & element) const { return checkAccessImpl(nullptr, element); } -bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return checkAccessImpl(nullptr, elements); } - -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags) const { return checkAccessImpl(log_, flags); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database) const { return checkAccessImpl(log_, flags, database); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return checkAccessImpl(log_, flags, database, table); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return checkAccessImpl(log_, flags, database, table, column); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return checkAccessImpl(log_, flags, database, table, columns); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return checkAccessImpl(log_, flags, database, table, columns); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessRightsElement & element) const { return checkAccessImpl(log_, element); } -bool ContextAccess::isGranted(Poco::Logger * log_, const AccessRightsElements & elements) const { return checkAccessImpl(log_, elements); } - -void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(nullptr, flags); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(nullptr, flags, database); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(nullptr, flags, database, table); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(nullptr, flags, database, table, column); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(nullptr, 
flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(nullptr, flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(nullptr, element); } -void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(nullptr, elements); } - - -void ContextAccess::checkAdminOption(const UUID & role_id) const -{ - if (isGranted(AccessType::ROLE_ADMIN)) - return; - - auto info = getRolesInfo(); - if (info && info->enabled_roles_with_admin_option.count(role_id)) - return; - - if (!user) - throw Exception(user_name + ": User has been dropped", ErrorCodes::UNKNOWN_USER); - - std::optional role_name = manager->readName(role_id); - if (!role_name) - role_name = "ID {" + toString(role_id) + "}"; - throw Exception( - getUserName() + ": Not enough privileges. To execute this query it's necessary to have the grant " + backQuoteIfNeed(*role_name) - + " WITH ADMIN OPTION ", - ErrorCodes::ACCESS_DENIED); -} - - -boost::shared_ptr ContextAccess::calculateResultAccess(bool grant_option) const -{ - return calculateResultAccess(grant_option, params.readonly, params.allow_ddl, params.allow_introspection); -} - - -boost::shared_ptr ContextAccess::calculateResultAccess(bool grant_option, UInt64 readonly_, bool allow_ddl_, bool allow_introspection_) const -{ - size_t index = static_cast(readonly_ != params.readonly) - + static_cast(allow_ddl_ != params.allow_ddl) * 2 + - + static_cast(allow_introspection_ != params.allow_introspection) * 3 - + static_cast(grant_option) * 4; - assert(index < std::size(result_access)); - auto res = result_access[index].load(); - if (res) - return res; - - std::lock_guard lock{mutex}; - res = result_access[index].load(); - if (res) - return res; - - auto merged_access = boost::make_shared(); - - if (grant_option) - { - *merged_access = user->access.access_with_grant_option; - if (roles_info) - merged_access->merge(roles_info->access_with_grant_option); - } - else - { - *merged_access = user->access.access; - if (roles_info) - merged_access->merge(roles_info->access); - } - - static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW - | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW - | AccessType::TRUNCATE; - - static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; - static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; - static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; - static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; - - if (readonly_) - merged_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); - - if (readonly_ == 1) - { - /// Table functions are forbidden in readonly mode. - /// For example, for readonly = 2 - allowed. - merged_access->revoke(AccessType::CREATE_TEMPORARY_TABLE); - } - - if (!allow_ddl_) - merged_access->revoke(table_and_dictionary_ddl); - - if (!allow_introspection_) - merged_access->revoke(AccessType::INTROSPECTION); - - /// Anyone has access to the "system" database. 
- merged_access->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); - - if (readonly_ != 1) - { - /// User has access to temporary or external table if such table was resolved in session or query context - merged_access->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); - } - - if (readonly_ && grant_option) - { - /// No grant option in readonly mode. - merged_access->revoke(AccessType::ALL); - } - - if (trace_log && (params.readonly == readonly_) && (params.allow_ddl == allow_ddl_) && (params.allow_introspection == allow_introspection_)) - { - if (grant_option) - LOG_TRACE(trace_log, "List of all grants: {} WITH GRANT OPTION", merged_access->toString()); - else - LOG_TRACE(trace_log, "List of all grants: {}", merged_access->toString()); - - if (roles_info && !roles_info->getCurrentRolesNames().empty()) - { - LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}", - boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "), - boost::algorithm::join(roles_info->getEnabledRolesNames(), ", ")); - } - LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", readonly_, allow_ddl_, allow_introspection_); - } - - res = std::move(merged_access); - result_access[index].store(res); - return res; -} - - UserPtr ContextAccess::getUser() const { std::lock_guard lock{mutex}; @@ -520,9 +241,7 @@ std::shared_ptr ContextAccess::getFullAccess() static const std::shared_ptr res = [] { auto full_access = std::shared_ptr(new ContextAccess); - auto everything_granted = boost::make_shared(); - everything_granted->grant(AccessType::ALL); - boost::range::fill(full_access->result_access, everything_granted); + full_access->access = std::make_shared(AccessRights::getFullAccess()); full_access->enabled_quota = EnabledQuota::getUnlimitedQuota(); return full_access; }(); @@ -543,4 +262,246 @@ std::shared_ptr ContextAccess::getSettingsConstraints return enabled_settings ? enabled_settings->getConstraints() : nullptr; } + +std::shared_ptr ContextAccess::getAccess() const +{ + std::lock_guard lock{mutex}; + return access; +} + + +template +bool ContextAccess::isGrantedImpl2(const AccessFlags & flags, const Args &... args) const +{ + bool access_granted; + if constexpr (grant_option) + access_granted = getAccess()->hasGrantOption(flags, args...); + else + access_granted = getAccess()->isGranted(flags, args...); + + if (trace_log) + LOG_TRACE(trace_log, "Access {}: {}{}", (access_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString()), + (grant_option ? " WITH GRANT OPTION" : "")); + + return access_granted; +} + +template +bool ContextAccess::isGrantedImpl(const AccessFlags & flags) const +{ + return isGrantedImpl2(flags); +} + +template +bool ContextAccess::isGrantedImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const +{ + return isGrantedImpl2(flags, database.empty() ? 
params.current_database : database, args...); +} + +template +bool ContextAccess::isGrantedImpl(const AccessRightsElement & element) const +{ + if (element.any_database) + return isGrantedImpl(element.access_flags); + else if (element.any_table) + return isGrantedImpl(element.access_flags, element.database); + else if (element.any_column) + return isGrantedImpl(element.access_flags, element.database, element.table); + else + return isGrantedImpl(element.access_flags, element.database, element.table, element.columns); +} + +template +bool ContextAccess::isGrantedImpl(const AccessRightsElements & elements) const +{ + for (const auto & element : elements) + if (!isGrantedImpl(element)) + return false; + return true; +} + +bool ContextAccess::isGranted(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::isGranted(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + +bool ContextAccess::hasGrantOption(const AccessFlags & flags) const { return isGrantedImpl(flags); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(flags, database); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(flags, database, table); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(flags, database, table, column); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const AccessRightsElement & element) const { return isGrantedImpl(element); } +bool ContextAccess::hasGrantOption(const AccessRightsElements & elements) const { return isGrantedImpl(elements); } + + +template +void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... 
args) const +{ + if constexpr (grant_option) + { + if (hasGrantOption(flags, args...)) + return; + } + else + { + if (isGranted(flags, args...)) + return; + } + + auto show_error = [&](const String & msg, int error_code) + { + throw Exception(user_name + ": " + msg, error_code); + }; + + std::lock_guard lock{mutex}; + + if (!user) + { + show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); + } + + if (grant_option && access->isGranted(flags, args...)) + { + show_error( + "Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query it's necessary to have the grant " + + AccessRightsElement{flags, args...}.toString() + " WITH GRANT OPTION", + ErrorCodes::ACCESS_DENIED); + } + + if (params.readonly) + { + if (!access_without_readonly) + { + Params changed_params = params; + changed_params.readonly = 0; + access_without_readonly = manager->getContextAccess(changed_params); + } + + if (access_without_readonly->isGranted(flags, args...)) + { + if (params.interface == ClientInfo::Interface::HTTP && params.http_method == ClientInfo::HTTPMethod::GET) + show_error( + "Cannot execute query in readonly mode. " + "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries", + ErrorCodes::READONLY); + else + show_error("Cannot execute query in readonly mode", ErrorCodes::READONLY); + } + } + + if (!params.allow_ddl) + { + if (!access_with_allow_ddl) + { + Params changed_params = params; + changed_params.allow_ddl = true; + access_with_allow_ddl = manager->getContextAccess(changed_params); + } + + if (access_with_allow_ddl->isGranted(flags, args...)) + { + show_error("Cannot execute query. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); + } + } + + if (!params.allow_introspection) + { + if (!access_with_allow_introspection) + { + Params changed_params = params; + changed_params.allow_introspection = true; + access_with_allow_introspection = manager->getContextAccess(changed_params); + } + + if (access_with_allow_introspection->isGranted(flags, args...)) + { + show_error("Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); + } + } + + show_error( + "Not enough privileges. To execute this query it's necessary to have the grant " + + AccessRightsElement{flags, args...}.toString() + (grant_option ? " WITH GRANT OPTION" : ""), + ErrorCodes::ACCESS_DENIED); +} + +template +void ContextAccess::checkAccessImpl(const AccessFlags & flags) const +{ + checkAccessImpl2(flags); +} + +template +void ContextAccess::checkAccessImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const +{ + checkAccessImpl2(flags, database.empty() ? 
params.current_database : database, args...); +} + +template +void ContextAccess::checkAccessImpl(const AccessRightsElement & element) const +{ + if (element.any_database) + checkAccessImpl(element.access_flags); + else if (element.any_table) + checkAccessImpl(element.access_flags, element.database); + else if (element.any_column) + checkAccessImpl(element.access_flags, element.database, element.table); + else + checkAccessImpl(element.access_flags, element.database, element.table, element.columns); +} + +template +void ContextAccess::checkAccessImpl(const AccessRightsElements & elements) const +{ + for (const auto & element : elements) + checkAccessImpl(element); +} + +void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(flags, database); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(flags, database, table); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(flags, database, table, column); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } +void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } + +void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(flags); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database) const { checkAccessImpl(flags, database); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { checkAccessImpl(flags, database, table); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { checkAccessImpl(flags, database, table, column); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } +void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(element); } +void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(elements); } + + +void ContextAccess::checkAdminOption(const UUID & role_id) const +{ + if (isGranted(AccessType::ROLE_ADMIN)) + return; + + auto info = getRolesInfo(); + if (info && info->enabled_roles_with_admin_option.count(role_id)) + return; + + if (!user) + throw Exception(user_name + ": User 
has been dropped", ErrorCodes::UNKNOWN_USER); + + std::optional role_name = manager->readName(role_id); + if (!role_name) + role_name = "ID {" + toString(role_id) + "}"; + throw Exception( + getUserName() + ": Not enough privileges. To execute this query it's necessary to have the grant " + backQuoteIfNeed(*role_name) + + " WITH ADMIN OPTION ", + ErrorCodes::ACCESS_DENIED); +} + } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 27bb29a878c..997ea585c68 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -30,32 +29,34 @@ class IAST; using ASTPtr = std::shared_ptr; +struct ContextAccessParams +{ + std::optional user_id; + boost::container::flat_set current_roles; + bool use_default_roles = false; + UInt64 readonly = 0; + bool allow_ddl = false; + bool allow_introspection = false; + String current_database; + ClientInfo::Interface interface = ClientInfo::Interface::TCP; + ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; + Poco::Net::IPAddress address; + String quota_key; + + auto toTuple() const { return std::tie(user_id, current_roles, use_default_roles, readonly, allow_ddl, allow_introspection, current_database, interface, http_method, address, quota_key); } + friend bool operator ==(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() == rhs.toTuple(); } + friend bool operator !=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs == rhs); } + friend bool operator <(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return lhs.toTuple() < rhs.toTuple(); } + friend bool operator >(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return rhs < lhs; } + friend bool operator <=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(rhs < lhs); } + friend bool operator >=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs < rhs); } +}; + + class ContextAccess { public: - struct Params - { - std::optional user_id; - boost::container::flat_set current_roles; - bool use_default_roles = false; - UInt64 readonly = 0; - bool allow_ddl = false; - bool allow_introspection = false; - String current_database; - ClientInfo::Interface interface = ClientInfo::Interface::TCP; - ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; - Poco::Net::IPAddress address; - String quota_key; - - auto toTuple() const { return std::tie(user_id, current_roles, use_default_roles, readonly, allow_ddl, allow_introspection, current_database, interface, http_method, address, quota_key); } - friend bool operator ==(const Params & lhs, const Params & rhs) { return lhs.toTuple() == rhs.toTuple(); } - friend bool operator !=(const Params & lhs, const Params & rhs) { return !(lhs == rhs); } - friend bool operator <(const Params & lhs, const Params & rhs) { return lhs.toTuple() < rhs.toTuple(); } - friend bool operator >(const Params & lhs, const Params & rhs) { return rhs < lhs; } - friend bool operator <=(const Params & lhs, const Params & rhs) { return !(rhs < lhs); } - friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); } - }; - + using Params = ContextAccessParams; const Params & getParams() const { return params; } /// Returns the current user. The function can return nullptr. @@ -90,16 +91,8 @@ public: /// The function returns nullptr if there are no constraints. 
std::shared_ptr getSettingsConstraints() const; - /// Checks if a specified access is granted, and throws an exception if not. - /// Empty database means the current database. - void checkAccess(const AccessFlags & flags) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; - void checkAccess(const AccessRightsElement & element) const; - void checkAccess(const AccessRightsElements & elements) const; + /// Returns the current access rights. + std::shared_ptr getAccess() const; /// Checks if a specified access is granted. bool isGranted(const AccessFlags & flags) const; @@ -111,17 +104,26 @@ public: bool isGranted(const AccessRightsElement & element) const; bool isGranted(const AccessRightsElements & elements) const; - /// Checks if a specified access is granted, and logs a warning if not. - bool isGranted(Poco::Logger * log_, const AccessFlags & flags) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - bool isGranted(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; - bool isGranted(Poco::Logger * log_, const AccessRightsElement & element) const; - bool isGranted(Poco::Logger * log_, const AccessRightsElements & elements) const; + bool hasGrantOption(const AccessFlags & flags) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + bool hasGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + bool hasGrantOption(const AccessRightsElement & element) const; + bool hasGrantOption(const AccessRightsElements & elements) const; + + /// Checks if a specified access is granted, and throws an exception if not. + /// Empty database means the current database. 
+ void checkAccess(const AccessFlags & flags) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + void checkAccess(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + void checkAccess(const AccessRightsElement & element) const; + void checkAccess(const AccessRightsElements & elements) const; - /// Checks if a specified access is granted with grant option, and throws an exception if not. void checkGrantOption(const AccessFlags & flags) const; void checkGrantOption(const AccessFlags & flags, const std::string_view & database) const; void checkGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; @@ -146,24 +148,37 @@ private: void setUser(const UserPtr & user_) const; void setRolesInfo(const std::shared_ptr & roles_info_) const; void setSettingsAndConstraints() const; + void setFinalAccess() const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const; + template + bool isGrantedImpl(const AccessFlags & flags) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... args) const; + template + bool isGrantedImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const; + template + bool isGrantedImpl(const AccessRightsElement & element) const; - template - bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElements & elements) const; + template + bool isGrantedImpl(const AccessRightsElements & elements) const; - template - bool calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const; + template + bool isGrantedImpl2(const AccessFlags & flags, const Args &... args) const; - boost::shared_ptr calculateResultAccess(bool grant_option) const; - boost::shared_ptr calculateResultAccess(bool grant_option, UInt64 readonly_, bool allow_ddl_, bool allow_introspection_) const; + template + void checkAccessImpl(const AccessFlags & flags) const; + + template + void checkAccessImpl(const AccessFlags & flags, const std::string_view & database, const Args &... args) const; + + template + void checkAccessImpl(const AccessRightsElement & element) const; + + template + void checkAccessImpl(const AccessRightsElements & elements) const; + + template + void checkAccessImpl2(const AccessFlags & flags, const Args &... 
args) const; const AccessControlManager * manager = nullptr; const Params params; @@ -174,10 +189,13 @@ private: mutable std::shared_ptr enabled_roles; mutable ext::scope_guard subscription_for_roles_changes; mutable std::shared_ptr roles_info; - mutable boost::atomic_shared_ptr result_access[7]; + mutable std::shared_ptr access; mutable std::shared_ptr enabled_row_policies; mutable std::shared_ptr enabled_quota; mutable std::shared_ptr enabled_settings; + mutable std::shared_ptr access_without_readonly; + mutable std::shared_ptr access_with_allow_ddl; + mutable std::shared_ptr access_with_allow_introspection; mutable std::mutex mutex; }; diff --git a/src/Access/EnabledRolesInfo.cpp b/src/Access/EnabledRolesInfo.cpp index 01b90d6fa1e..8069da467ad 100644 --- a/src/Access/EnabledRolesInfo.cpp +++ b/src/Access/EnabledRolesInfo.cpp @@ -28,8 +28,7 @@ bool operator==(const EnabledRolesInfo & lhs, const EnabledRolesInfo & rhs) { return (lhs.current_roles == rhs.current_roles) && (lhs.enabled_roles == rhs.enabled_roles) && (lhs.enabled_roles_with_admin_option == rhs.enabled_roles_with_admin_option) && (lhs.names_of_roles == rhs.names_of_roles) - && (lhs.access == rhs.access) && (lhs.access_with_grant_option == rhs.access_with_grant_option) - && (lhs.settings_from_enabled_roles == rhs.settings_from_enabled_roles); + && (lhs.access == rhs.access) && (lhs.settings_from_enabled_roles == rhs.settings_from_enabled_roles); } } diff --git a/src/Access/EnabledRolesInfo.h b/src/Access/EnabledRolesInfo.h index 45e1bfd9057..f06b7478daf 100644 --- a/src/Access/EnabledRolesInfo.h +++ b/src/Access/EnabledRolesInfo.h @@ -18,7 +18,6 @@ struct EnabledRolesInfo boost::container::flat_set enabled_roles_with_admin_option; std::unordered_map names_of_roles; AccessRights access; - AccessRights access_with_grant_option; SettingsProfileElements settings_from_enabled_roles; Strings getCurrentRolesNames() const; diff --git a/src/Access/GrantedAccess.cpp b/src/Access/GrantedAccess.cpp deleted file mode 100644 index 2af1e0b44ec..00000000000 --- a/src/Access/GrantedAccess.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include - - -namespace DB -{ - -GrantedAccess::GrantsAndPartialRevokes GrantedAccess::getGrantsAndPartialRevokes() const -{ - GrantsAndPartialRevokes res; - res.grants_with_grant_option = access_with_grant_option.getGrants(); - AccessRights access_without_gg = access; - access_without_gg.revoke(res.grants_with_grant_option); - auto gr = access_without_gg.getGrantsAndPartialRevokes(); - res.grants = std::move(gr.grants); - res.revokes = std::move(gr.revokes); - AccessRights access_with_grant_options_without_r = access_with_grant_option; - access_with_grant_options_without_r.grant(res.revokes); - res.revokes_grant_option = access_with_grant_options_without_r.getPartialRevokes(); - return res; -} - -} diff --git a/src/Access/GrantedAccess.h b/src/Access/GrantedAccess.h deleted file mode 100644 index b8f6bdfe8fb..00000000000 --- a/src/Access/GrantedAccess.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -/// Access rights as they are granted to a role or user. -/// Stores both the access rights themselves and the access rights with grant option. -struct GrantedAccess -{ - AccessRights access; - AccessRights access_with_grant_option; - - template - void grant(const Args &... args) - { - access.grant(args...); - } - - template - void grantWithGrantOption(const Args &... args) - { - access.grant(args...); - access_with_grant_option.grant(args...); - } - - template - void revoke(const Args &... 
args) - { - access.revoke(args...); - access_with_grant_option.revoke(args...); - } - - template - void revokeGrantOption(const Args &... args) - { - access_with_grant_option.revoke(args...); - } - - struct GrantsAndPartialRevokes - { - AccessRightsElements grants; - AccessRightsElements revokes; - AccessRightsElements grants_with_grant_option; - AccessRightsElements revokes_grant_option; - }; - - /// Retrieves the information about grants and partial revokes. - GrantsAndPartialRevokes getGrantsAndPartialRevokes() const; - - friend bool operator ==(const GrantedAccess & left, const GrantedAccess & right) { return (left.access == right.access) && (left.access_with_grant_option == right.access_with_grant_option); } - friend bool operator !=(const GrantedAccess & left, const GrantedAccess & right) { return !(left == right); } -}; -} diff --git a/src/Access/Role.h b/src/Access/Role.h index 9acb97bdfbd..131bbd69195 100644 --- a/src/Access/Role.h +++ b/src/Access/Role.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -11,7 +11,7 @@ namespace DB struct Role : public IAccessEntity { - GrantedAccess access; + AccessRights access; GrantedRoles granted_roles; SettingsProfileElements settings; diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index ca8065145f3..a0468958d42 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -43,8 +43,7 @@ namespace roles_info.enabled_roles_with_admin_option.emplace(role_id); roles_info.names_of_roles[role_id] = role->getName(); - roles_info.access.merge(role->access.access); - roles_info.access_with_grant_option.merge(role->access.access_with_grant_option); + roles_info.access.merge(role->access); roles_info.settings_from_enabled_roles.merge(role->settings); for (const auto & granted_role : role->granted_roles.roles) diff --git a/src/Access/User.h b/src/Access/User.h index 4852fce375d..13f1e532015 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -1,9 +1,9 @@ #pragma once #include +#include #include #include -#include #include #include #include @@ -17,7 +17,7 @@ struct User : public IAccessEntity { Authentication authentication; AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; - GrantedAccess access; + AccessRights access; GrantedRoles granted_roles; RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{}; SettingsProfileElements settings; diff --git a/src/Access/ya.make b/src/Access/ya.make index bdd62ae2b7b..77c94b87dfa 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -17,7 +17,6 @@ SRCS( EnabledRolesInfo.cpp EnabledRowPolicies.cpp EnabledSettings.cpp - GrantedAccess.cpp GrantedRoles.cpp IAccessEntity.cpp IAccessStorage.cpp diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 28436f192b0..2278c0e452f 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1278,7 +1279,7 @@ private: }; -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_required_access) +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option) { /// Remove FORMAT and INTO OUTFILE if exists ASTPtr query_ptr = query_ptr_->clone(); @@ -1323,10 +1324,10 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont /// the local current database or a shard's 
default database. bool need_replace_current_database = (std::find_if( - query_required_access.begin(), - query_required_access.end(), + query_requires_access.begin(), + query_requires_access.end(), [](const AccessRightsElement & elem) { return elem.isEmptyDatabase(); }) - != query_required_access.end()); + != query_requires_access.end()); if (need_replace_current_database) { @@ -1355,29 +1356,31 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont AddDefaultDatabaseVisitor visitor(current_database); visitor.visitDDL(query_ptr); - query_required_access.replaceEmptyDatabase(current_database); + query_requires_access.replaceEmptyDatabase(current_database); } else { - size_t old_num_elements = query_required_access.size(); - for (size_t i = 0; i != old_num_elements; ++i) + for (size_t i = 0; i != query_requires_access.size();) { - auto & element = query_required_access[i]; + auto & element = query_requires_access[i]; if (element.isEmptyDatabase()) { - element.setDatabase(shard_default_databases[0]); - for (size_t j = 1; j != shard_default_databases.size(); ++j) - { - query_required_access.push_back(element); - query_required_access.back().setDatabase(shard_default_databases[j]); - } + query_requires_access.insert(query_requires_access.begin() + i + 1, shard_default_databases.size() - 1, element); + for (size_t j = 0; j != shard_default_databases.size(); ++j) + query_requires_access[i + j].replaceEmptyDatabase(shard_default_databases[j]); + i += shard_default_databases.size(); } + else + ++i; } } } /// Check access rights, assume that all servers have the same users config - context.checkAccess(query_required_access); + if (query_requires_grant_option) + context.getAccess()->checkGrantOption(query_requires_access); + else + context.checkAccess(query_requires_access); DDLLogEntry entry; entry.hosts = std::move(hosts); @@ -1394,6 +1397,10 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont return io; } +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option) +{ + return executeDDLQueryOnCluster(query_ptr, context, AccessRightsElements{query_requires_access}, query_requires_grant_option); +} BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context) { diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index d764eab626f..544fb3da27d 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -29,8 +29,9 @@ struct DDLTask; /// Pushes distributed DDL query to the queue -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_required_access); BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context); +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const AccessRightsElements & query_requires_access, bool query_requires_grant_option = false); +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, AccessRightsElements && query_requires_access, bool query_requires_grant_option = false); class DDLWorker diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 8981c06f962..b7c62197059 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -16,7 +16,7 @@ namespace DB namespace { template - void updateFromQueryImpl(T & 
grantee, const ASTGrantQuery & query, const std::vector & roles_from_query, const String & current_database) + void updateFromQueryImpl(T & grantee, const ASTGrantQuery & query, const std::vector & roles_from_query) { using Kind = ASTGrantQuery::Kind; if (!query.access_rights_elements.empty()) @@ -24,16 +24,16 @@ namespace if (query.kind == Kind::GRANT) { if (query.grant_option) - grantee.access.grantWithGrantOption(query.access_rights_elements, current_database); + grantee.access.grantWithGrantOption(query.access_rights_elements); else - grantee.access.grant(query.access_rights_elements, current_database); + grantee.access.grant(query.access_rights_elements); } else { if (query.grant_option) - grantee.access.revokeGrantOption(query.access_rights_elements, current_database); + grantee.access.revokeGrantOption(query.access_rights_elements); else - grantee.access.revoke(query.access_rights_elements, current_database); + grantee.access.revoke(query.access_rights_elements); } } @@ -67,9 +67,9 @@ namespace BlockIO InterpreterGrantQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = context.getAccessControlManager(); + query.replaceCurrentUserTagWithName(context.getUserName()); auto access = context.getAccess(); - access->checkGrantOption(query.access_rights_elements); + auto & access_control = context.getAccessControlManager(); std::vector roles_from_query; if (query.roles) @@ -80,25 +80,24 @@ BlockIO InterpreterGrantQuery::execute() } if (!query.cluster.empty()) - { - query.replaceCurrentUserTagWithName(context.getUserName()); - return executeDDLQueryOnCluster(query_ptr, context); - } + return executeDDLQueryOnCluster(query_ptr, context, query.access_rights_elements, true); + + query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); + access->checkGrantOption(query.access_rights_elements); std::vector to_roles = RolesOrUsersSet{*query.to_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); - String current_database = context.getCurrentDatabase(); auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); if (auto user = typeid_cast>(clone)) { - updateFromQueryImpl(*user, query, roles_from_query, current_database); + updateFromQueryImpl(*user, query, roles_from_query); return user; } else if (auto role = typeid_cast>(clone)) { - updateFromQueryImpl(*role, query, roles_from_query, current_database); + updateFromQueryImpl(*role, query, roles_from_query); return role; } else @@ -116,7 +115,7 @@ void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery std::vector roles_from_query; if (query.roles) roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(user, query, roles_from_query, {}); + updateFromQueryImpl(user, query, roles_from_query); } @@ -125,7 +124,7 @@ void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery std::vector roles_from_query; if (query.roles) roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(role, query, roles_from_query, {}); + updateFromQueryImpl(role, query, roles_from_query); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 82c134aeba6..80710600db6 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -84,19 +84,20 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce const ColumnString & 
user_col = typeid_cast(*processes_block.getByName("user").column); const ClientInfo & my_client = context.getProcessListElement()->getClientInfo(); - std::optional can_kill_query_started_by_another_user_cached; - auto can_kill_query_started_by_another_user = [&]() -> bool + bool access_denied = false; + std::optional is_kill_query_granted_value; + auto is_kill_query_granted = [&]() -> bool { - if (!can_kill_query_started_by_another_user_cached) + if (!is_kill_query_granted_value) { - can_kill_query_started_by_another_user_cached - = context.getAccess()->isGranted(&Poco::Logger::get("InterpreterKillQueryQuery"), AccessType::KILL_QUERY); + is_kill_query_granted_value = context.getAccess()->isGranted(AccessType::KILL_QUERY); + if (!*is_kill_query_granted_value) + access_denied = true; } - return *can_kill_query_started_by_another_user_cached; + return *is_kill_query_granted_value; }; String query_user; - bool access_denied = false; for (size_t i = 0; i < num_processes; ++i) { @@ -107,11 +108,8 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce auto query_id = query_id_col.getDataAt(i).toString(); query_user = user_col.getDataAt(i).toString(); - if ((my_client.current_user != query_user) && !can_kill_query_started_by_another_user()) - { - access_denied = true; + if ((my_client.current_user != query_user) && !is_kill_query_granted()) continue; - } res.emplace_back(std::move(query_id), query_user, i, false); } @@ -269,7 +267,7 @@ BlockIO InterpreterKillQueryQuery::execute() ParserAlterCommand parser; auto command_ast = parseQuery(parser, command_col.getDataAt(i).toString(), 0, context.getSettingsRef().max_parser_depth); required_access_rights = InterpreterAlterQuery::getRequiredAccessForCommand(command_ast->as(), table_id.database_name, table_id.table_name); - if (!access->isGranted(&Poco::Logger::get("InterpreterKillQueryQuery"), required_access_rights)) + if (!access->isGranted(required_access_rights)) { access_denied = true; continue; diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index ebb0d871c8b..45e065dcfd9 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -35,44 +35,33 @@ namespace std::shared_ptr to_roles = std::make_shared(); to_roles->names.push_back(grantee.getName()); - auto grants_and_partial_revokes = grantee.access.getGrantsAndPartialRevokes(); + std::shared_ptr current_query = nullptr; - for (bool grant_option : {false, true}) + auto elements = grantee.access.getElements(); + for (const auto & element : elements) { - using Kind = ASTGrantQuery::Kind; - for (Kind kind : {Kind::GRANT, Kind::REVOKE}) + if (current_query) { - AccessRightsElements * elements = nullptr; - if (grant_option) - elements = (kind == Kind::GRANT) ? &grants_and_partial_revokes.grants_with_grant_option : &grants_and_partial_revokes.revokes_grant_option; - else - elements = (kind == Kind::GRANT) ? 
&grants_and_partial_revokes.grants : &grants_and_partial_revokes.revokes; - elements->normalize(); - - std::shared_ptr grant_query = nullptr; - for (size_t i = 0; i != elements->size(); ++i) - { - const auto & element = (*elements)[i]; - bool prev_element_on_same_db_and_table = false; - if (grant_query) - { - const auto & prev_element = grant_query->access_rights_elements.back(); - if ((element.database == prev_element.database) && (element.any_database == prev_element.any_database) - && (element.table == prev_element.table) && (element.any_table == prev_element.any_table)) - prev_element_on_same_db_and_table = true; - } - if (!prev_element_on_same_db_and_table) - { - grant_query = std::make_shared(); - grant_query->kind = kind; - grant_query->attach = attach_mode; - grant_query->grant_option = grant_option; - grant_query->to_roles = to_roles; - res.push_back(grant_query); - } - grant_query->access_rights_elements.emplace_back(std::move(element)); - } + const auto & prev_element = current_query->access_rights_elements.back(); + bool continue_using_current_query = (element.database == prev_element.database) + && (element.any_database == prev_element.any_database) && (element.table == prev_element.table) + && (element.any_table == prev_element.any_table) && (element.grant_option == current_query->grant_option) + && (element.kind == current_query->kind); + if (!continue_using_current_query) + current_query = nullptr; } + + if (!current_query) + { + current_query = std::make_shared(); + current_query->kind = element.kind; + current_query->attach = attach_mode; + current_query->grant_option = element.grant_option; + current_query->to_roles = to_roles; + res.push_back(current_query); + } + + current_query->access_rights_elements.emplace_back(std::move(element)); } auto grants_roles = grantee.granted_roles.getGrants(); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9b1712ac407..7c80b681114 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -152,8 +152,16 @@ void InterpreterSystemQuery::startStopAction(StorageActionBlockType action_type, if (!table) continue; - if (!access->isGranted(log, getRequiredAccessType(action_type), elem.first, iterator->name())) + if (!access->isGranted(getRequiredAccessType(action_type), elem.first, iterator->name())) + { + LOG_INFO( + log, + "Access {} denied, skipping {}.{}", + toString(getRequiredAccessType(action_type)), + elem.first, + iterator->name()); continue; + } if (start) manager->remove(table, action_type); diff --git a/src/Interpreters/tests/users.cpp b/src/Interpreters/tests/users.cpp index 5c7d66ed7ed..acd0cfd0519 100644 --- a/src/Interpreters/tests/users.cpp +++ b/src/Interpreters/tests/users.cpp @@ -218,7 +218,7 @@ void runOneTest(const TestDescriptor & test_descriptor) try { - res = acl_manager.read(entry.user_name)->access.access.isGranted(DB::AccessType::ALL, entry.database_name); + res = acl_manager.read(entry.user_name)->access.isGranted(DB::AccessType::ALL, entry.database_name); } catch (const Poco::Exception &) { diff --git a/src/Parsers/ASTGrantQuery.cpp b/src/Parsers/ASTGrantQuery.cpp index cf1943477b2..ae9649cdddc 100644 --- a/src/Parsers/ASTGrantQuery.cpp +++ b/src/Parsers/ASTGrantQuery.cpp @@ -133,6 +133,12 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F } +void ASTGrantQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + 
access_rights_elements.replaceEmptyDatabase(current_database); +} + + void ASTGrantQuery::replaceCurrentUserTagWithName(const String & current_user_name) const { if (to_roles) diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/ASTGrantQuery.h index 9a11f5dc509..c36e42689a5 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/ASTGrantQuery.h @@ -19,11 +19,7 @@ class ASTRolesOrUsersSet; class ASTGrantQuery : public IAST, public ASTQueryWithOnCluster { public: - enum class Kind - { - GRANT, - REVOKE, - }; + using Kind = AccessRightsElementWithOptions::Kind; Kind kind = Kind::GRANT; bool attach = false; AccessRightsElements access_rights_elements; @@ -35,6 +31,7 @@ public: String getID(char) const override; ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void replaceEmptyDatabaseWithCurrent(const String & current_database); void replaceCurrentUserTagWithName(const String & current_user_name) const; ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } }; diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index a663e3307fe..360256c1f45 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,7 @@ namespace DB { using EntityType = IAccessEntity::Type; - +using Kind = AccessRightsElementWithOptions::Kind; NamesAndTypesList StorageSystemGrants::getNamesAndTypes() { @@ -63,7 +64,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & const String * database, const String * table, const String * column, - bool is_partial_revoke, + Kind kind, bool grant_option) { if (grantee_type == EntityType::USER) @@ -118,15 +119,13 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & column_column_null_map.push_back(true); } - column_is_partial_revoke.push_back(is_partial_revoke); + column_is_partial_revoke.push_back(kind == Kind::REVOKE); column_grant_option.push_back(grant_option); }; auto add_rows = [&](const String & grantee_name, IAccessEntity::Type grantee_type, - const AccessRightsElements & elements, - bool is_partial_revoke, - bool grant_option) + const AccessRightsElementsWithOptions & elements) { for (const auto & element : elements) { @@ -140,13 +139,13 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & if (element.any_column) { for (const auto & access_type : access_types) - add_row(grantee_name, grantee_type, access_type, database, table, nullptr, is_partial_revoke, grant_option); + add_row(grantee_name, grantee_type, access_type, database, table, nullptr, element.kind, element.grant_option); } else { for (const auto & access_type : access_types) for (const auto & column : element.columns) - add_row(grantee_name, grantee_type, access_type, database, table, &column, is_partial_revoke, grant_option); + add_row(grantee_name, grantee_type, access_type, database, table, &column, element.kind, element.grant_option); } } }; @@ -157,7 +156,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & if (!entity) continue; - const GrantedAccess * access = nullptr; + const AccessRights * access = nullptr; if (auto role = typeid_cast(entity)) access = &role->access; else if (auto user = typeid_cast(entity)) @@ -167,13 +166,8 @@ void 
StorageSystemGrants::fillData(MutableColumns & res_columns, const Context & const String & grantee_name = entity->getName(); const auto grantee_type = entity->getType(); - auto grants_and_revokes = access->access.getGrantsAndPartialRevokes(); - auto grants_and_revokes_with_grant_option = access->access_with_grant_option.getGrantsAndPartialRevokes(); - - add_rows(grantee_name, grantee_type, grants_and_revokes.grants, /* is_partial_revoke = */ false, /* grant_option = */ false); - add_rows(grantee_name, grantee_type, grants_and_revokes.revokes, /* is_partial_revoke = */ true, /* grant_option = */ false); - add_rows(grantee_name, grantee_type, grants_and_revokes_with_grant_option.grants, /* is_partial_revoke = */ false, /* grant_option = */ true); - add_rows(grantee_name, grantee_type, grants_and_revokes_with_grant_option.revokes, /* is_partial_revoke = */ true, /* grant_option = */ true); + auto elements = access->getElements(); + add_rows(grantee_name, grantee_type, elements); } } diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index ae75f69d28a..e1bc99ca75b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -100,7 +100,6 @@ def test_introspection(): assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') ORDER BY user_name, access_type, grant_option") ==\ TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ], - [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 0 ], [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 1 ]]) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index e668b461389..5fb521fc1ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -140,7 +140,6 @@ def test_introspection(): assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, grant_option") ==\ TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ], - [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 0 ], [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 1 ], [ "\N", "R2", "SELECT", "test", "table", "\N", 0, 0 ], [ "\N", "R2", "SELECT", "test", "table", "x", 1, 0 ]]) diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.reference b/tests/queries/0_stateless/01073_grant_and_revoke.reference index 134256c8113..a19caf19533 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.reference +++ b/tests/queries/0_stateless/01073_grant_and_revoke.reference @@ -1,11 +1,11 @@ CREATE USER test_user_01073 A B +GRANT INSERT, ALTER DELETE ON *.* TO test_user_01073 GRANT SELECT ON db1.* TO test_user_01073 GRANT SELECT ON db2.table TO test_user_01073 GRANT SELECT(col1) ON db3.table TO test_user_01073 GRANT SELECT(col1, col2) ON db4.table TO test_user_01073 -GRANT INSERT, ALTER DELETE ON *.* TO test_user_01073 C -GRANT SELECT(col1) ON db4.table TO test_user_01073 GRANT ALTER DELETE ON *.* TO test_user_01073 +GRANT SELECT(col1) ON db4.table TO test_user_01073 diff --git a/tests/queries/0_stateless/01074_partial_revokes.reference b/tests/queries/0_stateless/01074_partial_revokes.reference index 19a70679143..43e44f3c941 100644 --- a/tests/queries/0_stateless/01074_partial_revokes.reference +++ b/tests/queries/0_stateless/01074_partial_revokes.reference @@ -1,2 +1,61 @@ +--simple 1 GRANT SELECT ON *.* TO test_user_01074 REVOKE SELECT ON db.* FROM test_user_01074 +--cleanup +--simple 2 +GRANT 
SELECT ON db.* TO test_user_01074 +REVOKE SELECT ON db.table FROM test_user_01074 +--cleanup +--simple 3 +GRANT SELECT ON db.table TO test_user_01074 +REVOKE SELECT(col1) ON db.table FROM test_user_01074 +--cleanup +--complex 1 +GRANT SELECT ON *.* TO test_user_01074 +REVOKE SELECT(col1, col2) ON db.table FROM test_user_01074 +--cleanup +--complex 2 +GRANT SELECT ON *.* TO test_user_01074 +REVOKE SELECT ON db.* FROM test_user_01074 +GRANT SELECT ON db.table TO test_user_01074 +REVOKE SELECT(col1) ON db.table FROM test_user_01074 +┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ +┃ user_name  ┃ role_name ┃ access_type ┃ database ┃ table ┃ column ┃ is_partial_revoke ┃ grant_option ┃ +┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ 0 │ +├─────────────────┼───────────┼─────────────┼──────────┼───────┼────────┼───────────────────┼──────────────┤ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ db │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 1 │ 0 │ +├─────────────────┼───────────┼─────────────┼──────────┼───────┼────────┼───────────────────┼──────────────┤ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ db │ table │ ᴺᵁᴸᴸ │ 0 │ 0 │ +├─────────────────┼───────────┼─────────────┼──────────┼───────┼────────┼───────────────────┼──────────────┤ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ db │ table │ col1 │ 1 │ 0 │ +└─────────────────┴───────────┴─────────────┴──────────┴───────┴────────┴───────────────────┴──────────────┘ +--cleanup +--revoke 1 +GRANT SELECT ON *.* TO test_user_01074 +REVOKE SELECT ON db.* FROM test_user_01074 +--cleanup +--revoke 2 +GRANT SELECT ON *.* TO test_user_01074 +--cleanup +--grant option 1 +GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION +REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074 +┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ +┃ user_name  ┃ role_name ┃ access_type ┃ database ┃ table ┃ column ┃ is_partial_revoke ┃ grant_option ┃ +┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ 1 │ +├─────────────────┼───────────┼─────────────┼──────────┼───────┼────────┼───────────────────┼──────────────┤ +│ test_user_01074 │ ᴺᵁᴸᴸ │ SELECT │ db │ table │ col1 │ 1 │ 1 │ +└─────────────────┴───────────┴─────────────┴──────────┴───────┴────────┴───────────────────┴──────────────┘ +--cleanup +--grant option 2 +GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION +REVOKE SELECT(col1) ON db.table FROM test_user_01074 +--cleanup +--grant option 3 +GRANT SELECT ON *.* TO test_user_01074 +--cleanup +--grant option 4 +GRANT SELECT ON *.* TO test_user_01074 +GRANT SELECT ON db.* TO test_user_01074 WITH GRANT OPTION diff --git a/tests/queries/0_stateless/01074_partial_revokes.sql b/tests/queries/0_stateless/01074_partial_revokes.sql index 4406341cc4f..8c92b9511c7 100644 --- a/tests/queries/0_stateless/01074_partial_revokes.sql +++ b/tests/queries/0_stateless/01074_partial_revokes.sql @@ -1,8 +1,106 @@ DROP USER IF EXISTS test_user_01074; CREATE USER test_user_01074; +SELECT '--simple 1'; GRANT SELECT ON *.* TO test_user_01074; REVOKE SELECT ON db.* FROM test_user_01074; SHOW GRANTS FOR test_user_01074; +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--simple 2'; +GRANT SELECT ON db.* TO 
test_user_01074; +REVOKE SELECT ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--simple 3'; +GRANT SELECT ON db.table TO test_user_01074; +REVOKE SELECT(col1) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--complex 1'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE SELECT(col1, col2) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--complex 2'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE SELECT ON db.* FROM test_user_01074; +GRANT SELECT ON db.table TO test_user_01074; +REVOKE SELECT(col1) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; +SELECT * FROM system.grants WHERE user_name = 'test_user_01074' format Pretty; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--revoke 1'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE SELECT ON db.table FROM test_user_01074; +REVOKE SELECT ON db.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--revoke 2'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE SELECT ON db.table FROM test_user_01074; +GRANT SELECT ON db.* TO test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--grant option 1'; +GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION; +REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; +SELECT * FROM system.grants WHERE user_name = 'test_user_01074' format Pretty; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--grant option 2'; +GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION; +REVOKE SELECT(col1) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--grant option 3'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--cleanup'; +REVOKE SELECT ON *.* FROM test_user_01074; +SHOW GRANTS FOR test_user_01074; + +SELECT '--grant option 4'; +GRANT SELECT ON *.* TO test_user_01074; +REVOKE SELECT ON db.table FROM test_user_01074; +GRANT SELECT ON db.* TO test_user_01074 WITH GRANT OPTION; +SHOW GRANTS FOR test_user_01074; + DROP USER test_user_01074; From af3716f305b457301f0eca26d5053e403272b70e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Jun 2020 19:04:15 +0300 Subject: [PATCH 131/330] Changelog for 20.3.11, 12 --- CHANGELOG.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a4666f08bb..606d394ff1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -323,6 +323,81 @@ ## ClickHouse release v20.3 +### ClickHouse release v20.3.12.112-lts 2020-06-25 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. 
Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)).
+* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)).
+* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)).
+* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)).
+* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix error which leads to an incorrect state of `system.mutations`. It may show that the whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)).
+* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove trivial count query optimization if row-level security is set. In previous versions the user got the total count of records in the table instead of the filtered count. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)).
+* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)).
+* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* All queries in an HTTP session had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)).
+* Now clickhouse-server docker container will prefer IPv6 when checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)).
+* Fix shard_num/replica_num for (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)).
+* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix wrong results of distributed queries when alias could override qualified column name. Fixes [#9672](https://github.com/ClickHouse/ClickHouse/issues/9672) [#9714](https://github.com/ClickHouse/ClickHouse/issues/9714). [#9972](https://github.com/ClickHouse/ClickHouse/pull/9972) ([Artem Zuikov](https://github.com/4ertus2)).
+
+
+### ClickHouse release v20.3.11.97-lts 2020-06-10
+
+#### New Feature
+
+* Now ClickHouse controls timeouts of dictionary sources on its side. Two new settings were added to cache dictionary configuration: `strict_max_lifetime_seconds`, which is `max_lifetime` by default, and `query_wait_timeout_milliseconds`, which is one minute by default. The first setting is also useful with the `allow_read_expired_keys` setting (to forbid reading very expired keys). [#10337](https://github.com/ClickHouse/ClickHouse/pull/10337) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix
+
+* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
+* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
+* Fix crash in JOIN over LowCardinality(T) and Nullable(T). 
[#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in direct selects from StorageJoin (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. 
Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Introduce commit retry logic to decrease the possibility of getting duplicates from Kafka in rare cases when offset commit was failed. 
[#9884](https://github.com/ClickHouse/ClickHouse/pull/9884) ([filimonov](https://github.com/filimonov)). + +#### Performance Improvement + +* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement + +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). + ### ClickHouse release v20.3.10.75-lts 2020-05-23 #### Bug Fix From 444a869d7820746fcf44b073d47931e22a9dec87 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 30 Jun 2020 19:07:50 +0300 Subject: [PATCH 132/330] simple changelog script --- utils/simple-backport/changelog.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 75a54a50b92..4b898f4c75c 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + from="$1" to="$2" log_command=(git log "$from..$to" --first-parent) @@ -82,5 +84,5 @@ done echo "### ClickHouse release $to FIXME as compared to $from " > changelog.md -./format-changelog.py changelog-prs-filtered.txt >> changelog.md +"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md cat changelog.md From 67d03b6b758c18c4ad5ce1a39fc15deabb322128 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Jun 2020 19:35:07 +0300 Subject: [PATCH 133/330] bump CI From 358570050676196f89a5e99813de67330fe3b679 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jun 2020 19:41:43 +0300 Subject: [PATCH 134/330] Fix tests. 
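This change relaxes the filter-type check in `appendPrewhere`/`appendWhere` and in `MergeTreeBaseSelectProcessor::executePrewhereActions`: the type of the WHERE/PREWHERE result column is passed through `removeNullable()` before the `isInteger()` check, so a `Nullable(UInt8)` condition is accepted instead of throwing `Invalid type for filter`. Below is a minimal sketch of the query shape this affects; the table and column names are hypothetical and used only for illustration.

```sql
-- Hypothetical reproduction: comparing a Nullable column with a constant
-- produces a Nullable(UInt8) filter column, which the relaxed check accepts.
CREATE TABLE t_nullable_filter (x Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_nullable_filter VALUES (1), (NULL);

SELECT count() FROM t_nullable_filter WHERE x = 1;    -- filter type: Nullable(UInt8)
SELECT count() FROM t_nullable_filter PREWHERE x = 1; -- same check applies in PREWHERE
```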
--- src/Interpreters/ExpressionAnalyzer.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d8a330de9e1..1bc7abc9cc7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -637,7 +637,7 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.can_remove_required_output.push_back(true); auto filter_type = step.actions->getSampleBlock().getByName(prewhere_column_name).type; - if (!isInteger(filter_type)) + if (!isInteger(removeNullable(filter_type))) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -728,7 +728,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions); auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; - if (!isInteger(filter_type)) + if (!isInteger(removeNullable(filter_type))) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 2f9230a4067..cc7d43f6003 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -318,7 +319,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P prewhere_info->prewhere_actions->execute(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); - if (!isInteger(prewhere_column.type)) + if (!isInteger(removeNullable(prewhere_column.type))) throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), ErrorCodes::LOGICAL_ERROR); From b1d2d55cba7bbb357a5cc5dd43c2248cf4c1cd35 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Tue, 30 Jun 2020 17:43:45 +0100 Subject: [PATCH 135/330] Add explicit test for a case where AST hashes collide for different prepared sets --- tests/queries/0_stateless/00612_pk_in_tuple.reference | 1 + tests/queries/0_stateless/00612_pk_in_tuple.sql | 1 + tests/queries/0_stateless/00612_pk_in_tuple_perf.sh | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00612_pk_in_tuple.reference b/tests/queries/0_stateless/00612_pk_in_tuple.reference index 74e8e642f20..294ab7491c1 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple.reference +++ b/tests/queries/0_stateless/00612_pk_in_tuple.reference @@ -35,3 +35,4 @@ max(key) from tab_00612 any left join (select key, arrayJoin(n.x) as val from ta max(key) from tab_00612 any left join (select key, arrayJoin(n.x) as val from tab_00612) using key where (key, val) in ((1, 1), (2, 2)) 2 1 +1 diff --git a/tests/queries/0_stateless/00612_pk_in_tuple.sql b/tests/queries/0_stateless/00612_pk_in_tuple.sql index 499474d1b0a..ac563006cda 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple.sql +++ b/tests/queries/0_stateless/00612_pk_in_tuple.sql @@ -42,5 +42,6 @@ drop table if exists tab_00612; CREATE TABLE tab_00612 (key1 Int32, id1 Int64, c1 Int64) ENGINE = MergeTree PARTITION BY id1 ORDER BY (key1) ; insert into tab_00612 values ( -1, 1, 0 ); SELECT count(*) FROM tab_00612 PREWHERE id1 IN (1); +SELECT 
count() FROM tab_00612 WHERE (key1, id1) IN (-1, 1) AND (key1, 1) IN (-1, 1); drop table tab_00612; diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh index 5a77dde34a8..35a76a2d365 100755 --- a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh @@ -21,4 +21,4 @@ EOF query="SELECT count() FROM pk_in_tuple_perf WHERE (v, u) IN ((2, 10), (2, 20))" $CLICKHOUSE_CLIENT --query "$query" -$CLICKHOUSE_CLIENT --query "$query FORMAT JSON" | grep "rows_read" \ No newline at end of file +$CLICKHOUSE_CLIENT --query "$query FORMAT JSON" | grep "rows_read" From e48b6b8f84426e856f2dcbb30dd77a68f39a5c56 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 30 Jun 2020 21:22:07 +0300 Subject: [PATCH 136/330] [blog] add RSS feed (#12064) * [blog] add rss feed * better title --- docs/tools/blog.py | 10 ++++++++- docs/tools/mdx_clickhouse.py | 9 ++------ docs/tools/util.py | 34 ++++++++++++++++++++++++++++++ docs/tools/website.py | 20 ++---------------- website/templates/blog/rss.xml | 23 ++++++++++++++++++++ website/templates/common_meta.html | 6 +++++- 6 files changed, 75 insertions(+), 27 deletions(-) create mode 100644 website/templates/blog/rss.xml diff --git a/docs/tools/blog.py b/docs/tools/blog.py index f5415bec608..c3261f61d4d 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -80,7 +80,8 @@ def build_for_lang(lang, args): includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), is_amp=False, is_blog=True, - post_meta=post_meta + post_meta=post_meta, + today=datetime.date.today().isoformat() ) ) @@ -89,6 +90,13 @@ def build_for_lang(lang, args): redirects.build_blog_redirects(args) + env = util.init_jinja2_env(args) + with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f: + rss_template_string = f.read().decode('utf-8').strip() + rss_template = env.from_string(rss_template_string) + with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f: + f.write(rss_template.render({'config': raw_config})) + # TODO: AMP for blog # if not args.skip_amp: # amp.build_amp(lang, args, cfg) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 0c431fec106..80ecf829341 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -14,9 +14,6 @@ import macros.plugin import slugify as slugify_impl -import amp -import website - def slugify(value, separator): return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) @@ -119,6 +116,7 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): ]) def on_env(self, env, config, files): + import util env.add_extension('jinja2.ext.i18n') dirname = os.path.join(config.data['theme'].dirs[0], 'locale') lang = config.data['theme']['language'] @@ -126,10 +124,7 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): get_translations(dirname, lang), newstyle=True ) - chunk_size = 10240 - env.filters['chunks'] = lambda line: [line[i:i+chunk_size] for i in range(0, len(line), chunk_size)] - env.filters['html_to_amp'] = amp.html_to_amp - env.filters['adjust_markdown_html'] = website.adjust_markdown_html + util.init_jinja2_filters(env) return env def render(self, markdown): diff --git a/docs/tools/util.py b/docs/tools/util.py index a5a751020f0..b840dc1168a 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -1,5 +1,6 @@ import collections import contextlib +import datetime import multiprocessing import os 
import shutil @@ -8,6 +9,7 @@ import socket import tempfile import threading +import jinja2 import yaml @@ -111,3 +113,35 @@ def represent_ordereddict(dumper, data): yaml.add_representer(collections.OrderedDict, represent_ordereddict) + + +def init_jinja2_filters(env): + import amp + import website + chunk_size = 10240 + env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)] + env.filters['html_to_amp'] = amp.html_to_amp + env.filters['adjust_markdown_html'] = website.adjust_markdown_html + env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT') + + +def init_jinja2_env(args): + import mdx_clickhouse + env = jinja2.Environment( + loader=jinja2.FileSystemLoader([ + args.website_dir, + os.path.join(args.docs_dir, '_includes') + ]), + extensions=[ + 'jinja2.ext.i18n', + 'jinja2_highlight.HighlightExtension' + ] + ) + env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') + translations_dir = os.path.join(args.website_dir, 'locale') + env.install_gettext_translations( + mdx_clickhouse.get_translations(translations_dir, 'en'), + newstyle=True + ) + init_jinja2_filters(env) + return env diff --git a/docs/tools/website.py b/docs/tools/website.py index d69371665ce..97d699b9916 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -11,10 +11,9 @@ import bs4 import closure import cssmin import htmlmin -import jinja2 import jsmin -import mdx_clickhouse +import util def handle_iframe(iframe, soup): @@ -121,22 +120,7 @@ def minify_html(content): def build_website(args): logging.info('Building website') - env = jinja2.Environment( - loader=jinja2.FileSystemLoader([ - args.website_dir, - os.path.join(args.docs_dir, '_includes') - ]), - extensions=[ - 'jinja2.ext.i18n', - 'jinja2_highlight.HighlightExtension' - ] - ) - env.extend(jinja2_highlight_cssclass='syntax p-3 my-3') - translations_dir = os.path.join(args.website_dir, 'locale') - env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, 'en'), - newstyle=True - ) + env = util.init_jinja2_env(args) shutil.copytree( args.website_dir, diff --git a/website/templates/blog/rss.xml b/website/templates/blog/rss.xml new file mode 100644 index 00000000000..1f584b522aa --- /dev/null +++ b/website/templates/blog/rss.xml @@ -0,0 +1,23 @@ + + + {{ config.site_name }} + {{ config.site_url }} + + + {{ config.extra.today|to_rfc882 }} + + {% for post in config.extra.post_meta.values() %} + {% set url = config.extra.website_url + post['url'] %} + + {{ post['title'] }} + ]]> + {{ post['date']|to_rfc882 }} + {{ url }} + {{ url }} + {# TODO: #} + + {% endfor %} + + diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 11a36414cd7..350bcf18f05 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -26,7 +26,7 @@ {% if page and page.meta.tags %} + content="{% for tag in page.meta.tags %}{{tag}}{{ ', ' if not loop.last }}{% endfor %}" /> {% else %} @@ -45,3 +45,7 @@ {% for prefetch_item in prefetch_items %} {% endfor %} + +{% if is_blog %} + +{% endif %} From 594fc9247b6ea96925d0bac9118ff2a008c69822 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 30 Jun 2020 22:38:29 +0300 Subject: [PATCH 137/330] Update CHANGELOG.md Co-authored-by: Ivan Blinkov --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 606d394ff1e..1cb41d335bc 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -346,7 +346,7 @@ * Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * All queries in HTTP session have had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). * Now clickhouse-server docker container will prefer IPv6 checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). -* Fix shard_num/replica_num for (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). * Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix wrong results of distributed queries when alias could override qualified column name. Fixes [#9672](https://github.com/ClickHouse/ClickHouse/issues/9672) [#9714](https://github.com/ClickHouse/ClickHouse/issues/9714). [#9972](https://github.com/ClickHouse/ClickHouse/pull/9972) ([Artem Zuikov](https://github.com/4ertus2)). From 4d300abe24dddcd598837c0f9337f593e6f830f1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Jun 2020 23:43:22 +0300 Subject: [PATCH 138/330] bump ci From a541683efa81cd07d2bf6c27cdb65c5a9cc6b82b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 1 Jul 2020 01:04:30 +0300 Subject: [PATCH 139/330] Moved useless S3 logging to TRACE level. --- src/IO/S3/PocoHTTPClient.cpp | 4 ++-- src/IO/S3Common.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 0dfa80ca107..56632b22071 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -106,7 +106,7 @@ void PocoHTTPClient::MakeRequestInternal( if (request.GetContentBody()) { - LOG_DEBUG(log, "Writing request body."); + LOG_TRACE(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. 
{ request.GetContentBody()->clear(); @@ -116,7 +116,7 @@ void PocoHTTPClient::MakeRequestInternal( LOG_DEBUG(log, "Written {} bytes to request body", size); } - LOG_DEBUG(log, "Receiving response..."); + LOG_TRACE(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); int status_code = static_cast(poco_response.getStatus()); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 2c75a137222..20ff38150eb 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -31,7 +31,7 @@ const std::pair & convertLogLevel(Aws::U {Aws::Utils::Logging::LogLevel::Error, {DB::LogsLevel::error, Poco::Message::PRIO_ERROR}}, {Aws::Utils::Logging::LogLevel::Warn, {DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}}, {Aws::Utils::Logging::LogLevel::Info, {DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}}, - {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG}}, + {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, }; return mapping.at(log_level); From c95d09aed056f10bba531e5f3e44723f951d17de Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Wed, 1 Jul 2020 11:01:47 +0100 Subject: [PATCH 140/330] Add a test to cover non-const tuple elemenets (just in case) --- .../00612_pk_in_tuple_perf.reference | 2 ++ .../0_stateless/00612_pk_in_tuple_perf.sh | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference index cb18472feb3..5bea74275a1 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.reference @@ -1,2 +1,4 @@ 1 "rows_read": 2, +1 + "rows_read": 2, diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh index 35a76a2d365..8f3e89098e6 100755 --- a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --multiquery < Date: Wed, 1 Jul 2020 13:15:39 +0300 Subject: [PATCH 141/330] Bump From b2df6accadd7bd28f3622d7abd1dd8ab5192a5e4 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 1 Jul 2020 14:34:24 +0200 Subject: [PATCH 142/330] Update query-complexity.md Remove a note about read limits applied on threads level. --- docs/en/operations/settings/query-complexity.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 812056785da..920d77a32dd 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -57,7 +57,6 @@ See also the description of [max\_memory\_usage](#settings_max_memory_usage). ## max\_rows\_to\_read {#max-rows-to-read} The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little. -When running a query in multiple threads, the following restrictions apply to each thread separately. A maximum number of rows that can be read from a table when running a query. 
From 2f9c52c1df39dc5a0646327fccda305cb1fd4c23 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 1 Jul 2020 14:35:47 +0200 Subject: [PATCH 143/330] Update query-complexity.md --- docs/ru/operations/settings/query-complexity.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index 651f597c4d2..ada015aafc1 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -53,7 +53,6 @@ ## max\_rows\_to\_read {#max-rows-to-read} Следующие ограничения могут проверяться на каждый блок (а не на каждую строку). То есть, ограничения могут быть немного нарушены. -При выполнении запроса в несколько потоков, следующие ограничения действуют в каждом потоке по отдельности. Максимальное количество строчек, которое можно прочитать из таблицы при выполнении запроса. From 3cf01e9e0bfe669261b82f818b4f3e6f3f149f6f Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 1 Jul 2020 14:37:39 +0200 Subject: [PATCH 144/330] Update query-complexity.md --- docs/zh/operations/settings/query-complexity.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 090862903f4..1e87bdf8879 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -60,7 +60,6 @@ Restrictions on the «maximum amount of something» can take the value 0, which ## max\_rows\_to\_read {#max-rows-to-read} 可以在每个块(而不是每行)上检查以下限制。 也就是说,限制可以打破一点。 -在多个线程中运行查询时,以下限制单独应用于每个线程。 运行查询时可从表中读取的最大行数。 From bdc78a0017b4095940908d9218531e45b153d790 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 1 Jul 2020 14:38:50 +0200 Subject: [PATCH 145/330] Update query-complexity.md --- docs/es/operations/settings/query-complexity.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/es/operations/settings/query-complexity.md b/docs/es/operations/settings/query-complexity.md index 218952dff1f..d65fb03ad13 100644 --- a/docs/es/operations/settings/query-complexity.md +++ b/docs/es/operations/settings/query-complexity.md @@ -59,7 +59,6 @@ Ver también la descripción de [Método de codificación de datos:](#settings_m ## ¿Qué puedes encontrar en Neodigit {#max-rows-to-read} Las siguientes restricciones se pueden verificar en cada bloque (en lugar de en cada fila). Es decir, las restricciones se pueden romper un poco. -Al ejecutar una consulta en varios subprocesos, las siguientes restricciones se aplican a cada subproceso por separado. Un número máximo de filas que se pueden leer de una tabla al ejecutar una consulta. From 03a643e9d3f5b1ffaf827284802409f5b30c96c5 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 1 Jul 2020 16:08:16 +0300 Subject: [PATCH 146/330] DOCS-609: max_server_memory_usage (#11771) * Revolg DOCSUP-1000 add max server memory usage setting (#125) * Add max_server_memory_usage setting, delete max_memory_usage_for_all_queries setting. * Syntax fixed * Apply suggestions from code review Co-authored-by: BayoNet * Doc for the max_server_memory_usage setting. Updates. Co-authored-by: Olga Revyakina Co-authored-by: BayoNet * CLICKHOUSEDOCS-609: Minor fixes. * CLICKHOUSEDOCS-609: Actualized position of the setting. 
Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> Co-authored-by: Olga Revyakina Co-authored-by: Sergei Shtykov --- .../settings.md | 21 +++++++++++++++++++ .../operations/settings/query-complexity.md | 9 +------- .../settings.md | 19 +++++++++++++++++ .../operations/settings/query-complexity.md | 12 ++--------- 4 files changed, 43 insertions(+), 18 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f90b418b4a9..f1ffc011776 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -398,6 +398,27 @@ The cache is shared for the server and memory is allocated as needed. The cache 5368709120 ``` + +## max_server_memory_usage {#max_server_memory_usage} + +Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile. + +Possible values: + +- Positive integer. +- 0 — Unlimited. + +Default value: `0`. + +**Additional Info** + +On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio > 1`. + +**See also** + +- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage) + + ## max\_concurrent\_queries {#max-concurrent-queries} The maximum number of simultaneously processed requests. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 812056785da..899f236177a 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -36,7 +36,7 @@ Memory usage is not monitored for the states of certain aggregate functions. Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments. -Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and `max_memory_usage_for_all_queries`. +Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max_server_memory_usage](../server-configuration-parameters/settings.md#max_server_memory_usage). ## max\_memory\_usage\_for\_user {#max-memory-usage-for-user} @@ -46,13 +46,6 @@ Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHo See also the description of [max\_memory\_usage](#settings_max_memory_usage). -## max\_memory\_usage\_for\_all\_queries {#max-memory-usage-for-all-queries} - -The maximum amount of RAM to use for running all queries on a single server. - -Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L289). By default, the amount is not restricted (`max_memory_usage_for_all_queries = 0`). - -See also the description of [max\_memory\_usage](#settings_max_memory_usage). ## max\_rows\_to\_read {#max-rows-to-read} diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5bfedf4c520..2c933d87157 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -372,6 +372,25 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 100 ``` +## max_server_memory_usage {#max_server_memory_usage} + +Ограничивает объём оперативной памяти, используемой сервером ClickHouse. 
Настройка может быть задана только для профиля `default`. + +Возможные значения: + +- Положительное целое число. +- 0 — объём используемой памяти не ограничен. + +Значение по умолчанию: `0`. + +**Дополнительная информация** + +На серверах с небольшим объёмом RAM и файла подкачки может потребоваться настройка `max_server_memory_usage_to_ram_ratio > 1`. + +**См. также** + +- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage) + ## max\_connections {#max-connections} Максимальное количество входящих соединений. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index 651f597c4d2..95b5a0adb63 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -1,4 +1,4 @@ -# Ограничения на сложность запроса {#ogranicheniia-na-slozhnost-zaprosa} +# Ограничения на сложность запроса {#restrictions-on-query-complexity} Ограничения на сложность запроса - часть настроек. Используются, чтобы обеспечить более безопасное исполнение запросов из пользовательского интерфейса. @@ -32,7 +32,7 @@ Потребление памяти не полностью учитывается для состояний агрегатных функций `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` от аргументов `String` и `Array`. -Потребление памяти ограничивается также параметрами `max_memory_usage_for_user` и `max_memory_usage_for_all_queries`. +Потребление памяти ограничивается также параметрами `max_memory_usage_for_user` и [max_server_memory_usage](../server-configuration-parameters/settings.md#max_server_memory_usage). ## max\_memory\_usage\_for\_user {#max-memory-usage-for-user} @@ -42,14 +42,6 @@ Смотрите также описание настройки [max\_memory\_usage](#settings_max_memory_usage). -## max\_memory\_usage\_for\_all\_queries {#max-memory-usage-for-all-queries} - -Максимальный возможный объём оперативной памяти для всех запросов на одном сервере. - -Значения по умолчанию определены в файле [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L289). По умолчанию размер не ограничен (`max_memory_usage_for_all_queries = 0`). - -Смотрите также описание настройки [max\_memory\_usage](#settings_max_memory_usage). - ## max\_rows\_to\_read {#max-rows-to-read} Следующие ограничения могут проверяться на каждый блок (а не на каждую строку). То есть, ограничения могут быть немного нарушены. 
From 64bbccb42e918424a61453c8518ea3239b2edc5d Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Wed, 1 Jul 2020 14:25:14 +0100 Subject: [PATCH 147/330] Add force_primary_key to a pk in tuple test --- tests/queries/0_stateless/00612_pk_in_tuple.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00612_pk_in_tuple.sql b/tests/queries/0_stateless/00612_pk_in_tuple.sql index ac563006cda..081e8a75092 100644 --- a/tests/queries/0_stateless/00612_pk_in_tuple.sql +++ b/tests/queries/0_stateless/00612_pk_in_tuple.sql @@ -42,6 +42,7 @@ drop table if exists tab_00612; CREATE TABLE tab_00612 (key1 Int32, id1 Int64, c1 Int64) ENGINE = MergeTree PARTITION BY id1 ORDER BY (key1) ; insert into tab_00612 values ( -1, 1, 0 ); SELECT count(*) FROM tab_00612 PREWHERE id1 IN (1); -SELECT count() FROM tab_00612 WHERE (key1, id1) IN (-1, 1) AND (key1, 1) IN (-1, 1); + +SELECT count() FROM tab_00612 WHERE (key1, id1) IN (-1, 1) AND (key1, 1) IN (-1, 1) SETTINGS force_primary_key = 1; drop table tab_00612; From 41fed2840383e71a7eff7380daa2f274946558b7 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 1 Jul 2020 16:36:41 +0300 Subject: [PATCH 148/330] DOCS-510: runningAccumulate (#12061) * asiana21-DOCSUP-797 (#117) * docs(runningAccumulate): the function description is added * docs(runningAccumulate): the function description is modified * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * docs(runningAccumulate): some changes * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * docs(runningAccumulate): added ru translation Co-authored-by: asiana21 Co-authored-by: BayoNet * CLICKHOUSEDOCS-510: Minor fix. * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: Ivan Blinkov * CLICKHOUSEDOCS-510: Fixed links. 
Co-authored-by: AsiaKorushkina <43650329+AsiaKorushkina@users.noreply.github.com>
Co-authored-by: asiana21
Co-authored-by: Sergei Shtykov
Co-authored-by: Ivan Blinkov
---
 docs/en/development/architecture.md       |   2 +-
 .../aggregate-functions/combinators.md    |   2 +-
 .../functions/other-functions.md          | 107 +++++++++++++++++-
 .../aggregate-functions/combinators.md    |   2 +-
 .../functions/other-functions.md          | 107 +++++++++++++++++-
 5 files changed, 210 insertions(+), 10 deletions(-)

diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md
index 4c98ec12541..c2f7a86fc9f 100644
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@@ -120,7 +120,7 @@ There are ordinary functions and aggregate functions. For aggregate functions, s
 Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`’s of data to implement vectorized query execution.

-There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#function-runningaccumulate), that exploit block processing and violate the independence of rows.
+There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), that exploit block processing and violate the independence of rows.

 ClickHouse has strong typing, so there’s no implicit type conversion. If a function doesn't support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.

diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md
index 6d70637236b..554969b80a3 100644
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@@ -33,7 +33,7 @@ To work with these states, use:
 - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine.
 - [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function.
-- [runningAccumulate](../../sql-reference/functions/other-functions.md#function-runningaccumulate) function.
+- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function.
 - [-Merge](#aggregate_functions_combinators-merge) combinator.
 - [-MergeState](#aggregate_functions_combinators-mergestate) combinator.

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index e979aff2ee7..55ddc5c029c 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -1054,11 +1054,110 @@ Result:
 Takes state of aggregate function.
Returns result of aggregation (finalized state). -## runningAccumulate {#function-runningaccumulate} +## runningAccumulate {#runningaccumulate} -Takes the states of the aggregate function and returns a column with values, are the result of the accumulation of these states for a set of block lines, from the first to the current line. -For example, takes state of aggregate function (example runningAccumulate(uniqState(UserID))), and for each row of block, return result of aggregate function on merge of states of all previous rows and current row. -So, result of function depends on partition of data to blocks and on order of data in block. +Accumulates states of an aggregate function for each row of a data block. + +!!! warning "Warning" + The state is reset for each new data block. + +**Syntax** + +```sql +runningAccumulate(agg_state[, grouping]); +``` + +**Parameters** + +- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. + +**Returned value** + +- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. + +Type depends on the aggregate function used. + +**Examples** + +Consider how you can use `runningAccumulate` to find the cumulative sum of numbers without and with grouping. + +Query: + +```sql +SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); +``` + +Result: + +```text +┌─k─┬─res─┐ +│ 0 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 3 │ +│ 3 │ 6 │ +│ 4 │ 10 │ +│ 5 │ 15 │ +│ 6 │ 21 │ +│ 7 │ 28 │ +│ 8 │ 36 │ +│ 9 │ 45 │ +└───┴─────┘ +``` + +The subquery generates `sumState` for every number from `0` to `9`. `sumState` returns the state of the [sum](../aggregate-functions/reference/sum.md) function that contains the sum of a single number. + +The whole query does the following: + +1. For the first row, `runningAccumulate` takes `sumState(0)` and returns `0`. +2. For the second row, the function merges `sumState(0)` and `sumState(1)` resulting in `sumState(0 + 1)`, and returns `1` as a result. +3. For the third row, the function merges `sumState(0 + 1)` and `sumState(2)` resulting in `sumState(0 + 1 + 2)`, and returns `3` as a result. +4. The actions are repeated until the block ends. + +The following example shows the `groupping` parameter usage: + +Query: + +```sql +SELECT + grouping, + item, + runningAccumulate(state, grouping) AS res +FROM +( + SELECT + toInt8(number / 4) AS grouping, + number AS item, + sumState(number) AS state + FROM numbers(15) + GROUP BY item + ORDER BY item ASC +); +``` + +Result: + +```text +┌─grouping─┬─item─┬─res─┐ +│ 0 │ 0 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 0 │ 2 │ 3 │ +│ 0 │ 3 │ 6 │ +│ 1 │ 4 │ 4 │ +│ 1 │ 5 │ 9 │ +│ 1 │ 6 │ 15 │ +│ 1 │ 7 │ 22 │ +│ 2 │ 8 │ 8 │ +│ 2 │ 9 │ 17 │ +│ 2 │ 10 │ 27 │ +│ 2 │ 11 │ 38 │ +│ 3 │ 12 │ 12 │ +│ 3 │ 13 │ 25 │ +│ 3 │ 14 │ 39 │ +└──────────┴──────┴─────┘ +``` + +As you can see, `runningAccumulate` merges states for each group of rows separately. 
## joinGet {#joinget} diff --git a/docs/ru/sql-reference/aggregate-functions/combinators.md b/docs/ru/sql-reference/aggregate-functions/combinators.md index 95264976857..ec325d62b02 100644 --- a/docs/ru/sql-reference/aggregate-functions/combinators.md +++ b/docs/ru/sql-reference/aggregate-functions/combinators.md @@ -29,7 +29,7 @@ - Движок таблиц [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md). - Функция [finalizeAggregation](../../sql-reference/aggregate-functions/combinators.md#function-finalizeaggregation). -- Функция [runningAccumulate](../../sql-reference/aggregate-functions/combinators.md#function-runningaccumulate). +- Функция [runningAccumulate](../../sql-reference/aggregate-functions/combinators.md#runningaccumulate). - Комбинатор [-Merge](#aggregate_functions_combinators-merge). - Комбинатор [-MergeState](#aggregate_functions_combinators-mergestate). diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 0a78de66a2d..c6648963dad 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1,4 +1,4 @@ -# Прочие функции {#prochie-funktsii} +# Прочие функции {#other-functions} ## hostName() {#hostname} @@ -1036,9 +1036,110 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy Принимает состояние агрегатной функции. Возвращает результат агрегирования. -## runningAccumulate {#function-runningaccumulate} +## runningAccumulate {#runningaccumulate} -Принимает на вход состояния агрегатной функции и возвращает столбец со значениями, которые представляют собой результат мёржа этих состояний для выборки строк из блока от первой до текущей строки. Например, принимает состояние агрегатной функции (например, `runningAccumulate(uniqState(UserID))`), и для каждой строки блока возвращает результат агрегатной функции после мёржа состояний функции для всех предыдущих строк и текущей. Таким образом, результат зависит от разбиения данных по блокам и от порядка данных в блоке. +Накапливает состояния агрегатной функции для каждой строки блока данных. + +!!! warning "Warning" + Функция обнуляет состояние для каждого нового блока. + +**Синтаксис** + +```sql +runningAccumulate(agg_state[, grouping]); +``` + +**Параметры** + +- `agg_state` — Состояние агрегатной функции. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Ключ группировки. Опциональный параметр. Состояние функции обнуляется, если значение `grouping` меняется. Параметр может быть любого [поддерживаемого типа данных](../../sql-reference/data-types/index.md), для которого определен оператор равенства. + +**Возвращаемое значение** + +- Каждая результирующая строка содержит результат агрегатной функции, накопленный для всех входных строк от 0 до текущей позиции. `runningAccumulate` обнуляет состояния для каждого нового блока данных или при изменении значения `grouping`. + +Тип зависит от используемой агрегатной функции. + +**Примеры** + +Рассмотрим примеры использования `runningAccumulate` для нахождения кумулятивной суммы чисел без и с группировкой. 
+ +Запрос: + +```sql +SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k) AS sum_k FROM numbers(10) GROUP BY k ORDER BY k); +``` + +Результат: + +```text +┌─k─┬─res─┐ +│ 0 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 3 │ +│ 3 │ 6 │ +│ 4 │ 10 │ +│ 5 │ 15 │ +│ 6 │ 21 │ +│ 7 │ 28 │ +│ 8 │ 36 │ +│ 9 │ 45 │ +└───┴─────┘ +``` + +Подзапрос формирует `sumState` для каждого числа от `0` до `9`. `sumState` возвращает состояние функции [sum](../../sql-reference/aggregate-functions/reference.md#agg_function-sum), содержащее сумму одного числа. + +Весь запрос делает следующее: + +1. Для первой строки `runningAccumulate` берет `sumState(0)` и возвращает `0`. +2. Для второй строки функция объединяет `sumState (0)` и `sumState (1)`, что приводит к `sumState (0 + 1)`, и возвращает в результате `1`. +3. Для третьей строки функция объединяет `sumState (0 + 1)` и `sumState (2)`, что приводит к `sumState (0 + 1 + 2)`, и в результате возвращает `3`. +4. Действия повторяются до тех пор, пока не закончится блок. + +В следующем примере показано использование параметра `grouping`: + +Запрос: + +```sql +SELECT + grouping, + item, + runningAccumulate(state, grouping) AS res +FROM +( + SELECT + toInt8(number / 4) AS grouping, + number AS item, + sumState(number) AS state + FROM numbers(15) + GROUP BY item + ORDER BY item ASC +); +``` + +Результат: + +```text +┌─grouping─┬─item─┬─res─┐ +│ 0 │ 0 │ 0 │ +│ 0 │ 1 │ 1 │ +│ 0 │ 2 │ 3 │ +│ 0 │ 3 │ 6 │ +│ 1 │ 4 │ 4 │ +│ 1 │ 5 │ 9 │ +│ 1 │ 6 │ 15 │ +│ 1 │ 7 │ 22 │ +│ 2 │ 8 │ 8 │ +│ 2 │ 9 │ 17 │ +│ 2 │ 10 │ 27 │ +│ 2 │ 11 │ 38 │ +│ 3 │ 12 │ 12 │ +│ 3 │ 13 │ 25 │ +│ 3 │ 14 │ 39 │ +└──────────┴──────┴─────┘ +``` + +Как вы можете видеть, `runningAccumulate` объединяет состояния для каждой группы строк отдельно. ## joinGet {#joinget} From f8ceca6942a201fa547b2db329ce9ca891a687c8 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Wed, 1 Jul 2020 15:04:08 +0100 Subject: [PATCH 149/330] Remove const specifier to allow auto-move (clangtidy) --- src/Parsers/ASTFunction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index effc9a6cea9..f44eba30ee3 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -32,7 +32,7 @@ protected: template std::shared_ptr makeASTFunction(const String & name, Args &&... 
args) { - const auto function = std::make_shared(); + auto function = std::make_shared(); function->name = name; function->arguments = std::make_shared(); From 3854ce6d8467ca29b7261f9420e25d8551a19bd3 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Wed, 1 Jul 2020 15:05:54 +0100 Subject: [PATCH 150/330] Rewrite Set lookup to make it more readable --- src/Interpreters/Set.cpp | 7 ++++- src/Interpreters/Set.h | 1 + src/Storages/MergeTree/KeyCondition.cpp | 35 +++++++++++-------------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 6f3031d5e7d..f331f3cecb3 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -441,9 +441,14 @@ void Set::checkColumnsNumber(size_t num_key_columns) const } } +bool Set::areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const +{ + return removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type))); +} + void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const { - if (!removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)))) + if (!this->areTypesEqual(set_type_idx, other_type)) throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: " + other_type->getName() + " on the left, " + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index a4c8fd59245..933bace5e45 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -74,6 +74,7 @@ public: Columns getSetElements() const { return { set_elements.begin(), set_elements.end() }; } void checkColumnsNumber(size_t num_key_columns) const; + bool areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; void checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; private: diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 4db931b35c3..d85a44a2f6a 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -617,13 +617,10 @@ bool KeyCondition::tryPrepareSetIndex( const ASTPtr & right_arg = args[1]; - PreparedSetKey set_key; SetPtr prepared_set; if (right_arg->as() || right_arg->as()) { - set_key = PreparedSetKey::forSubquery(*right_arg); - - auto set_it = prepared_sets.find(set_key); + auto set_it = prepared_sets.find(PreparedSetKey::forSubquery(*right_arg)); if (set_it == prepared_sets.end()) return false; @@ -631,25 +628,23 @@ bool KeyCondition::tryPrepareSetIndex( } else { + /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information + /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets + /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check + /// that the types it was prepared with are compatible with the types of the primary key. 
+ auto set_ast_hash = right_arg->getTreeHash(); auto set_it = std::find_if( - prepared_sets.begin(), - prepared_sets.end(), - [&](const auto &e) + prepared_sets.begin(), prepared_sets.end(), + [&](const auto & candidate_entry) { - if (e.first.ast_hash == right_arg->getTreeHash()) - { - for (size_t i = 0; i < data_types.size(); i++) - { - if (!recursiveRemoveLowCardinality(data_types[i])->equals(*e.first.types[indexes_mapping[i].tuple_index])) - { - return false; - } - } + if (candidate_entry.first.ast_hash != set_ast_hash) + return false; - return true; - } + for (size_t i = 0; i < indexes_mapping.size(); ++i) + if (!candidate_entry.second->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i])) + return false; - return false; + return true; }); if (set_it == prepared_sets.end()) return false; @@ -663,7 +658,7 @@ bool KeyCondition::tryPrepareSetIndex( prepared_set->checkColumnsNumber(left_args_count); for (size_t i = 0; i < indexes_mapping.size(); ++i) - prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, removeLowCardinality(data_types[i])); + prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, data_types[i]); out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); From f0e715ade13be05e14939e708c3ecf32a80348e9 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Wed, 1 Jul 2020 22:08:01 +0800 Subject: [PATCH 151/330] [docs] Sync zh/development/build-osx.md from EN (#12071) --- docs/zh/development/build-osx.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/zh/development/build-osx.md b/docs/zh/development/build-osx.md index 2fc68f6d2fb..24923f75207 100644 --- a/docs/zh/development/build-osx.md +++ b/docs/zh/development/build-osx.md @@ -6,13 +6,13 @@ ClickHouse 支持在 Mac OS X 10.12 版本中编译。若您在用更早的操 ## 安装 Homebrew {#an-zhuang-homebrew} ``` bash -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" ``` ## 安装编译器,工具库 {#an-zhuang-bian-yi-qi-gong-ju-ku} ``` bash -brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext +$ brew install cmake ninja libtool gettext ``` ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma} @@ -27,11 +27,11 @@ cd ClickHouse ## 编译 ClickHouse {#bian-yi-clickhouse} ``` bash -mkdir build -cd build -cmake .. -DCMAKE_CXX_COMPILER=`which g++-8` -DCMAKE_C_COMPILER=`which gcc-8` -ninja -cd .. +$ mkdir build +$ cd build +$ cmake .. -DCMAKE_CXX_COMPILER=`which clang++` -DCMAKE_C_COMPILER=`which clang` +$ ninja +$ cd .. 
``` ## 注意事项 {#zhu-yi-shi-xiang} From 9ed5c6d9250c4a819f0d476d7bd3001f4c269d24 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 1 Jul 2020 22:58:52 +0800 Subject: [PATCH 152/330] ISSUES-4006 support first for ALTER ADD|MODIFY COLUMN --- src/Parsers/ASTAlterQuery.cpp | 13 +++++- src/Parsers/ASTAlterQuery.h | 4 +- src/Parsers/ParserAlterQuery.cpp | 13 +++++- src/Storages/AlterCommands.cpp | 9 ++++- src/Storages/AlterCommands.h | 5 ++- src/Storages/ColumnsDescription.cpp | 38 +++++++++++++++++- src/Storages/ColumnsDescription.h | 14 ++++++- .../01355_alter_column_with_order.reference | 40 +++++++++++++++++++ .../01355_alter_column_with_order.sql | 25 ++++++++++++ 9 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/01355_alter_column_with_order.reference create mode 100644 tests/queries/0_stateless/01355_alter_column_with_order.sql diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 1309037ec01..8b23302a05c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -75,8 +75,9 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); - /// AFTER - if (column) + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (column) /// AFTER { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); @@ -97,6 +98,14 @@ void ASTAlterCommand::formatImpl( { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); + + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (column) /// AFTER + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + } } else if (type == ASTAlterCommand::COMMENT_COLUMN) { diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index a9ae06863a9..f2dd997bd1f 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -68,7 +68,7 @@ public: */ ASTPtr col_decl; - /** The ADD COLUMN query here optionally stores the name of the column following AFTER + /** The ADD COLUMN and MODIFY COLUMN query here optionally stores the name of the column following AFTER * The DROP query stores the column name for deletion here * Also used for RENAME COLUMN. 
*/ @@ -136,6 +136,8 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + bool first = false; /// option for ADD_COLUMN, MODIFY_COLUMN + DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION String move_destination_name; /// option for MOVE PART/PARTITION diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index f90d010e9de..3f22aff9cf5 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -63,6 +63,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_freeze("FREEZE"); ParserKeyword s_partition("PARTITION"); + ParserKeyword s_first("FIRST"); ParserKeyword s_after("AFTER"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_if_exists("IF EXISTS"); @@ -115,7 +116,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_col_decl.parse(pos, command->col_decl, expected)) return false; - if (s_after.ignore(pos, expected)) + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) { if (!parser_name.parse(pos, command->column, expected)) return false; @@ -429,6 +432,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) return false; + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + } + command->type = ASTAlterCommand::MODIFY_COLUMN; } else if (s_modify_order_by.ignore(pos, expected)) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 5d892ca07d4..fc72effca9a 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -83,6 +83,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.ttl) command.ttl = ast_col_decl.ttl; + command.first = command_ast->first; command.if_not_exists = command_ast->if_not_exists; return command; @@ -133,6 +134,10 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.codec) command.codec = compression_codec_factory.get(ast_col_decl.codec, command.data_type, sanity_check_compression_codecs); + if (command_ast->column) + command.after_column = getIdentifierName(command_ast->column); + + command.first = command_ast->first; command.if_exists = command_ast->if_exists; return command; @@ -269,7 +274,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con column.codec = codec; column.ttl = ttl; - metadata.columns.add(column, after_column); + metadata.columns.add(column, after_column, first); /// Slow, because each time a list is copied metadata.columns.flattenNested(); @@ -282,7 +287,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == MODIFY_COLUMN) { - metadata.columns.modify(column_name, [&](ColumnDescription & column) + metadata.columns.modify(column_name, after_column, first, [&](ColumnDescription & column) { if (codec) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 82e438f6a45..1a80957e875 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -54,9 +54,12 @@ struct AlterCommand /// For COMMENT column std::optional comment; - /// For ADD - after which column to add a new one. If an empty string, add to the end. 
To add to the beginning now it is impossible. + /// For ADD or MODIFY - after which column to add a new one. If an empty string, add to the end. String after_column; + /// For ADD_COLUMN, MODIFY_COLUMN - Add to the begin if it is true. + bool first = false; + /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN bool if_exists = false; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 8c92307dcb7..c1a5c1f77a0 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -167,7 +167,7 @@ static auto getNameRange(const ColumnsDescription::Container & columns, const St return std::make_pair(begin, end); } -void ColumnsDescription::add(ColumnDescription column, const String & after_column) +void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first) { if (has(column.name)) throw Exception("Cannot add column " + column.name + ": column with this name already exists", @@ -175,7 +175,9 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu auto insert_it = columns.cend(); - if (!after_column.empty()) + if (first) + insert_it = columns.cbegin(); + else if (!after_column.empty()) { auto range = getNameRange(columns, after_column); if (range.first == range.second) @@ -211,6 +213,38 @@ void ColumnsDescription::rename(const String & column_from, const String & colum }); } +void ColumnsDescription::modifyColumnOrder(const String & column_name, const String & after_column, bool first) +{ + const auto & reorder_column = [&](auto get_new_pos) + { + auto column_range = getNameRange(columns, column_name); + + if (column_range.first == column_range.second) + throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + std::vector moving_columns; + for (auto list_it = column_range.first; list_it != column_range.second;) + { + moving_columns.emplace_back(*list_it); + list_it = columns.get<0>().erase(list_it); + } + + columns.get<0>().insert(get_new_pos(), moving_columns.begin(), moving_columns.end()); + }; + + if (first) + reorder_column([&]() { return columns.cbegin(); }); + else if (!after_column.empty() && column_name != after_column) + { + /// Checked first + auto range = getNameRange(columns, after_column); + if (range.first == range.second) + throw Exception("Wrong column name. 
Cannot find column " + after_column + " to insert after", + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + reorder_column([&]() { return getNameRange(columns, after_column).second; }); + } +} void ColumnsDescription::flattenNested() { diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index dda10db002d..1d6d04f303f 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -34,6 +34,8 @@ struct ColumnDescription ASTPtr ttl; ColumnDescription() = default; + ColumnDescription(ColumnDescription &&) = default; + ColumnDescription(const ColumnDescription &) = default; ColumnDescription(String name_, DataTypePtr type_); bool operator==(const ColumnDescription & other) const; @@ -52,7 +54,7 @@ public: explicit ColumnsDescription(NamesAndTypesList ordinary_); /// `after_column` can be a Nested column name; - void add(ColumnDescription column, const String & after_column = String()); + void add(ColumnDescription column, const String & after_column = String(), bool first = false); /// `column_name` can be a Nested column name; void remove(const String & column_name); @@ -84,12 +86,20 @@ public: template void modify(const String & column_name, F && f) + { + modify(column_name, String(), false, std::forward(f)); + } + + template + void modify(const String & column_name, const String & after_column, bool first, F && f) { auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) throw Exception("Cannot find column " + column_name + " in ColumnsDescription", ErrorCodes::LOGICAL_ERROR); if (!columns.get<1>().modify(it, std::forward(f))) throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); + + modifyColumnOrder(column_name, after_column, first); } Names getNamesOfPhysical() const; @@ -120,6 +130,8 @@ public: private: Container columns; + + void modifyColumnOrder(const String & column_name, const String & after_column, bool first); }; /// Validate default expressions and corresponding types compatibility, i.e. 
diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.reference b/tests/queries/0_stateless/01355_alter_column_with_order.reference new file mode 100644 index 00000000000..247795a13c8 --- /dev/null +++ b/tests/queries/0_stateless/01355_alter_column_with_order.reference @@ -0,0 +1,40 @@ +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 +Added2 UInt32 +Added1 UInt32 +CounterID UInt32 +Added3 UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 +Added2 UInt32 +Added1 UInt32 +CounterID UInt32 +Added3 UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.sql b/tests/queries/0_stateless/01355_alter_column_with_order.sql new file mode 100644 index 00000000000..b3ae8f43042 --- /dev/null +++ b/tests/queries/0_stateless/01355_alter_column_with_order.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS alter_test; + +CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); + +ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; + +ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; + +ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; + +DESC alter_test; +DETACH TABLE alter_test; +ATTACH TABLE alter_test; +DESC alter_test; + +ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST; + +ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID; + +DESC alter_test; +DETACH TABLE alter_test; +ATTACH TABLE alter_test; +DESC alter_test; + +DROP TABLE IF EXISTS alter_test; From faaf3536eaf7f11e2898eefbf142d562aff1c8c9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 1 Jul 2020 18:00:07 +0300 Subject: [PATCH 153/330] Update deb image --- docker/packager/deb/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4f1be197668..dcdf23e0ff0 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -24,6 +24,7 @@ RUN apt-get --allow-unauthenticated update -y \ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb RUN chmod +x dpkg-deb RUN cp dpkg-deb /usr/bin +RUN echo "HELLO WORLD" # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). 
From 29178a1df45cc42a1fbf006336f2751561075f62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 1 Jul 2020 21:51:11 +0300 Subject: [PATCH 154/330] Don't download image twice --- docker/packager/packager | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index fb076d17b50..c3e0778e10a 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -213,9 +213,10 @@ if __name__ == "__main__": logging.info("Should place {} to output".format(args.with_binaries)) dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") + image_with_version = image_name + ":" + args.docker_image_version if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image: - if not pull_image(image_name) or args.force_build_image: - build_image(image_name, dockerfile) + if not pull_image(image_with_version) or args.force_build_image: + build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, From f0b012c94ba2d504e5b4cbc30969280eec925a06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 1 Jul 2020 23:07:11 +0300 Subject: [PATCH 155/330] Remove garbage from images --- docker/packager/deb/Dockerfile | 1 - docker/test/pvs/Dockerfile | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index dcdf23e0ff0..4f1be197668 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -24,7 +24,6 @@ RUN apt-get --allow-unauthenticated update -y \ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb RUN chmod +x dpkg-deb RUN cp dpkg-deb /usr/bin -RUN echo "HELLO WORLD" # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 23a435efee7..5a6aea5d320 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -25,7 +25,7 @@ ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb" RUN wget "https://files.viva64.com/$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION" -CMD echo "Hello world" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . && ninja re2_st && \ pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ From cf306c5be13e6cf128e656086f8e0319e84c4de3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 1 Jul 2020 23:29:39 +0300 Subject: [PATCH 156/330] bump ci From e0273d30fa47137572aaec9a9b07cd7c0ba3bf01 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 1 Jul 2020 10:22:37 +0300 Subject: [PATCH 157/330] Changelog for 20.1, 20.4 --- CHANGELOG.md | 160 +++++++++++++++++++++++++++++ utils/simple-backport/changelog.sh | 1 + 2 files changed, 161 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cb41d335bc..93aaf196d16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,89 @@ ## ClickHouse release v20.4 +### ClickHouse release v20.4.6.53-stable 2020-06-25 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. 
Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `LIMIT n WITH TIES` usage together with `ORDER BY` statement, which contains aliases. [#11689](https://github.com/ClickHouse/ClickHouse/pull/11689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error which leads to an incorrect state of `system.mutations`. It may show that whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user get total count of records in a table instead filtered. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). 
[#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed rare segfault in `SHOW CREATE TABLE` Fixes [#11490](https://github.com/ClickHouse/ClickHouse/issues/11490). [#11579](https://github.com/ClickHouse/ClickHouse/pull/11579) ([tavplubix](https://github.com/tavplubix)). +* All queries in HTTP session have had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). +* Now clickhouse-server docker container will prefer IPv6 checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix race condition which may lead to an exception during table drop. It's a bit tricky and not dangerous at all. If you want an explanation, just notice me in telegram. [#11523](https://github.com/ClickHouse/ClickHouse/pull/11523) ([alesapin](https://github.com/alesapin)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Get rid of old libunwind patches. https://github.com/ClickHouse-Extras/libunwind/commit/500aa227911bd185a94bfc071d68f4d3b03cb3b1#r39048012 This allows to disable `-fno-omit-frame-pointer` in `clang` builds that improves performance at least by 1% in average. [#10761](https://github.com/ClickHouse/ClickHouse/pull/10761) ([Amos Bird](https://github.com/amosbird)). 
+* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement + +* Fix several non significant errors in unit tests. [#11262](https://github.com/ClickHouse/ClickHouse/pull/11262) ([alesapin](https://github.com/alesapin)). +* Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + +### ClickHouse release v20.4.5.36-stable 2020-06-10 + +#### Bug Fix + +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. 
Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in clickhouse-copier. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). + +#### Build/Testing/Packaging Improvement + +* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). + +### ClickHouse release v20.4.4.18-stable 2020-05-26 + +No changes compared to v20.4.3.16-stable. + ### ClickHouse release v20.4.3.16-stable 2020-05-23 #### Bug Fix @@ -799,6 +883,82 @@ ## ClickHouse release v20.1 +### ClickHouse release v20.1.16.120-stable 2020-06-26 + +#### Bug Fix + +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format.
[#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now clickhouse-server docker container will prefer IPv6 checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). + + +### ClickHouse release v20.1.15.109-stable 2020-06-19 + +#### Bug Fix + +* Fix excess lock for structure during alter. [#11790](https://github.com/ClickHouse/ClickHouse/pull/11790) ([alesapin](https://github.com/alesapin)). + + +### ClickHouse release v20.1.14.107-stable 2020-06-11 + +#### Bug Fix + +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + + +### ClickHouse release v20.1.13.105-stable 2020-06-10 + +#### Bug Fix + +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
+* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. 
[#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). +* Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix incorrect raw data size in method getRawData(). [#10964](https://github.com/ClickHouse/ClickHouse/pull/10964) ([Igr](https://github.com/ObjatieGroba)). +* Fix backward compatibility with tuples in Distributed tables. [#10889](https://github.com/ClickHouse/ClickHouse/pull/10889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix SIGSEGV in StringHashTable (if such key does not exist). [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)). +* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it become inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)). +* Fix columns order after Block::sortColumns() (also add a test that shows that it affects some real use case - Buffer engine). [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue with ODBC bridge when no quoting of identifiers is requested. This fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan and MSan report in DateLUT. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* - Make use of `src_type` for correct type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix combinator -OrNull and -OrDefault when combined with -State. [#10741](https://github.com/ClickHouse/ClickHouse/pull/10741) ([hcz](https://github.com/hczhcz)). +* Fix disappearing totals. Totals could have being filtered if query had had join or subquery with external where condition. 
Fixes [#10674](https://github.com/ClickHouse/ClickHouse/issues/10674). [#10698](https://github.com/ClickHouse/ClickHouse/pull/10698) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix multiple usages of `IN` operator with the identical set in one query. [#10686](https://github.com/ClickHouse/ClickHouse/pull/10686) ([Anton Popov](https://github.com/CurtizJ)). +* Fix order of parameters in AggregateTransform constructor. [#10667](https://github.com/ClickHouse/ClickHouse/pull/10667) ([palasonic1](https://github.com/palasonic1)). +* Fix the lack of parallel execution of remote queries with `distributed_aggregation_memory_efficient` enabled. Fixes [#10655](https://github.com/ClickHouse/ClickHouse/issues/10655). [#10664](https://github.com/ClickHouse/ClickHouse/pull/10664) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). +* Fix SELECT of column ALIAS which default expression type different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). +* * Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). + + ### ClickHouse release v20.1.12.86, 2020-05-26 #### Bug Fix diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 4b898f4c75c..2738eeda44b 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -29,6 +29,7 @@ find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; "${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq > "changelog-prs.txt" echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." +if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi function github_download() { From 99514a7e243fc9f2742f62294620baf46bb27cff Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 2 Jul 2020 10:13:02 +0300 Subject: [PATCH 158/330] fixup --- docker/test/performance-comparison/compare.sh | 13 ++++++++++--- tests/performance/sum_map.xml | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 64b86e116e8..d3debe82c56 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -794,9 +794,16 @@ done wait unset IFS -# Remember that grep sets error code when nothing is found, hence the bayan -# operator. -grep -H -m2 -i '' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: +# Prefer to grep for clickhouse_driver exception messages, but if there are none, +# just show a couple of lines from the log. 
+for log in *-err.log +do + test=$(basename "$log" "-err.log") + { + grep -H -m2 -i '\(Exception\|Error\):[^:]' "$log" \ + || head -2 "$log" + } | sed "s/^/$test\t/" >> run-errors.tsv ||: +done } function report_metrics diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index 29ef169c25e..b732c150220 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -22,7 +22,7 @@ - CREATE TABLE sum_map_{scale} AS + CREATE TABLE sum_map_{scale} ENGINE Memory AS SELECT arrayMap(x -> (x % 23), range(50)) AS key, arrayMap(x -> intDiv(number, x + 1), range(50)) AS val From c7d8bf68b82a620b4640746ba440d72a7c3456bc Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 2 Jul 2020 10:25:06 +0300 Subject: [PATCH 159/330] DOCS-635: Translated the EN version of the AvroConfluent format description (#11930) * DOCSUP-1350 (#128) * edited EN version * add EN and RU translation * minor changes * CLICKHOUSEDOCS-635: Updated the description. Co-authored-by: Sergei Shtykov Co-authored-by: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> --- docs/en/interfaces/formats.md | 6 +-- docs/en/operations/settings/settings.md | 8 ++-- docs/ru/interfaces/formats.md | 49 +++++++++++++++++++++++++ docs/ru/operations/settings/settings.md | 6 +++ 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 3be3490cffe..ae96cb6dda4 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1055,11 +1055,11 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. -Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#settings-format_avro_schema_registry_url) +Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#format_avro_schema_registry_url). ### Data Types Matching {#data_types-matching-1} -Same as [Avro](#data-format-avro) +Same as [Avro](#data-format-avro). ### Usage {#usage} @@ -1093,7 +1093,7 @@ SELECT * FROM topic1_stream; ``` !!! note "Warning" - Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. + Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart. Also you can use the `format_avro_schema_registry_url` setting of the `Kafka` table engine. ## Parquet {#data-format-parquet} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index bbb878995d6..9e7e36dec1c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1368,13 +1368,11 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) -## format\_avro\_schema\_registry\_url {#settings-format_avro_schema_registry_url} +## format\_avro\_schema\_registry\_url {#format_avro_schema_registry_url} -Sets Confluent Schema Registry URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format +Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format. -Type: URL - -Default value: Empty +Default value: `Empty`. 
## background\_pool\_size {#background_pool_size} diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 822f5543f9b..9eebc3f8bac 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -945,6 +945,55 @@ message MessageType { ClickHouse пишет и читает сообщения `Protocol Buffers` в формате `length-delimited`. Это означает, что перед каждым сообщением пишется его длина в формате [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). См. также [как читать и записывать сообщения Protocol Buffers в формате length-delimited в различных языках программирования](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). +## Avro {#data-format-avro} + +## AvroConfluent {#data-format-avro-confluent} + +Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka] (http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html). + +Каждое сообщение `Avro` содержит идентификатор схемы, который может быть разрешен для фактической схемы с помощью реестра схем. + +Схемы кэшируются после разрешения. + +URL-адрес реестра схем настраивается с помощью [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#format_avro_schema_registry_url). + +### Соответствие типов данных {#sootvetstvie-tipov-dannykh-0} + +Такое же, как в [Avro](#data-format-avro). + +### Использование {#ispolzovanie} + +Чтобы быстро проверить разрешение схемы, используйте [kafkacat](https://github.com/edenhill/kafkacat) с языком запросов [clickhouse-local](../operations/utilities/clickhouse-local.md): + +``` bash +$ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table' +1 a +2 b +3 c +``` + +Чтобы использовать `AvroConfluent` с [Kafka](../engines/table-engines/integrations/kafka.md): + +``` sql +CREATE TABLE topic1_stream +( + field1 String, + field2 String +) +ENGINE = Kafka() +SETTINGS +kafka_broker_list = 'kafka-broker', +kafka_topic_list = 'topic1', +kafka_group_name = 'group1', +kafka_format = 'AvroConfluent'; + +SET format_avro_schema_registry_url = 'http://schema-registry'; + +SELECT * FROM topic1_stream; +``` +!!! note "Внимание" + `format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. + ## Parquet {#data-format-parquet} [Apache Parquet](http://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 4cccaa4e2d7..29de01f7c97 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1313,6 +1313,12 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; Значение по умолчанию: 16. +## format\_avro\_schema\_registry\_url {#format_avro_schema_registry_url} + +Задает URL реестра схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html) для использования с форматом [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent). 
+ +Значение по умолчанию: `Пустая строка`. + ## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views} Устанавливает минимальное количество строк в блоке, который может быть вставлен в таблицу запросом `INSERT`. Блоки меньшего размера склеиваются в блоки большего размера. Настройка применяется только для блоков, вставляемых в [материализованное представление](../../sql-reference/statements/create.md#create-view). Настройка позволяет избежать избыточного потребления памяти. From 79cd33a5674242b133bb91270e70381968f5b465 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 10:44:47 +0300 Subject: [PATCH 160/330] Fix tests. --- src/Interpreters/ExpressionAnalyzer.cpp | 9 +++++---- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 2 +- .../queries/0_stateless/01356_wrong_filter-type_bug.sql | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1bc7abc9cc7..bfb7abe8fe5 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -78,6 +78,7 @@ namespace ErrorCodes extern const int ILLEGAL_PREWHERE; extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } namespace @@ -637,9 +638,9 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.can_remove_required_output.push_back(true); auto filter_type = step.actions->getSampleBlock().getByName(prewhere_column_name).type; - if (!isInteger(removeNullable(filter_type))) + if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); { /// Remove unused source_columns from prewhere actions. 
@@ -728,9 +729,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions); auto filter_type = step.actions->getSampleBlock().getByName(where_column_name).type; - if (!isInteger(removeNullable(filter_type))) + if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); return true; } diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index cc7d43f6003..8cb24bb0cd6 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -319,7 +319,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P prewhere_info->prewhere_actions->execute(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); - if (!isInteger(removeNullable(prewhere_column.type))) + if (!prewhere_column.type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), ErrorCodes::LOGICAL_ERROR); diff --git a/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql index 43b464f95f0..b3f48967ba2 100644 --- a/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql +++ b/tests/queries/0_stateless/01356_wrong_filter-type_bug.sql @@ -3,7 +3,7 @@ drop table if exists t0; CREATE TABLE t0 (`c0` String, `c1` Int32 CODEC(NONE), `c2` Int32) ENGINE = MergeTree() ORDER BY tuple(); insert into t0 values ('a', 1, 2); -SELECT t0.c2, t0.c1, t0.c0 FROM t0 PREWHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes; -- {serverError 43} -SELECT t0.c2, t0.c1, t0.c0 FROM t0 WHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes settings optimize_move_to_prewhere=0; -- {serverError 43} +SELECT t0.c2, t0.c1, t0.c0 FROM t0 PREWHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes; -- {serverError 59} +SELECT t0.c2, t0.c1, t0.c0 FROM t0 WHERE t0.c0 ORDER BY ((t0.c2)>=(t0.c1)), (((- (((t0.c0)>(t0.c0))))) IS NULL) FORMAT TabSeparatedWithNamesAndTypes settings optimize_move_to_prewhere=0; -- {serverError 59} drop table if exists t0; From 4c3ae82273959bf53434df4b1f821cd3c77455a5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Jul 2020 12:02:15 +0300 Subject: [PATCH 161/330] Better PVS image --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 5a6aea5d320..8d9fb784a3e 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -25,7 +25,7 @@ ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb" RUN wget "https://files.viva64.com/$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION" -CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
&& ninja re2_st && \ pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ From e79be9b1ed8f1bbb9a4752c2cf7f7d22584846db Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 2 Jul 2020 12:04:40 +0300 Subject: [PATCH 162/330] DOCS-605: Description for the always_fetch_merged_part setting (#11921) * Revolg DOCSUP-998 Document the always_fetch_merged_part setting (#123) * Add always_fetch_merged_part setting * revolg-DOCSUP-998-add_always_fetch_merged_part_setting link fixed * Apply suggestions from code review Co-authored-by: BayoNet * Add always_fetch_merged_part setting. Updates. * Update docs/en/operations/settings/settings.md Co-authored-by: BayoNet * Add always_fetch_merged_part setting. Updates. Co-authored-by: Olga Revyakina Co-authored-by: BayoNet * CLICKHOUSEDOCS-605: Minor fixes. * CLICKHOUSEDOCS-605: Added Plausible to Adopters. * Update docs/ru/operations/settings/settings.md Co-authored-by: alesapin * Update docs/en/operations/settings/settings.md Co-authored-by: alesapin * CLICKHOUSEDOCS-605: Fixed access rights description. Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> Co-authored-by: Olga Revyakina Co-authored-by: Sergei Shtykov Co-authored-by: alesapin --- docs/en/introduction/adopters.md | 3 ++- docs/en/operations/access-rights.md | 2 +- docs/en/operations/settings/settings.md | 17 +++++++++++++++++ docs/ru/operations/access-rights.md | 2 +- docs/ru/operations/settings/settings.md | 17 +++++++++++++++++ 5 files changed, 38 insertions(+), 3 deletions(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 3ebadd6d002..01669e012d6 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -50,7 +50,8 @@ toc_title: Adopters | Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 
78db369e8e8..f7e1b98d6e3 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -34,7 +34,7 @@ By default, the ClickHouse server provides the `default` user account which is n If you just started using ClickHouse, consider the following scenario: 1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user. -2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). +2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`). 3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it. ### Properties of Current Solution {#access-control-properties} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 9e7e36dec1c..e0dd5323dcd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1416,6 +1416,23 @@ Possible values: Default value: 16. +## always_fetch_merged_part {#always_fetch_merged_part} + +Prohibits data parts merging in [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables. + +When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage. + +Possible values: + +- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica. +- 1 — `Replicated*MergeTree`-engine tables don't merge data parts at the replica. The tables download merged data parts from other replicas. + +Default value: 0. + +**See Also** + +- [Data Replication](../../engines/table-engines/mergetree-family/replication.md) + ## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size} Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and can’t be changed in a user session. diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index 99da2550e70..de8265e3ba1 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -31,7 +31,7 @@ ClickHouse поддерживает управление доступом на Если вы начали пользоваться ClickHouse недавно, попробуйте следующий сценарий: 1. [Включите](#enabling-access-control) SQL-ориентированное управление доступом для пользователя `default`. -2. Войдите под пользователем `default` и создайте всех необходимых пользователей. Не забудьте создать аккаунт администратора (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). +2. Войдите под пользователем `default` и создайте всех необходимых пользователей. Не забудьте создать аккаунт администратора (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`). 3. 
[Ограничьте разрешения](settings/permissions-for-queries.md#permissions_for_queries) для пользователя `default` и отключите для него SQL-ориентированное управление доступом. ### Особенности реализации {#access-control-properties} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 29de01f7c97..8d11c252bb7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1205,6 +1205,23 @@ Default value: 0. Значение по умолчанию: 16. +## always_fetch_merged_part {#always_fetch_merged_part} + +Запрещает слияние данных для таблиц семейства [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md). + +Если слияние запрещено, реплика никогда не выполняет слияние отдельных кусков данных, а всегда загружает объединённые данные из других реплик. Если объединённых данных пока нет, реплика ждет их появления. Нагрузка на процессор и диски на реплике уменьшается, но нагрузка на сеть в кластере возрастает. Настройка может быть полезна на репликах с относительно слабыми процессорами или медленными дисками, например, на репликах для хранения архивных данных. + +Возможные значения: + +- 0 — таблицы семейства `Replicated*MergeTree` выполняют слияние данных на реплике. +- 1 — таблицы семейства `Replicated*MergeTree` не выполняют слияние данных на реплике, а загружают объединённые данные из других реплик. + +Значение по умолчанию: 0. + +**См. также:** + +- [Репликация данных](../../engines/table-engines/mergetree-family/replication.md) + ## transform_null_in {#transform_null_in} Разрешает сравнивать значения [NULL](../../sql-reference/syntax.md#null-literal) в операторе [IN](../../sql-reference/operators/in.md). From e9d3c48f56ca07a7104d58d8ead5fdce76db81de Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 2 Jul 2020 12:28:23 +0300 Subject: [PATCH 163/330] Update Dockerfile --- docker/server/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 132a5d89959..5568f1240e3 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -7,6 +7,7 @@ ARG gosu_ver=1.10 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ + ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ From b9b7abe816b5d8c428f94617cda4e072c61243d7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 2 Jul 2020 12:31:18 +0300 Subject: [PATCH 164/330] revert e9d3c48f56ca07a7104d58d8ead5fdce76db81de --- docker/server/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5568f1240e3..132a5d89959 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -7,7 +7,6 @@ ARG gosu_ver=1.10 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ - ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ From b4492bc5d4db5f02dd404cf0cbb9bf336036d3a0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 2 Jul 2020 12:45:27 +0300 Subject: [PATCH 165/330] [blog] 'Package Repository Behind CDN' post (#12082) * support iframes from DataLens * initial blog post text --- docs/tools/website.py | 14 +++- .../en/2020/package-repository-behind-cdn.md | 71 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 website/blog/en/2020/package-repository-behind-cdn.md diff --git a/docs/tools/website.py b/docs/tools/website.py index 97d699b9916..688fb887ff7 
100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -17,8 +17,15 @@ import util def handle_iframe(iframe, soup): - if not iframe.attrs['src'].startswith('https://www.youtube.com/'): - raise RuntimeError('iframes are allowed only for YouTube') + allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/'] + illegal_domain = True + iframe_src = iframe.attrs['src'] + for domain in allowed_domains: + if iframe_src.startswith(domain): + illegal_domain = False + break + if illegal_domain: + raise RuntimeError(f'iframe from illegal domain: {iframe_src}') wrapper = soup.new_tag('div') wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] iframe.insert_before(wrapper) @@ -42,8 +49,11 @@ def adjust_markdown_html(content): for a in soup.find_all('a'): a_class = a.attrs.get('class') + a_href = a.attrs.get('href') if a_class and 'headerlink' in a_class: a.string = '\xa0' + if a_href and a_href.startswith('http'): + a.attrs['target'] = '_blank' for iframe in soup.find_all('iframe'): handle_iframe(iframe, soup) diff --git a/website/blog/en/2020/package-repository-behind-cdn.md b/website/blog/en/2020/package-repository-behind-cdn.md new file mode 100644 index 00000000000..c5857bcd4a4 --- /dev/null +++ b/website/blog/en/2020/package-repository-behind-cdn.md @@ -0,0 +1,71 @@ +--- +title: 'Package Repository Behind CDN' +image: 'https://blog-images.clickhouse.tech/en/2020/package-repository-behind-cdn/main.jpg' +date: '2020-07-02' +tags: ['article', 'CDN', 'Cloudflare', 'repository', 'deb', 'rpm', 'tgz'] +--- + +On initial open-source launch, ClickHouse packages were published at an independent repository implemented on Yandex infrastructure. We'd love to use the default repositories of Linux distributions, but, unfortunately, they have their own strict rules on third-party library usage and software compilation options. These rules happen to contradict with how ClickHouse is produced. In 2018 ClickHouse was added to [official Debian repository](https://packages.debian.org/sid/clickhouse-server) as an experiment, but it didn't get much traction. Adaptation to those rules ended up producing more like a demo version of ClickHouse with crippled performance and limited features. + +!!! info "TL;DR" + If you have configured your system to use for fetching ClickHouse packages, replace it with . + +Distributing packages via our own repository was working totally fine until ClickHouse has started getting traction in countries far from Moscow, most notably the USA and China. Downloading large files of packages from remote location was especially painful for Chinese ClickHouse users, likely due to how China is connected to the rest of the world via its famous firewall. But at least it worked (with high latencies and low throughput), while in some smaller countries there was completely no access to this repository and people living there had to host their own mirrors on neutral ground as a workaround. + +Earlier this year we made the ClickHouse official website to be served via global CDN by [Cloudflare](https://www.cloudflare.com) on a `clickhouse.tech` domain. To solve the download issues discussed above, we have also configured a new location for ClickHouse packages that are also served by Cloudflare at [repo.clickhouse.tech](https://repo.clickhouse.tech). It used to have some quirks, but now it seems to be working fine while improving throughput and latencies in remote geographical locations by over an order of magnitude. 
+ +## Switching To Repository Behind CDN + +This transition has some more benefits besides improving the package fetching, but let's get back to them in a minute. One of the key reasons for this post is that we can't actually influence the repository configuration of ClickHouse users. We have updated all instructions, but for people who have followed these instructions earlier, **action is required** to use the new location behind CDN. Basically, you need to replace `http://repo.yandex.ru/clickhouse/` with `https://repo.clickhouse.tech/` in your package manager configuration. + +One-liner for Ubuntu or Debian: +```bash +sudo apt-get install apt-transport-https ca-certificates && sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.tech/|g' /etc/apt/sources.list.d/clickhouse.list && sudo apt-get update +``` + +One-liner for RedHat or CentOS: +```bash +sudo perl -pi -e 's|http://repo.yandex.ru/clickhouse/|https://repo.clickhouse.tech/|g' /etc/yum.repos.d/clickhouse* +``` + +As you might have noticed, the domain name is not the only thing that has changed: the new URL uses `https://` protocol. Usually, it's considered less important for package repositories compared to normal websites because most package managers check [GPG signatures](https://en.wikipedia.org/wiki/GNU_Privacy_Guard) for what they download anyway. However it still has some benefits: for example, it's not so uncommon for people to download packages via browser, `curl` or `wget`, and install them manually (while for [tgz](https://repo.clickhouse.tech/tgz/) builds it's the only option). Fewer opportunities for sniffing traffic can't hurt either. The downside is that `apt` in some Debian flavors has no HTTPS support by default and needs a couple more packages to be installed (`apt-transport-https` and `ca-certificates`). + +## Investigating Repository Usage + +The next important thing we obtained by using Cloudflare for our package repository is observability. Of course the same could have been implemented from scratch, but it'd require extra resources to develop and maintain, while Cloudflare provides quite rich tools for analyzing what's going on in your domains. + +!!! info "Did you know?" + It's kind of off-topic, but those Cloudflare features are internally based on ClickHouse, see their [HTTP analytics](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) and [DNS analytics](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/) blog posts. + +Just a few weeks ago they have also added [cache analytics](https://blog.cloudflare.com/introducing-cache-analytics/) feature, which allowed to drill into how effectively the content is cached on CDN edges and improve the CDN configuration accordingly. For example, it allowed debugging some inconsistencies in cached repository metadata. + +## Digging Deeper + +All those built-in observability tools provided by Cloudflare share one weak point: they are purely technical and generic, without any domain-specific awareness. They excel at debugging low-level issues, but it's hard to get a higher-level picture based on them. With our package repository scenario, we're not so interested in frequent metadata update requests, but we'd like to see reports on package downloads by version, kind, and so on. We definitely didn't want to operate a separate infrastructure to get those reports, but given there was no out-of-the-box solution, we had to be creative and managed to find a cool middle ground. 
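
Whatever the plumbing around it, the reports themselves are plain aggregations over a table of download events in ClickHouse. The actual schema isn't included in this post, so the following is only a rough sketch with made-up table and column names, roughly the shape of data the charts below are built on:

```sql
-- Hypothetical table of download events collected at the CDN edge;
-- the real schema behind repo.clickhouse.tech is not shown here.
CREATE TABLE repo_downloads
(
    event_date   Date,
    package_kind String,   -- deb, rpm or tgz
    version      String
)
ENGINE = MergeTree()
ORDER BY (event_date, package_kind);

-- The kind of report we are interested in: downloads by version and package kind.
SELECT version, package_kind, count() AS downloads
FROM repo_downloads
GROUP BY version, package_kind
ORDER BY downloads DESC;
```

The interesting question is how to fill such a table without maintaining a single dedicated server, and that is where the rest of this section comes in.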
+ +Ever heard the [“serverless computing”](https://en.wikipedia.org/wiki/Serverless_computing) hype recently? That was the basic idea: let's assemble a bunch of serverless or managed services to get what we want, without any dedicated servers. The plan was pretty straightforward: + +1. Dump details about package downloads to a ClickHouse database. +2. Connect some [BI](https://en.wikipedia.org/wiki/Business_intelligence) tool to that ClickHouse database and configure required charts/dashboards. + +Implementing it required a little bit of research, but the overall solution appeared to be quite elegant: + +1. For a ClickHouse database, it was a no-brainer to use [Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse). With a few clicks in the admin interface, we got a running ClickHouse cluster with properly configured high-availability and automated backups. Ad-hoc SQL queries could be run from that same admin interface. +2. Cloudflare allows customers to run custom code on CDN edge servers in a serverless fashion (so-called [workers](https://workers.cloudflare.com)). Those workers are executed in a tight sandbox which doesn't allow for anything complicated, but this feature fits perfectly to gather some data about download events and send it somewhere else. This is normally a paid feature, but special thanks to Connor Peshek from Cloudflare who arranged a lot of extra features for free on `clickhouse.tech` when we have applied to their [open-source support program](https://developers.cloudflare.com/sponsorships/). +3. To avoid publicly exposing yet another ClickHouse instance (like we did with **[playground](https://clickhouse.tech/docs/en/getting-started/playground/)** regardless of being a 100% anti-pattern), the download event data is sent to [Yandex Cloud Functions](https://cloud.yandex.com/services/functions). It's a generic serverless computing framework at Yandex Cloud, which also allows running custom code without maintaining any servers, but with less strict sandbox limitations and direct access to other cloud services like Managed ClickHouse that was needed for this task. +4. It didn't require much effort to choose a visualization tool either, as [DataLens BI](https://cloud.yandex.com/docs/datalens/) is tightly integrated with ClickHouse, capable to build what's required right from the UI, and satisfies the “no servers” requirement because it's a SaaS solution. Public access option for charts and dashboards have also appeared to be handy. + +There's not so much data collected yet, but here's a live example of how the resulting data visualization looks like. For example, here we can see that LTS releases of ClickHouse are not so popular yet *(yes, we have [LTS releases](https://clickhouse.tech/docs/en/faq/operations/production/)!)*: +![iframe](https://datalens.yandex/qk01mwxkgiysm?_embedded=1) + +While here we confirmed that `rpm` is at least as popular as `deb`: +![iframe](https://datalens.yandex/lfvldsf92i2uh?_embedded=1) + +Or you can take a look at all key charts for `repo.clickhouse.tech` together on a handy **[dashboard](https://datalens.yandex/pjzq4rot3t2ql)** with a filtering possibility. + +## Lessons Learned + +* CDN is a must-have if you want people from all over the world to download some artifacts that you produce. Beware the huge pay-for-traffic bills from most CDN providers though. +* Generic technical system metrics and drill-downs are a good starting point, but not always enough. +* Serverless is not a myth. 
Nowadays it is indeed possible to build useful products by just integrating various infrastructure services together, without any dedicated servers to take care of. + From 03b36c262e26e6335009f3c97ec8f4b5ddd20502 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 2 Jul 2020 03:09:57 +0300 Subject: [PATCH 166/330] Improve REVOKE command: now it requires only grant/admin option for only access which will be revoked. REVOKE ALL FROM user1 now revokes all granted roles. --- src/Access/AccessRights.cpp | 57 +++- src/Access/AccessRights.h | 4 +- src/Access/ContextAccess.cpp | 197 ++++++++----- src/Access/ContextAccess.h | 17 +- src/Access/RoleCache.cpp | 2 +- src/Interpreters/InterpreterGrantQuery.cpp | 262 ++++++++++++++---- src/Parsers/ParserGrantQuery.cpp | 13 +- .../test_create_user_and_login/test.py | 50 ++++ tests/integration/test_role/test.py | 42 +++ 9 files changed, 504 insertions(+), 140 deletions(-) diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 82ff3aaba98..988e8305605 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -426,12 +426,17 @@ public: friend bool operator!=(const Node & left, const Node & right) { return !(left == right); } - void merge(const Node & other, const Helper & helper) + void makeUnion(const Node & other, const Helper & helper) { - mergeAccessRec(other); + makeUnionRec(other); calculateFinalAccessRec(helper); } + void makeIntersection(const Node & other, const Helper & helper) + { + makeIntersectionRec(other); + calculateFinalAccessRec(helper); + } ProtoElements getElements() const { @@ -723,12 +728,12 @@ private: max_access = final_access | max_access_among_children; } - void mergeAccessRec(const Node & rhs) + void makeUnionRec(const Node & rhs) { if (rhs.children) { for (const auto & [rhs_childname, rhs_child] : *rhs.children) - getChild(rhs_childname).mergeAccessRec(rhs_child); + getChild(rhs_childname).makeUnionRec(rhs_child); } access |= rhs.access; if (children) @@ -740,6 +745,24 @@ private: } } } + + void makeIntersectionRec(const Node & rhs) + { + if (rhs.children) + { + for (const auto & [rhs_childname, rhs_child] : *rhs.children) + getChild(rhs_childname).makeIntersectionRec(rhs_child); + } + access &= rhs.access; + if (children) + { + for (auto & [lhs_childname, lhs_child] : *children) + { + if (!rhs.tryGetChild(lhs_childname)) + lhs_child.access &= rhs.access; + } + } + } }; @@ -989,7 +1012,7 @@ bool operator ==(const AccessRights & left, const AccessRights & right) } -void AccessRights::merge(const AccessRights & other) +void AccessRights::makeUnion(const AccessRights & other) { auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { @@ -1001,7 +1024,29 @@ void AccessRights::merge(const AccessRights & other) } if (other_root_node) { - root_node->merge(*other_root_node, Helper::instance()); + root_node->makeUnion(*other_root_node, Helper::instance()); + if (!root_node->access && !root_node->children) + root_node = nullptr; + } + }; + helper(root, other.root); + helper(root_with_grant_option, other.root_with_grant_option); +} + + +void AccessRights::makeIntersection(const AccessRights & other) +{ + auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) + { + if (!root_node) + { + if (other_root_node) + root_node = std::make_unique(*other_root_node); + return; + } + if (other_root_node) + { + root_node->makeIntersection(*other_root_node, Helper::instance()); if (!root_node->access && !root_node->children) root_node = nullptr; } 
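
In SQL terms, the intersection introduced above is what lets REVOKE become more permissive while staying safe: the grant/admin option is only checked for the part of the requested revocation that the grantee actually holds. The statements below are only an illustration of the behaviour described in the commit message, with names borrowed from the integration tests further down; they are not part of the patch itself.

```sql
-- Setup mirroring the integration tests: two users and one role.
CREATE USER A, B;
CREATE ROLE R1;

-- Privileges: B holds a single grant, A starts with nothing.
GRANT SELECT ON test.table TO B;

-- Executed as A: rejected with "Not enough privileges", since A has no grant option.
REVOKE SELECT ON test.table FROM B;

-- Once A holds SELECT ON test.table WITH GRANT OPTION, even a much broader
-- REVOKE is accepted, because only the intersection with what B actually
-- has (SELECT ON test.table) is revoked, and that is all A needs rights for.
GRANT SELECT ON test.table TO A WITH GRANT OPTION;
REVOKE ALL ON *.* FROM B;

-- Roles work the same way with ADMIN OPTION, and REVOKE ALL now also strips
-- granted roles:
GRANT R1 TO B;
GRANT R1 TO A WITH ADMIN OPTION;
REVOKE ALL FROM B;   -- executed as A, removes the role R1 from B
```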
diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index 7706edcb40a..d1cd35f09c5 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -93,7 +93,9 @@ public: /// Merges two sets of access rights together. /// It's used to combine access rights from multiple roles. - void merge(const AccessRights & other); + void makeUnion(const AccessRights & other); + + void makeIntersection(const AccessRights & other); friend bool operator ==(const AccessRights & left, const AccessRights & right); friend bool operator !=(const AccessRights & left, const AccessRights & right) { return !(left == right); } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 4a156c5972d..c53d073e43d 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -30,6 +30,73 @@ namespace ErrorCodes extern const int UNKNOWN_USER; } + +namespace +{ + std::shared_ptr mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info) + { + auto res = std::make_shared(user.access); + res->makeUnion(roles_info.access); + return res; + } + + std::shared_ptr applyParamsToAccessRights(const AccessRights & access, const ContextAccessParams & params) + { + auto res = std::make_shared(access); + + static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW + | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW + | AccessType::TRUNCATE; + + static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; + static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; + static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; + static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; + + if (params.readonly) + res->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); + + if (params.readonly == 1) + { + /// Table functions are forbidden in readonly mode. + /// For example, for readonly = 2 - allowed. + res->revoke(AccessType::CREATE_TEMPORARY_TABLE); + } + + if (!params.allow_ddl) + res->revoke(table_and_dictionary_ddl); + + if (!params.allow_introspection) + res->revoke(AccessType::INTROSPECTION); + + /// Anyone has access to the "system" database. + res->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); + + if (params.readonly != 1) + { + /// User has access to temporary or external table if such table was resolved in session or query context + res->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); + } + + if (params.readonly) + { + /// No grant option in readonly mode. + res->revokeGrantOption(AccessType::ALL); + } + + return res; + } + + + std::array to_array(const UUID & id) + { + std::array ids; + ids[0] = id; + return ids; + } +} + + ContextAccess::ContextAccess(const AccessControlManager & manager_, const Params & params_) : manager(&manager_) , params(params_) @@ -54,6 +121,10 @@ void ContextAccess::setUser(const UserPtr & user_) const /// User has been dropped. 
auto nothing_granted = std::make_shared(); access = nothing_granted; + access_without_readonly = nothing_granted; + access_with_allow_ddl = nothing_granted; + access_with_allow_introspection = nothing_granted; + access_from_user_and_roles = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; enabled_roles = nullptr; @@ -108,56 +179,18 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & enabled_row_policies = manager->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles); enabled_quota = manager->getEnabledQuota(*params.user_id, user_name, roles_info->enabled_roles, params.address, params.quota_key); enabled_settings = manager->getEnabledSettings(*params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles); - setFinalAccess(); + calculateAccessRights(); } -void ContextAccess::setFinalAccess() const +void ContextAccess::calculateAccessRights() const { - auto final_access = std::make_shared(); - *final_access = user->access; - if (roles_info) - final_access->merge(roles_info->access); + access_from_user_and_roles = mixAccessRightsFromUserAndRoles(*user, *roles_info); + access = applyParamsToAccessRights(*access_from_user_and_roles, params); - static const AccessFlags table_ddl = AccessType::CREATE_DATABASE | AccessType::CREATE_TABLE | AccessType::CREATE_VIEW - | AccessType::ALTER_TABLE | AccessType::ALTER_VIEW | AccessType::DROP_DATABASE | AccessType::DROP_TABLE | AccessType::DROP_VIEW - | AccessType::TRUNCATE; - - static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; - static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; - static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; - static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; - - if (params.readonly) - final_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); - - if (params.readonly == 1) - { - /// Table functions are forbidden in readonly mode. - /// For example, for readonly = 2 - allowed. - final_access->revoke(AccessType::CREATE_TEMPORARY_TABLE); - } - - if (!params.allow_ddl) - final_access->revoke(table_and_dictionary_ddl); - - if (!params.allow_introspection) - final_access->revoke(AccessType::INTROSPECTION); - - /// Anyone has access to the "system" database. - final_access->grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE); - - if (params.readonly != 1) - { - /// User has access to temporary or external table if such table was resolved in session or query context - final_access->grant(AccessFlags::allTableFlags() | AccessFlags::allColumnFlags(), DatabaseCatalog::TEMPORARY_DATABASE); - } - - if (params.readonly) - { - /// No grant option in readonly mode. 
- final_access->revokeGrantOption(AccessType::ALL); - } + access_without_readonly = nullptr; + access_with_allow_ddl = nullptr; + access_with_allow_introspection = nullptr; if (trace_log) { @@ -168,10 +201,8 @@ void ContextAccess::setFinalAccess() const boost::algorithm::join(roles_info->getEnabledRolesNames(), ", ")); } LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection); - LOG_TRACE(trace_log, "List of all grants: {}", final_access->toString()); + LOG_TRACE(trace_log, "List of all grants: {}", access->toString()); } - - access = final_access; } @@ -361,9 +392,7 @@ void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... std::lock_guard lock{mutex}; if (!user) - { show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); - } if (grant_option && access->isGranted(flags, args...)) { @@ -381,7 +410,7 @@ void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... { Params changed_params = params; changed_params.readonly = 0; - access_without_readonly = manager->getContextAccess(changed_params); + access_without_readonly = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); } if (access_without_readonly->isGranted(flags, args...)) @@ -402,7 +431,7 @@ void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... { Params changed_params = params; changed_params.allow_ddl = true; - access_with_allow_ddl = manager->getContextAccess(changed_params); + access_with_allow_ddl = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); } if (access_with_allow_ddl->isGranted(flags, args...)) @@ -417,7 +446,7 @@ void ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &... { Params changed_params = params; changed_params.allow_introspection = true; - access_with_allow_introspection = manager->getContextAccess(changed_params); + access_with_allow_introspection = applyParamsToAccessRights(*access_from_user_and_roles, changed_params); } if (access_with_allow_introspection->isGranted(flags, args...)) @@ -483,25 +512,65 @@ void ContextAccess::checkGrantOption(const AccessRightsElement & element) const void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(elements); } -void ContextAccess::checkAdminOption(const UUID & role_id) const +template +void ContextAccess::checkAdminOptionImpl(const Container & role_ids, const GetNameFunction & get_name_function) const { if (isGranted(AccessType::ROLE_ADMIN)) return; auto info = getRolesInfo(); - if (info && info->enabled_roles_with_admin_option.count(role_id)) + if (!info) + { + if (!user) + throw Exception(user_name + ": User has been dropped", ErrorCodes::UNKNOWN_USER); return; + } - if (!user) - throw Exception(user_name + ": User has been dropped", ErrorCodes::UNKNOWN_USER); + size_t i = 0; + for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i) + { + const UUID & role_id = *it; + if (info->enabled_roles_with_admin_option.count(role_id)) + continue; - std::optional role_name = manager->readName(role_id); - if (!role_name) - role_name = "ID {" + toString(role_id) + "}"; - throw Exception( - getUserName() + ": Not enough privileges. 
To execute this query it's necessary to have the grant " + backQuoteIfNeed(*role_name) - + " WITH ADMIN OPTION ", - ErrorCodes::ACCESS_DENIED); + auto role_name = get_name_function(role_id, i); + if (!role_name) + role_name = "ID {" + toString(role_id) + "}"; + String msg = "To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option"; + if (info->enabled_roles.count(role_id)) + msg = "Role " + backQuote(*role_name) + " is granted, but without ADMIN option. " + msg; + throw Exception(getUserName() + ": Not enough privileges. " + msg, ErrorCodes::ACCESS_DENIED); + } +} + +void ContextAccess::checkAdminOption(const UUID & role_id) const +{ + checkAdminOptionImpl(to_array(role_id), [this](const UUID & id, size_t) { return manager->tryReadName(id); }); +} + +void ContextAccess::checkAdminOption(const UUID & role_id, const String & role_name) const +{ + checkAdminOptionImpl(to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); +} + +void ContextAccess::checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const +{ + checkAdminOptionImpl(to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids) const +{ + checkAdminOptionImpl(role_ids, [this](const UUID & id, size_t) { return manager->tryReadName(id); }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const +{ + checkAdminOptionImpl(role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); +} + +void ContextAccess::checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const +{ + checkAdminOptionImpl(role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); } } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 997ea585c68..9a5758b79a6 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -135,6 +135,11 @@ public: /// Checks if a specified role is granted with admin option, and throws an exception if not. void checkAdminOption(const UUID & role_id) const; + void checkAdminOption(const UUID & role_id, const String & role_name) const; + void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const std::vector & role_ids) const; + void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; + void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Makes an instance of ContextAccess which provides full access to everything /// without any limitations. This is used for the global context. @@ -148,7 +153,7 @@ private: void setUser(const UserPtr & user_) const; void setRolesInfo(const std::shared_ptr & roles_info_) const; void setSettingsAndConstraints() const; - void setFinalAccess() const; + void calculateAccessRights() const; template bool isGrantedImpl(const AccessFlags & flags) const; @@ -180,6 +185,9 @@ private: template void checkAccessImpl2(const AccessFlags & flags, const Args &... 
args) const; + template + void checkAdminOptionImpl(const Container & role_ids, const GetNameFunction & get_name_function) const; + const AccessControlManager * manager = nullptr; const Params params; mutable Poco::Logger * trace_log = nullptr; @@ -193,9 +201,10 @@ private: mutable std::shared_ptr enabled_row_policies; mutable std::shared_ptr enabled_quota; mutable std::shared_ptr enabled_settings; - mutable std::shared_ptr access_without_readonly; - mutable std::shared_ptr access_with_allow_ddl; - mutable std::shared_ptr access_with_allow_introspection; + mutable std::shared_ptr access_without_readonly; + mutable std::shared_ptr access_with_allow_ddl; + mutable std::shared_ptr access_with_allow_introspection; + mutable std::shared_ptr access_from_user_and_roles; mutable std::mutex mutex; }; diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index a0468958d42..3dca4b7719e 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -43,7 +43,7 @@ namespace roles_info.enabled_roles_with_admin_option.emplace(role_id); roles_info.names_of_roles[role_id] = role->getName(); - roles_info.access.merge(role->access); + roles_info.access.makeUnion(role->access); roles_info.settings_from_enabled_roles.merge(role->settings); for (const auto & granted_role : role->granted_roles.roles) diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index b7c62197059..2f468507eb6 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -9,58 +9,197 @@ #include #include #include +#include namespace DB { namespace { - template - void updateFromQueryImpl(T & grantee, const ASTGrantQuery & query, const std::vector & roles_from_query) + using Kind = ASTGrantQuery::Kind; + + void doGrantAccess( + AccessRights & current_access, + const AccessRightsElements & access_to_grant, + bool with_grant_option) + { + if (with_grant_option) + current_access.grantWithGrantOption(access_to_grant); + else + current_access.grant(access_to_grant); + } + + + AccessRightsElements getFilteredAccessRightsElementsToRevoke( + const AccessRights & current_access, const AccessRightsElements & access_to_revoke, bool grant_option) + { + AccessRights intersection; + if (grant_option) + intersection.grantWithGrantOption(access_to_revoke); + else + intersection.grant(access_to_revoke); + intersection.makeIntersection(current_access); + + AccessRightsElements res; + for (auto & element : intersection.getElements()) + { + if ((element.kind == Kind::GRANT) && (element.grant_option || !grant_option)) + res.emplace_back(std::move(element)); + } + + return res; + } + + void doRevokeAccess( + AccessRights & current_access, + const AccessRightsElements & access_to_revoke, + bool grant_option, + const std::shared_ptr & context) + { + if (context && !context->hasGrantOption(access_to_revoke)) + context->checkGrantOption(getFilteredAccessRightsElementsToRevoke(current_access, access_to_revoke, grant_option)); + + if (grant_option) + current_access.revokeGrantOption(access_to_revoke); + else + current_access.revoke(access_to_revoke); + } + + + void doGrantRoles(GrantedRoles & granted_roles, + const RolesOrUsersSet & roles_to_grant, + bool with_admin_option) + { + auto ids = roles_to_grant.getMatchingIDs(); + + if (with_admin_option) + granted_roles.grantWithAdminOption(ids); + else + granted_roles.grant(ids); + } + + + std::vector + getFilteredListOfRolesToRevoke(const GrantedRoles & granted_roles, const RolesOrUsersSet & roles_to_revoke, 
bool admin_option) + { + std::vector ids; + if (roles_to_revoke.all) + { + boost::range::set_difference( + admin_option ? granted_roles.roles_with_admin_option : granted_roles.roles, + roles_to_revoke.except_ids, + std::back_inserter(ids)); + } + else + { + boost::range::set_intersection( + admin_option ? granted_roles.roles_with_admin_option : granted_roles.roles, + roles_to_revoke.getMatchingIDs(), + std::back_inserter(ids)); + } + return ids; + } + + void doRevokeRoles(GrantedRoles & granted_roles, + RolesOrUsersSet * default_roles, + const RolesOrUsersSet & roles_to_revoke, + bool admin_option, + const std::unordered_map & names_of_roles, + const std::shared_ptr & context) + { + auto ids = getFilteredListOfRolesToRevoke(granted_roles, roles_to_revoke, admin_option); + + if (context) + context->checkAdminOption(ids, names_of_roles); + + if (admin_option) + granted_roles.revokeAdminOption(ids); + else + { + granted_roles.revoke(ids); + if (default_roles) + { + for (const UUID & id : ids) + default_roles->ids.erase(id); + for (const UUID & id : ids) + default_roles->except_ids.erase(id); + } + } + } + + + template + void collectRoleNamesTemplate( + std::unordered_map & names_of_roles, + const T & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const AccessControlManager & access_control) + { + for (const auto & id : getFilteredListOfRolesToRevoke(grantee.granted_roles, roles_from_query, query.admin_option)) + { + auto name = access_control.tryReadName(id); + if (name) + names_of_roles.emplace(id, std::move(*name)); + } + } + + void collectRoleNames( + std::unordered_map & names_of_roles, + const IAccessEntity & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const AccessControlManager & access_control) + { + if (const auto * user = typeid_cast(&grantee)) + collectRoleNamesTemplate(names_of_roles, *user, query, roles_from_query, access_control); + else if (const auto * role = typeid_cast(&grantee)) + collectRoleNamesTemplate(names_of_roles, *role, query, roles_from_query, access_control); + } + + + template + void updateFromQueryTemplate( + T & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::unordered_map & names_of_roles, + const std::shared_ptr & context) { - using Kind = ASTGrantQuery::Kind; if (!query.access_rights_elements.empty()) { if (query.kind == Kind::GRANT) - { - if (query.grant_option) - grantee.access.grantWithGrantOption(query.access_rights_elements); - else - grantee.access.grant(query.access_rights_elements); - } + doGrantAccess(grantee.access, query.access_rights_elements, query.grant_option); else - { - if (query.grant_option) - grantee.access.revokeGrantOption(query.access_rights_elements); - else - grantee.access.revoke(query.access_rights_elements); - } + doRevokeAccess(grantee.access, query.access_rights_elements, query.grant_option, context); } if (!roles_from_query.empty()) { if (query.kind == Kind::GRANT) - { - if (query.admin_option) - grantee.granted_roles.grantWithAdminOption(roles_from_query); - else - grantee.granted_roles.grant(roles_from_query); - } + doGrantRoles(grantee.granted_roles, roles_from_query, query.admin_option); else { - if (query.admin_option) - grantee.granted_roles.revokeAdminOption(roles_from_query); - else - grantee.granted_roles.revoke(roles_from_query); - + RolesOrUsersSet * grantee_default_roles = nullptr; if constexpr (std::is_same_v) - { - for (const UUID & role_from_query : roles_from_query) - 
grantee.default_roles.ids.erase(role_from_query); - } + grantee_default_roles = &grantee.default_roles; + doRevokeRoles(grantee.granted_roles, grantee_default_roles, roles_from_query, query.admin_option, names_of_roles, context); } } } + + void updateFromQueryImpl( + IAccessEntity & grantee, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::unordered_map & names_or_roles, + const std::shared_ptr & context) + { + if (auto * user = typeid_cast(&grantee)) + updateFromQueryTemplate(*user, query, roles_from_query, names_or_roles, context); + else if (auto * role = typeid_cast(&grantee)) + updateFromQueryTemplate(*role, query, roles_from_query, names_or_roles, context); + } } @@ -68,40 +207,45 @@ BlockIO InterpreterGrantQuery::execute() { auto & query = query_ptr->as(); query.replaceCurrentUserTagWithName(context.getUserName()); - auto access = context.getAccess(); - auto & access_control = context.getAccessControlManager(); - - std::vector roles_from_query; - if (query.roles) - { - roles_from_query = RolesOrUsersSet{*query.roles, access_control}.getMatchingIDs(access_control); - for (const UUID & role_from_query : roles_from_query) - access->checkAdminOption(role_from_query); - } if (!query.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, context, query.access_rights_elements, true); + auto access = context.getAccess(); + auto & access_control = context.getAccessControlManager(); query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); - access->checkGrantOption(query.access_rights_elements); + + RolesOrUsersSet roles_from_query; + if (query.roles) + roles_from_query = RolesOrUsersSet{*query.roles, access_control}; std::vector to_roles = RolesOrUsersSet{*query.to_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); + std::unordered_map names_of_roles; + if (!roles_from_query.empty() && (query.kind == Kind::REVOKE)) + { + for (const auto & id : to_roles) + { + auto entity = access_control.tryRead(id); + if (entity) + collectRoleNames(names_of_roles, *entity, query, roles_from_query, access_control); + } + } + + if (query.kind == Kind::GRANT) /// For Kind::REVOKE the grant/admin option is checked inside updateFromQueryImpl(). 
+ { + if (!query.access_rights_elements.empty()) + access->checkGrantOption(query.access_rights_elements); + + if (!roles_from_query.empty()) + access->checkAdminOption(roles_from_query.getMatchingIDs()); + } + auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); - if (auto user = typeid_cast>(clone)) - { - updateFromQueryImpl(*user, query, roles_from_query); - return user; - } - else if (auto role = typeid_cast>(clone)) - { - updateFromQueryImpl(*role, query, roles_from_query); - return role; - } - else - return entity; + updateFromQueryImpl(*clone, query, roles_from_query, names_of_roles, access); + return clone; }; access_control.update(to_roles, update_func); @@ -112,19 +256,19 @@ BlockIO InterpreterGrantQuery::execute() void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery & query) { - std::vector roles_from_query; + RolesOrUsersSet roles_from_query; if (query.roles) - roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(user, query, roles_from_query); + roles_from_query = RolesOrUsersSet{*query.roles}; + updateFromQueryImpl(user, query, roles_from_query, {}, nullptr); } void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery & query) { - std::vector roles_from_query; + RolesOrUsersSet roles_from_query; if (query.roles) - roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(role, query, roles_from_query); + roles_from_query = RolesOrUsersSet{*query.roles}; + updateFromQueryImpl(role, query, roles_from_query, {}, nullptr); } } diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 62efd5314ac..6e42b165b21 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -19,6 +19,8 @@ namespace ErrorCodes namespace { + using Kind = ASTGrantQuery::Kind; + bool parseAccessFlags(IParser::Pos & pos, Expected & expected, AccessFlags & access_flags) { static constexpr auto is_one_of_access_type_words = [](IParser::Pos & pos_) @@ -154,13 +156,16 @@ namespace } - bool parseRoles(IParser::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseRoles(IParser::Pos & pos, Expected & expected, Kind kind, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { - ASTPtr ast; ParserRolesOrUsersSet roles_p; roles_p.allowRoleNames().useIDMode(id_mode); + if (kind == Kind::REVOKE) + roles_p.allowAll(); + + ASTPtr ast; if (!roles_p.parse(pos, ast, expected)) return false; @@ -174,7 +179,6 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { - using Kind = ASTGrantQuery::Kind; if (kind == Kind::GRANT) { if (!ParserKeyword{"TO"}.ignore(pos, expected)) @@ -217,7 +221,6 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) attach = true; } - using Kind = ASTGrantQuery::Kind; Kind kind; if (ParserKeyword{"GRANT"}.ignore(pos, expected)) kind = Kind::GRANT; @@ -242,7 +245,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) AccessRightsElements elements; std::shared_ptr roles; - if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, attach, roles)) + if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, kind, attach, roles)) return false; if (cluster.empty()) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 
e1bc99ca75b..392a4ef98ee 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -64,6 +64,56 @@ def test_grant_option(): instance.query('REVOKE SELECT ON test.table FROM A, B') +def test_revoke_requires_grant_option(): + instance.query("CREATE USER A") + instance.query("CREATE USER B") + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + expected_error = "Not enough privileges" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON test.table TO A") + expected_error = "privileges have been granted, but without grant option" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON test.table TO A WITH GRANT OPTION") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE ALL ON test.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE ALL ON *.* FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("REVOKE GRANT OPTION FOR ALL ON *.* FROM A") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + expected_error = "privileges have been granted, but without grant option" + assert expected_error in instance.query_and_get_error("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + + instance.query("GRANT SELECT ON test.* TO A WITH GRANT OPTION") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT SELECT ON test.table TO B\n" + instance.query("REVOKE SELECT ON test.table FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + def test_introspection(): instance.query("CREATE USER A") instance.query("CREATE USER B") diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 5fb521fc1ff..ce6e4e53512 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -97,6 +97,48 @@ def test_admin_option(): assert instance.query("SELECT * FROM test_table", user='B') == "1\t5\n2\t10\n" +def test_revoke_requires_admin_option(): + instance.query("CREATE USER A, B") + instance.query("CREATE ROLE R1, R2") + + instance.query("GRANT R1 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + expected_error = "necessary to have the role R1 granted" + 
assert expected_error in instance.query_and_get_error("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + instance.query("GRANT R1 TO A") + expected_error = "granted, but without ADMIN option" + assert expected_error in instance.query_and_get_error("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + + instance.query("GRANT R1 TO A WITH ADMIN OPTION") + instance.query("REVOKE R1 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1 TO B\n" + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1, R2 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + expected_error = "necessary to have the role R2 granted" + assert expected_error in instance.query_and_get_error("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + instance.query("REVOKE ALL EXCEPT R2 FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "GRANT R2 TO B\n" + instance.query("GRANT R2 TO A WITH ADMIN OPTION") + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + instance.query("GRANT R1, R2 TO B") + assert instance.query("SHOW GRANTS FOR B") == "GRANT R1, R2 TO B\n" + instance.query("REVOKE ALL FROM B", user='A') + assert instance.query("SHOW GRANTS FOR B") == "" + + def test_introspection(): instance.query("CREATE USER A") instance.query("CREATE USER B") From 6527e04ec3f946d6d83e3c8b8c7a5fca32c243b4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 15:35:51 +0300 Subject: [PATCH 167/330] Fix limiting the number of threads for VIEW. --- src/Processors/QueryPipeline.h | 7 +++++++ src/Processors/QueryPlan/QueryPlan.cpp | 4 ++-- src/Processors/QueryPlan/ReadFromStorageStep.cpp | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 7dddb2526e5..adab155d201 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -167,6 +167,13 @@ public: /// Set upper limit for the recommend number of threads void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } + /// Update upper limit for the recommend number of threads + void limitMaxThreads(size_t max_threads_) + { + if (max_threads == 0 || max_threads_ < max_threads) + max_threads = max_threads_; + } + /// Convert query pipeline to single or several pipes. 
Pipe getPipe() &&; Pipes getPipes() &&; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 25983c25c7e..cd8c442a3db 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -153,8 +153,8 @@ QueryPipelinePtr QueryPlan::buildQueryPipeline() bool limit_max_threads = frame.pipelines.empty(); last_pipeline = frame.node->step->updatePipeline(std::move(frame.pipelines)); - if (limit_max_threads) - last_pipeline->setMaxThreads(max_threads); + if (limit_max_threads && max_threads) + last_pipeline->limitMaxThreads(max_threads); stack.pop(); } diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index e0781c24f7f..7e8d44abed8 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -113,7 +113,7 @@ ReadFromStorageStep::ReadFromStorageStep( } } - if (pipes.size() == 1) + if (pipes.size() == 1 && !storage->isView()) pipeline->setMaxThreads(1); for (auto & pipe : pipes) From 4d01fb3cbce2971f19a66ac97e85092523487125 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 2 Jul 2020 15:54:32 +0300 Subject: [PATCH 168/330] Update tips.md --- docs/en/operations/tips.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index b18daedf3d6..c42108ee40e 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -120,6 +120,7 @@ zoo.cfg: tickTime=2000 # The number of ticks that the initial # synchronization phase can take +# This value is not quite motivated initLimit=30000 # The number of ticks that can pass between # sending a request and getting an acknowledgement @@ -127,6 +128,9 @@ syncLimit=10 maxClientCnxns=2000 +# It is the maximum value that client may request and the server will accept. +# It is Ok to have high maxSessionTimeout on server to allow clients to work with high session timeout if they want. +# But we request session timeout of 30 seconds by default (you can change it with session_timeout_ms in ClickHouse config). maxSessionTimeout=60000000 # the directory where the snapshot is stored. dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '}}' }}/data From aca1d8ac775a450b3d6365981f21646b5173542e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 16:06:10 +0300 Subject: [PATCH 169/330] Added test. 
--- .../queries/0_stateless/01356_view_threads.reference | 3 +++ tests/queries/0_stateless/01356_view_threads.sql | 12 ++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/01356_view_threads.reference create mode 100644 tests/queries/0_stateless/01356_view_threads.sql diff --git a/tests/queries/0_stateless/01356_view_threads.reference b/tests/queries/0_stateless/01356_view_threads.reference new file mode 100644 index 00000000000..4e9079198d5 --- /dev/null +++ b/tests/queries/0_stateless/01356_view_threads.reference @@ -0,0 +1,3 @@ +0 249999500000 +1 250000000000 +1 diff --git a/tests/queries/0_stateless/01356_view_threads.sql b/tests/queries/0_stateless/01356_view_threads.sql new file mode 100644 index 00000000000..5290ec555af --- /dev/null +++ b/tests/queries/0_stateless/01356_view_threads.sql @@ -0,0 +1,12 @@ +drop table if exists table_01356_view_threads; + +create view table_01356_view_threads as select number % 10 as g, sum(number) as s from numbers_mt(1000000) group by g; + +set log_queries = 1; +set max_threads = 16; +select g % 2 as gg, sum(s) from table_01356_view_threads group by gg order by gg; + +system flush logs; +select length(thread_ids) >= 16 from system.query_log where event_date >= today() - 1 and lower(query) like '%select g % 2 as gg, sum(s) from table_01356_view_threads group by gg order by gg%' and type = 'QueryFinish' order by query_start_time desc limit 1; + +drop table if exists table_01356_view_threads; From 923c7e7c2dff9adf1181316636579aad6a8d39c6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Jul 2020 16:19:25 +0300 Subject: [PATCH 170/330] Remove pvs studio from images list --- docker/images.json | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker/images.json b/docker/images.json index 0df7e53a2db..23f8cc0d9fd 100644 --- a/docker/images.json +++ b/docker/images.json @@ -15,10 +15,6 @@ "docker/test/pvs" ] }, - "docker/test/pvs": { - "name": "yandex/clickhouse-pvs-test", - "dependent": [] - }, "docker/test/coverage": { "name": "yandex/clickhouse-coverage", "dependent": [] From 96df2e6b7131ed27a4257d3119e88548cf2176cb Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Jul 2020 17:35:10 +0300 Subject: [PATCH 171/330] Better shutdown and conversion --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 8 +---- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 ++++++++++++------- .../integration/test_storage_rabbitmq/test.py | 2 +- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index a1442f91fbe..e10a4eb0f96 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include #include @@ -152,11 +150,7 @@ Block RabbitMQBlockInputStream::readImpl() result_block.insert(column); } - return ConvertingBlockInputStream( - std::make_shared(result_block), - getHeader(), - ConvertingBlockInputStream::MatchColumnsMode::Name) - .read(); + return result_block; } } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7426e939bec..00da53e7909 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -153,11 +154,14 @@ Pipes StorageRabbitMQ::read( Pipes pipes; pipes.reserve(num_created_consumers); + auto sample_block = 
metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); for (size_t i = 0; i < num_created_consumers; ++i) { - pipes.emplace_back( - std::make_shared(std::make_shared( - *this, metadata_snapshot, context, column_names, log))); + auto rabbit_stream = std::make_shared( + *this, metadata_snapshot, context, column_names, log); + auto converting_stream = std::make_shared( + rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name); + pipes.emplace_back(std::make_shared(converting_stream)); } if (!loop_started) @@ -202,17 +206,17 @@ void StorageRabbitMQ::shutdown() { stream_cancelled = true; + event_handler->stop(); + + looping_task->deactivate(); + streaming_task->deactivate(); + heartbeat_task->deactivate(); + for (size_t i = 0; i < num_created_consumers; ++i) { popReadBuffer(); } - streaming_task->deactivate(); - heartbeat_task->deactivate(); - - event_handler->stop(); - looping_task->deactivate(); - connection->close(); } @@ -355,17 +359,22 @@ bool StorageRabbitMQ::streamToViews() BlockInputStreams streams; streams.reserve(num_created_consumers); + auto metadata_snapshot = getInMemoryMetadataPtr(); + auto column_names = block_io.out->getHeader().getNames(); + auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); for (size_t i = 0; i < num_created_consumers; ++i) { - auto stream = std::make_shared(*this, getInMemoryMetadataPtr(), rabbitmq_context, block_io.out->getHeader().getNames(), log); - streams.emplace_back(stream); + auto rabbit_stream = std::make_shared(*this, metadata_snapshot, rabbitmq_context, column_names, log); + auto converting_stream = std::make_shared(rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name); + + streams.emplace_back(converting_stream); // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; const Settings & settings = global_context.getSettingsRef(); limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; limits.timeout_overflow_mode = OverflowMode::BREAK; - stream->setLimits(limits); + rabbit_stream->setLimits(limits); } if (!loop_started) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 42b7101f9c6..6da7239fc94 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -816,7 +816,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): channel.stop_consuming() consumer.basic_qos(prefetch_count=50) - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() From ab54a96cc5154aefd6ad10485972bf63854faa72 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Jul 2020 17:38:09 +0300 Subject: [PATCH 172/330] Reverse arguments --- tests/integration/test_storage_rabbitmq/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 6da7239fc94..42b7101f9c6 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -816,7 +816,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): channel.stop_consuming() consumer.basic_qos(prefetch_count=50) - consumer.basic_consume(queue_name, onReceived) + consumer.basic_consume(onReceived, queue_name) consumer.start_consuming() consumer_connection.close() From 
57d727d0784fad8d4cddd4d1c734a5626c3fbb65 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 17:51:10 +0300 Subject: [PATCH 173/330] Fix result_rows and result_bytes metrics for selects. --- src/DataStreams/BlockIO.h | 12 +++++++++--- src/Interpreters/executeQuery.cpp | 11 ++++++++++- src/Processors/Formats/IOutputFormat.cpp | 2 ++ src/Processors/Formats/IOutputFormat.h | 8 ++++++++ src/Processors/QueryPipeline.h | 2 ++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/DataStreams/BlockIO.h b/src/DataStreams/BlockIO.h index d4733e6aebe..91d7efac8d1 100644 --- a/src/DataStreams/BlockIO.h +++ b/src/DataStreams/BlockIO.h @@ -31,8 +31,8 @@ struct BlockIO QueryPipeline pipeline; /// Callbacks for query logging could be set here. - std::function finish_callback; - std::function exception_callback; + std::function finish_callback; + std::function exception_callback; /// When it is true, don't bother sending any non-empty blocks to the out stream bool null_format = false; @@ -41,7 +41,13 @@ struct BlockIO void onFinish() { if (finish_callback) - finish_callback(in.get(), out.get()); + { + QueryPipeline * pipeline_ptr = nullptr; + if (pipeline.initialized()) + pipeline_ptr = &pipeline; + + finish_callback(in.get(), out.get(), pipeline_ptr); + } } void onException() diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 41fa60324ec..860c56b1052 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -479,7 +479,8 @@ static std::tuple executeQueryImpl( } /// Also make possible for caller to log successful query finish and exception during execution. - auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable + auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] + (IBlockInputStream * stream_in, IBlockOutputStream * stream_out, QueryPipeline * query_pipeline) mutable { QueryStatus * process_list_elem = context.getProcessListElement(); @@ -528,6 +529,14 @@ static std::tuple executeQueryImpl( elem.result_bytes = counting_stream->getProgress().read_bytes; } } + else if (query_pipeline) + { + if (const auto * output_format = query_pipeline->getOutputFormat()) + { + elem.result_rows = output_format->getResultRows(); + elem.result_bytes = output_format->getResultBytes(); + } + } if (elem.read_rows != 0) { diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 334843036dc..f7fc6170cad 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -59,6 +59,8 @@ void IOutputFormat::work() switch (current_block_kind) { case Main: + result_rows += current_chunk.getNumRows(); + result_bytes += current_chunk.allocatedBytes(); consume(std::move(current_chunk)); break; case Totals: diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 2e3db50ee6e..5b8f664aa23 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -79,6 +79,14 @@ public: void setTotals(const Block & totals) { consumeTotals(Chunk(totals.getColumns(), totals.rows())); } void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } + + size_t getResultRows() const { return result_rows; } + size_t getResultBytes() const { return result_rows; } + +private: + 
/// Counters for consumed chunks. Are used for QueryLog. + size_t result_rows = 0; + size_t result_bytes = 0; }; } diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 7dddb2526e5..9a33b549ab5 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -110,6 +110,8 @@ public: void addCreatingSetsTransform(ProcessorPtr transform); /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. void setOutputFormat(ProcessorPtr output); + /// Get current OutputFormat. + IOutputFormat * getOutputFormat() const { return output_format; } /// Sink is a processor with single input port and no output ports. Creates sink for each output port. /// Pipeline will be completed after this transformation. void setSinks(const ProcessorGetterWithStreamKind & getter); From e7ab4df1485e69b47e4e548f56a9f35a46781a07 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 18:00:37 +0300 Subject: [PATCH 174/330] Fix result_rows and result_bytes metrics for selects. --- src/Processors/Formats/IOutputFormat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 5b8f664aa23..ae5e4d72d3c 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -81,7 +81,7 @@ public: void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } size_t getResultRows() const { return result_rows; } - size_t getResultBytes() const { return result_rows; } + size_t getResultBytes() const { return result_bytes; } private: /// Counters for consumed chunks. Are used for QueryLog. From 8513e1ec7498ca5cb2dc162ebd642cadfc708c6e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 2 Jul 2020 18:17:50 +0300 Subject: [PATCH 175/330] improve breadcrumbs markup --- website/templates/docs/content.html | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/website/templates/docs/content.html b/website/templates/docs/content.html index 711ab0bd3b8..29db92e377a 100644 --- a/website/templates/docs/content.html +++ b/website/templates/docs/content.html @@ -7,21 +7,19 @@ {% endif %} {% if ancestors %} {% set ancestor_ns = namespace(level=ancestors|length) %} -
- -
+ {% endif %} {% include "templates/docs/machine-translated.html" %} From 3f51419889f869750838d8417031b45a3497d565 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Jul 2020 18:26:44 +0300 Subject: [PATCH 176/330] Fix tests. --- src/DataTypes/DataTypeNullable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 1766b399c2a..22d403da6c4 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -90,7 +90,7 @@ public: bool canBeComparedWithCollation() const override { return nested_data_type->canBeComparedWithCollation(); } bool canBeUsedAsVersion() const override { return false; } bool isSummable() const override { return nested_data_type->isSummable(); } - bool canBeUsedInBooleanContext() const override { return nested_data_type->canBeUsedInBooleanContext(); } + bool canBeUsedInBooleanContext() const override { return nested_data_type->canBeUsedInBooleanContext() || onlyNull(); } bool haveMaximumSizeOfValue() const override { return nested_data_type->haveMaximumSizeOfValue(); } size_t getMaximumSizeOfValueInMemory() const override { return 1 + nested_data_type->getMaximumSizeOfValueInMemory(); } bool isNullable() const override { return true; } From a5cbeda1e36d2976ce5da074b9ce8393eb0565a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 2 Jul 2020 19:13:42 +0300 Subject: [PATCH 177/330] fix segfault with -StateResample combinators --- src/Interpreters/Aggregator.cpp | 10 ++++++++-- .../0_stateless/01356_state_resample.reference | 8 ++++++++ tests/queries/0_stateless/01356_state_resample.sql | 14 ++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01356_state_resample.reference create mode 100644 tests/queries/0_stateless/01356_state_resample.sql diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 5f7414b774b..1b8439fc704 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -1180,10 +1181,15 @@ Block Aggregator::prepareBlockAndFill( if (aggregate_functions[i]->isState()) { /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*final_aggregate_columns[i]); + ColumnAggregateFunction * column_aggregate_func = nullptr; + /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. 
+ if (auto * column_array = typeid_cast(final_aggregate_columns[i].get())) + column_aggregate_func = &assert_cast(column_array->getData()); + else + column_aggregate_func = &assert_cast(*final_aggregate_columns[i]); for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func.addArena(pool); + column_aggregate_func->addArena(pool); } } } diff --git a/tests/queries/0_stateless/01356_state_resample.reference b/tests/queries/0_stateless/01356_state_resample.reference new file mode 100644 index 00000000000..40c606b4a68 --- /dev/null +++ b/tests/queries/0_stateless/01356_state_resample.reference @@ -0,0 +1,8 @@ +[900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090] +[900,910,920,930,940,950,960,970,980,990,1000,1010,1020,1030,1040,1050,1060,1070,1080,1090] +[360,243,306,372,252,315,384,261,324,396,270,333,408,279,342,420,288,351,432,297] +[300,364,246,309,376,255,318,388,264,327,400,273,336,412,282,345,424,291,354,436] +[240,303,368,249,312,380,258,321,392,267,330,404,276,339,416,285,348,428,294,357] +[[0,20,40],[1,21,41],[2,22,42],[3,23,43],[4,24,44],[5,25,45],[6,26,46],[7,27,47],[8,28,48],[9,29,49],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]] +[[0,20,40],[1,21,41],[2,22,42],[3,23,43],[4,24,44],[5,25,45],[6,26,46],[7,27,47],[8,28,48],[9,29,49],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]] +[1800,1820,1840,1860,1880,1900,1920,1940,1960,1980,2000,2020,2040,2060,2080,2100,2120,2140,2160,2180] diff --git a/tests/queries/0_stateless/01356_state_resample.sql b/tests/queries/0_stateless/01356_state_resample.sql new file mode 100644 index 00000000000..6be28e19d87 --- /dev/null +++ b/tests/queries/0_stateless/01356_state_resample.sql @@ -0,0 +1,14 @@ +select sumResample(0, 20, 1)(number, number % 20) from numbers(200); +select arrayMap(x -> finalizeAggregation(x), state) from (select sumStateResample(0, 20, 1)(number, number % 20) as state from numbers(200)); +select arrayMap(x -> finalizeAggregation(x), state) from +( + select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 +); + +select groupArrayResample(0, 20, 1)(number, number % 20) from numbers(50); +select arrayMap(x -> finalizeAggregation(x), state) from (select groupArrayStateResample(0, 20, 1)(number, number % 20) state from numbers(50)); + +select arrayMap(x -> finalizeAggregation(x), state) from +( + select sumStateResample(0, 20, 1)(number, number % 20) as state from remote('127.0.0.{1,2}', numbers(200)) +); From c6c7ee30d98081df345d6c082e2e0cb7da5b65a0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Jul 2020 19:44:04 +0300 Subject: [PATCH 178/330] Less race conditions --- .../RabbitMQ/RabbitMQBlockInputStream.h | 1 - src/Storages/RabbitMQ/RabbitMQHandler.cpp | 28 +++++++------------ src/Storages/RabbitMQ/RabbitMQHandler.h | 4 +-- .../ReadBufferFromRabbitMQConsumer.cpp | 14 +++++----- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 16 +++++------ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 +++---- src/Storages/RabbitMQ/StorageRabbitMQ.h | 20 ++++++------- .../WriteBufferToRabbitMQProducer.cpp | 22 +++++++-------- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 14 +++++----- 9 files changed, 60 insertions(+), 69 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h index d171893d3b3..7db80065608 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h +++ 
b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -26,7 +26,6 @@ public: void readPrefixImpl() override; Block readImpl() override; - ///void readSuffixImpl() override; private: StorageRabbitMQ & storage; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 0af4918762b..5d17ff23b64 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -20,35 +20,27 @@ RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : { } - void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); if (!connection->usable() || !connection->ready()) - { throw Exception("Connection error", ErrorCodes::CANNOT_CONNECT_RABBITMQ); - } } - -void RabbitMQHandler::startBackgroundLoop() -{ - /// stop_loop variable is updated in a separate thread - while (!stop_loop.load()) - { - uv_run(loop, UV_RUN_NOWAIT); - } -} - - void RabbitMQHandler::startLoop() { - if (starting_loop.try_lock()) - { + std::lock_guard lock(startup_mutex); + /// stop_loop variable is updated in a separate thread + while (!stop_loop.load()) + uv_run(loop, UV_RUN_NOWAIT); +} + +void RabbitMQHandler::iterateLoop() +{ + std::unique_lock lock(startup_mutex, std::defer_lock); + if (lock.try_lock()) uv_run(loop, UV_RUN_NOWAIT); - starting_loop.unlock(); - } } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 0ffcd028e1b..5893ace1d2f 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,15 +19,15 @@ public: void onError(AMQP::TcpConnection * connection, const char * message) override; void stop() { stop_loop.store(true); } - void startBackgroundLoop(); void startLoop(); + void iterateLoop(); private: uv_loop_t * loop; Poco::Logger * log; std::atomic stop_loop = false; - std::timed_mutex starting_loop; + std::mutex startup_mutex; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 6ae2e6afeed..be42749300d 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -37,11 +37,11 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( HandlerPtr event_handler_, const String & exchange_name_, const Names & routing_keys_, - const size_t channel_id_, + size_t channel_id_, Poco::Logger * log_, char row_delimiter_, - const bool bind_by_id_, - const size_t num_queues_, + bool bind_by_id_, + size_t num_queues_, const String & exchange_type_, const String & local_exchange_, const std::atomic & stopped_) @@ -327,7 +327,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error)) { - startEventLoop(); + iterateEventLoop(); } } @@ -378,7 +378,7 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() /// These variables are updated in a separate thread. 
while (count_subscribed != wait_subscribed && !consumer_error) { - startEventLoop(); + iterateEventLoop(); } LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); @@ -395,9 +395,9 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() } -void ReadBufferFromRabbitMQConsumer::startEventLoop() +void ReadBufferFromRabbitMQConsumer::iterateEventLoop() { - event_handler->startLoop(); + event_handler->iterateLoop(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index c9452fb249d..9dbb42bd648 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -27,11 +27,11 @@ public: HandlerPtr event_handler_, const String & exchange_name_, const Names & routing_keys_, - const size_t channel_id_, + size_t channel_id_, Poco::Logger * log_, char row_delimiter_, - const bool bind_by_id_, - const size_t num_queues_, + bool bind_by_id_, + size_t num_queues_, const String & exchange_type_, const String & local_exchange_, const std::atomic & stopped_); @@ -47,14 +47,14 @@ private: ChannelPtr consumer_channel; HandlerPtr event_handler; - const String & exchange_name; - const Names & routing_keys; + const String exchange_name; + const Names routing_keys; const size_t channel_id; const bool bind_by_id; const size_t num_queues; - const String & exchange_type; - const String & local_exchange; + const String exchange_type; + const String local_exchange; const String local_default_exchange; const String local_hash_exchange; @@ -81,7 +81,7 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(); + void iterateEventLoop(); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 00da53e7909..5b3dec65f00 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -40,7 +40,7 @@ namespace DB static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 1000; -static const auto RESCHEDULE_MS = 500; +static const auto HEARTBEAT_RESCHEDULE_MS = 3000; namespace ErrorCodes { @@ -90,7 +90,7 @@ StorageRabbitMQ::StorageRabbitMQ( size_t cnt_retries = 0; while (!connection->ready() && ++cnt_retries != RETRIES_MAX) { - uv_run(loop.get(), UV_RUN_NOWAIT); + event_handler->iterateLoop(); std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); } @@ -125,9 +125,9 @@ void StorageRabbitMQ::heartbeatFunc() { if (!stream_cancelled) { - LOG_DEBUG(log, "Sending RabbitMQ heartbeat"); + LOG_TRACE(log, "Sending RabbitMQ heartbeat"); connection->heartbeat(); - heartbeat_task->scheduleAfter(RESCHEDULE_MS * 10); + heartbeat_task->scheduleAfter(HEARTBEAT_RESCHEDULE_MS); } } @@ -135,7 +135,7 @@ void StorageRabbitMQ::heartbeatFunc() void StorageRabbitMQ::loopingFunc() { LOG_DEBUG(log, "Starting event looping iterations"); - event_handler->startBackgroundLoop(); + event_handler->startLoop(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index c40211bed70..e1c8b33c91e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -32,18 +32,18 @@ public: void shutdown() override; Pipes read( - const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum 
processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; BlockOutputStreamPtr write( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - const Context & context) override; + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const Context & context) override; void pushReadBuffer(ConsumerBufferPtr buf); ConsumerBufferPtr popReadBuffer(); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 2539728aab3..d96a1c02db8 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -26,13 +26,13 @@ static const auto LOOP_WAIT = 10; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( std::pair & parsed_address, Context & global_context, - std::pair & login_password_, + const std::pair & login_password_, const String & routing_key_, - const String exchange_, + const String & exchange_, Poco::Logger * log_, - const size_t num_queues_, - const bool bind_by_id_, - const bool use_transactional_channel_, + size_t num_queues_, + bool bind_by_id_, + bool use_transactional_channel_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_) @@ -63,7 +63,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( size_t cnt_retries = 0; while (!connection->ready() && ++cnt_retries != RETRIES_MAX) { - uv_run(loop.get(), UV_RUN_NOWAIT); + event_handler->iterateLoop(); std::this_thread::sleep_for(std::chrono::milliseconds(CONNECT_SLEEP)); } @@ -144,7 +144,7 @@ void WriteBufferToRabbitMQProducer::writingFunc() producer_channel->publish(exchange_name, routing_key, payload); } } - startEventLoop(); + iterateEventLoop(); } } @@ -168,7 +168,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() /// These variables are updated in a separate thread and starting the loop blocks current thread while (!exchange_declared && !exchange_error) { - startEventLoop(); + iterateEventLoop(); } } @@ -207,7 +207,7 @@ void WriteBufferToRabbitMQProducer::finilizeProducer() size_t count_retries = 0; while ((!answer_received || wait_rollback) && ++count_retries != RETRIES_MAX) { - startEventLoop(); + iterateEventLoop(); std::this_thread::sleep_for(std::chrono::milliseconds(LOOP_WAIT)); } } @@ -222,9 +222,9 @@ void WriteBufferToRabbitMQProducer::nextImpl() } -void WriteBufferToRabbitMQProducer::startEventLoop() +void WriteBufferToRabbitMQProducer::iterateEventLoop() { - event_handler->startLoop(); + event_handler->iterateLoop(); } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 7d7f571ab3e..26a52b0b41c 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -22,13 +22,13 @@ public: WriteBufferToRabbitMQProducer( std::pair & parsed_address, Context & global_context, - std::pair & login_password_, + const std::pair & login_password_, const String & routing_key_, - const String exchange_, + const String & exchange_, Poco::Logger * log_, - const size_t num_queues_, - const bool bind_by_id_, - const bool use_transactional_channel_, + size_t num_queues_, + bool bind_by_id_, + bool 
use_transactional_channel_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_ @@ -42,11 +42,11 @@ public: private: void nextImpl() override; void checkExchange(); - void startEventLoop(); + void iterateEventLoop(); void writingFunc(); void finilizeProducer(); - std::pair & login_password; + const std::pair login_password; const String routing_key; const String exchange_name; const bool bind_by_id; From 17d49af183952aa86859bb7e67dd7e87b0d4a810 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 2 Jul 2020 20:12:36 +0300 Subject: [PATCH 179/330] fix test with buffer table --- tests/performance/polymorphic_parts_l.xml | 2 +- tests/performance/polymorphic_parts_m.xml | 2 +- tests/performance/polymorphic_parts_s.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index ea20d08409a..bd3f9cd8e42 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -15,7 +15,7 @@
CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index 2cf94d33ae8..f39de52e16e 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -15,7 +15,7 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index fbb6903c3c0..c1806372930 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -15,7 +15,7 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) From b81c66a03148afc02647df3c11aec40382329912 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 2 Jul 2020 22:16:21 +0300 Subject: [PATCH 180/330] [docker] install ca-certificates before the first apt-get update (#12095) * [docker] install ca-certificates before first apt-get update * Update Dockerfile --- docker/client/Dockerfile | 1 + docker/server/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index c4683d35e99..efa85c2a366 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -6,6 +6,7 @@ ARG version=20.6.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ + ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 132a5d89959..001d09520ad 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -7,6 +7,7 @@ ARG gosu_ver=1.10 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ + ca-certificates \ dirmngr \ gnupg \ && mkdir -p /etc/apt/sources.list.d \ @@ -19,7 +20,6 @@ RUN apt-get update \ clickhouse-client=$version \ clickhouse-server=$version \ locales \ - ca-certificates \ wget \ && rm -rf \ /var/lib/apt/lists/* \ From 3435328465bd2a8b58876cf30aa34b43ad9fafb1 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 3 Jul 2020 00:26:55 +0300 Subject: [PATCH 181/330] DOCS-522: max_parser_depth (#12097) * asiana21-DOCSUP-925-max_parser_depth (#132) * docs(max_parser_depth): added the setting description * docs(max_parser_depth): some changes * Update docs/en/operations/settings/settings.md Co-authored-by: BayoNet * Update docs/en/operations/settings/settings.md Co-authored-by: BayoNet * Update docs/en/operations/settings/settings.md Co-authored-by: BayoNet * docs(max_parser_depth): added ru translation * docs(max_parser_depth): removed quotation marks Co-authored-by: asiana21 Co-authored-by: BayoNet * CLICKHOUSEDOCS-522: Fixed the link. 
Co-authored-by: AsiaKorushkina <43650329+AsiaKorushkina@users.noreply.github.com> Co-authored-by: asiana21 Co-authored-by: Sergei Shtykov --- docs/en/operations/settings/settings.md | 11 +++++++++++ docs/ru/development/architecture.md | 2 +- docs/ru/operations/settings/settings.md | 11 +++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e0dd5323dcd..7d1f9a72a21 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -727,6 +727,17 @@ The INSERT query also contains data for INSERT that is processed by a separate s Default value: 256 KiB. +## max\_parser\_depth {#max_parser_depth} + +Limits maximum recursion depth in the recursive descent parser. Allows to control stack size. + +Possible values: + +- Positive integer. +- 0 — Recursion depth is unlimited. + +Default value: 1000. + ## interactive\_delay {#interactive-delay} The interval in microseconds for checking whether request execution has been cancelled and sending the progress. diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index 811af7a75f3..a9ed231af08 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -116,7 +116,7 @@ ClickHouse - полноценная колоночная СУБД. Данные Обычный функции не изменяют число строк и работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов. -Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#function-runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк. +Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк. ClickHouse имеет сильную типизацию, поэтому нет никакого неявного преобразования типов. Если функция не поддерживает определенную комбинацию типов, она создает исключение. Но функции могут работать (перегружаться) для многих различных комбинаций типов. Например, функция `plus` (для реализации `+` оператор) работает для любой комбинации числовых типов: `UInt8` + `Float32`, `UInt16` + `Int8` и так далее. Кроме того, некоторые вариадические функции, такие как `concat`, могут принимать любое количество аргументов. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 8d11c252bb7..fd6bbf4121d 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -644,6 +644,17 @@ log_query_threads=1 Значение по умолчанию: 256 Кб. +## max\_parser\_depth {#max_parser_depth} + +Ограничивает максимальную глубину рекурсии в парсере рекурсивного спуска. Позволяет контролировать размер стека. + +Возможные значения: + +- Положительное целое число. +- 0 — Глубина рекурсии не ограничена. + +Значение по умолчанию: 1000. 
+ ## interactive\_delay {#interactive-delay} Интервал в микросекундах для проверки, не запрошена ли остановка выполнения запроса, и отправки прогресса. From 0700a705bc186105d1d459ab80e4c2a1a368317b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 29 Jun 2020 23:07:45 +0300 Subject: [PATCH 182/330] Rewrite curl dependency in a more ch compatible way - add support of unbundled curl - add CURL::libcurl libraries - avoid explicit linkage of daemon with curl (added with sentry) - set CACHE variables for non-direct users: - mariadb-connector-c - aws-s3-cmake - sentry-native Cc: @alexey-milovidov Cc: @alesapin (requires docker image update) Cc: @abyss7 Refs: #11300 Refs: #8011 Refs: #8905 v2: replace cmake/find/curl.cmake with proper contrib/curl-cmake (as pointed by @abyss7, cmake/find/*.cmake is deprecated) --- base/daemon/CMakeLists.txt | 1 - cmake/find/sentry.cmake | 2 - contrib/CMakeLists.txt | 23 +- contrib/curl-cmake/CMakeLists.txt | 329 ++++++++++++---------- docker/test/integration/runner/Dockerfile | 1 + utils/build/build_debian_unbundled.sh | 2 +- 6 files changed, 190 insertions(+), 168 deletions(-) diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 04d2f059b39..26d59a57e7f 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -8,6 +8,5 @@ target_include_directories (daemon PUBLIC ..) target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) if (USE_SENTRY) - target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) endif () diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index eadf071141e..f202c9100a8 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -7,8 +7,6 @@ endif () if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (USE_SENTRY "Use Sentry" ON) - set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) - set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) set (SENTRY_TRANSPORT "curl" CACHE STRING "") set (SENTRY_BACKEND "none" CACHE STRING "") set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f2222797bff..ba0bad90c5e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,12 @@ if (ENABLE_LDAP AND USE_INTERNAL_LDAP_LIBRARY) add_subdirectory (openldap-cmake) endif () +# Should go before: +# - mariadb-connector-c +# - aws-s3-cmake +# - sentry-native +add_subdirectory (curl-cmake) + function(mysql_support) set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC) set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC) @@ -263,23 +269,6 @@ if (USE_INTERNAL_GRPC_LIBRARY) add_subdirectory(grpc-cmake) endif () -if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY) - set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) - set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) - set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) - set (save_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set (save_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}) - add_subdirectory(curl-cmake) - set (CMAKE_C_FLAGS ${save_CMAKE_C_FLAGS}) - set (CMAKE_REQUIRED_LIBRARIES ${save_CMAKE_REQUIRED_LIBRARIES}) - set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES}) - set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS}) - set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH}) - - # The library is large - avoid bloat. 
- target_compile_options (curl PRIVATE -g0) -endif () - if (USE_INTERNAL_AWS_S3_LIBRARY) add_subdirectory(aws-s3-cmake) diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index d0f6a7773b0..9edec1633c0 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -1,152 +1,187 @@ -set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl) -set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) -set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) +option (ENABLE_CURL "Enable curl" ${ENABLE_LIBRARIES}) -set (SRCS - ${CURL_DIR}/lib/file.c - ${CURL_DIR}/lib/timeval.c - ${CURL_DIR}/lib/base64.c - ${CURL_DIR}/lib/hostip.c - ${CURL_DIR}/lib/progress.c - ${CURL_DIR}/lib/formdata.c - ${CURL_DIR}/lib/cookie.c - ${CURL_DIR}/lib/http.c - ${CURL_DIR}/lib/sendf.c - ${CURL_DIR}/lib/url.c - ${CURL_DIR}/lib/dict.c - ${CURL_DIR}/lib/if2ip.c - ${CURL_DIR}/lib/speedcheck.c - ${CURL_DIR}/lib/ldap.c - ${CURL_DIR}/lib/version.c - ${CURL_DIR}/lib/getenv.c - ${CURL_DIR}/lib/escape.c - ${CURL_DIR}/lib/mprintf.c - ${CURL_DIR}/lib/telnet.c - ${CURL_DIR}/lib/netrc.c - ${CURL_DIR}/lib/getinfo.c - ${CURL_DIR}/lib/transfer.c - ${CURL_DIR}/lib/strcase.c - ${CURL_DIR}/lib/easy.c - ${CURL_DIR}/lib/security.c - ${CURL_DIR}/lib/curl_fnmatch.c - ${CURL_DIR}/lib/fileinfo.c - ${CURL_DIR}/lib/wildcard.c - ${CURL_DIR}/lib/krb5.c - ${CURL_DIR}/lib/memdebug.c - ${CURL_DIR}/lib/http_chunks.c - ${CURL_DIR}/lib/strtok.c - ${CURL_DIR}/lib/connect.c - ${CURL_DIR}/lib/llist.c - ${CURL_DIR}/lib/hash.c - ${CURL_DIR}/lib/multi.c - ${CURL_DIR}/lib/content_encoding.c - ${CURL_DIR}/lib/share.c - ${CURL_DIR}/lib/http_digest.c - ${CURL_DIR}/lib/md4.c - ${CURL_DIR}/lib/md5.c - ${CURL_DIR}/lib/http_negotiate.c - ${CURL_DIR}/lib/inet_pton.c - ${CURL_DIR}/lib/strtoofft.c - ${CURL_DIR}/lib/strerror.c - ${CURL_DIR}/lib/amigaos.c - ${CURL_DIR}/lib/hostasyn.c - ${CURL_DIR}/lib/hostip4.c - ${CURL_DIR}/lib/hostip6.c - ${CURL_DIR}/lib/hostsyn.c - ${CURL_DIR}/lib/inet_ntop.c - ${CURL_DIR}/lib/parsedate.c - ${CURL_DIR}/lib/select.c - ${CURL_DIR}/lib/splay.c - ${CURL_DIR}/lib/strdup.c - ${CURL_DIR}/lib/socks.c - ${CURL_DIR}/lib/curl_addrinfo.c - ${CURL_DIR}/lib/socks_gssapi.c - ${CURL_DIR}/lib/socks_sspi.c - ${CURL_DIR}/lib/curl_sspi.c - ${CURL_DIR}/lib/slist.c - ${CURL_DIR}/lib/nonblock.c - ${CURL_DIR}/lib/curl_memrchr.c - ${CURL_DIR}/lib/imap.c - ${CURL_DIR}/lib/pop3.c - ${CURL_DIR}/lib/smtp.c - ${CURL_DIR}/lib/pingpong.c - ${CURL_DIR}/lib/rtsp.c - ${CURL_DIR}/lib/curl_threads.c - ${CURL_DIR}/lib/warnless.c - ${CURL_DIR}/lib/hmac.c - ${CURL_DIR}/lib/curl_rtmp.c - ${CURL_DIR}/lib/openldap.c - ${CURL_DIR}/lib/curl_gethostname.c - ${CURL_DIR}/lib/gopher.c - ${CURL_DIR}/lib/idn_win32.c - ${CURL_DIR}/lib/http_proxy.c - ${CURL_DIR}/lib/non-ascii.c - ${CURL_DIR}/lib/asyn-thread.c - ${CURL_DIR}/lib/curl_gssapi.c - ${CURL_DIR}/lib/http_ntlm.c - ${CURL_DIR}/lib/curl_ntlm_wb.c - ${CURL_DIR}/lib/curl_ntlm_core.c - ${CURL_DIR}/lib/curl_sasl.c - ${CURL_DIR}/lib/rand.c - ${CURL_DIR}/lib/curl_multibyte.c - ${CURL_DIR}/lib/hostcheck.c - ${CURL_DIR}/lib/conncache.c - ${CURL_DIR}/lib/dotdot.c - ${CURL_DIR}/lib/x509asn1.c - ${CURL_DIR}/lib/http2.c - ${CURL_DIR}/lib/smb.c - ${CURL_DIR}/lib/curl_endian.c - ${CURL_DIR}/lib/curl_des.c - ${CURL_DIR}/lib/system_win32.c - ${CURL_DIR}/lib/mime.c - ${CURL_DIR}/lib/sha256.c - ${CURL_DIR}/lib/setopt.c - ${CURL_DIR}/lib/curl_path.c - ${CURL_DIR}/lib/curl_ctype.c - ${CURL_DIR}/lib/curl_range.c - ${CURL_DIR}/lib/psl.c - ${CURL_DIR}/lib/doh.c - 
${CURL_DIR}/lib/urlapi.c - ${CURL_DIR}/lib/curl_get_line.c - ${CURL_DIR}/lib/altsvc.c - ${CURL_DIR}/lib/socketpair.c - ${CURL_DIR}/lib/vauth/vauth.c - ${CURL_DIR}/lib/vauth/cleartext.c - ${CURL_DIR}/lib/vauth/cram.c - ${CURL_DIR}/lib/vauth/digest.c - ${CURL_DIR}/lib/vauth/digest_sspi.c - ${CURL_DIR}/lib/vauth/krb5_gssapi.c - ${CURL_DIR}/lib/vauth/krb5_sspi.c - ${CURL_DIR}/lib/vauth/ntlm.c - ${CURL_DIR}/lib/vauth/ntlm_sspi.c - ${CURL_DIR}/lib/vauth/oauth2.c - ${CURL_DIR}/lib/vauth/spnego_gssapi.c - ${CURL_DIR}/lib/vauth/spnego_sspi.c - ${CURL_DIR}/lib/vtls/openssl.c - ${CURL_DIR}/lib/vtls/gtls.c - ${CURL_DIR}/lib/vtls/vtls.c - ${CURL_DIR}/lib/vtls/nss.c - ${CURL_DIR}/lib/vtls/polarssl.c - ${CURL_DIR}/lib/vtls/polarssl_threadlock.c - ${CURL_DIR}/lib/vtls/wolfssl.c - ${CURL_DIR}/lib/vtls/schannel.c - ${CURL_DIR}/lib/vtls/schannel_verify.c - ${CURL_DIR}/lib/vtls/sectransp.c - ${CURL_DIR}/lib/vtls/gskit.c - ${CURL_DIR}/lib/vtls/mbedtls.c - ${CURL_DIR}/lib/vtls/mesalink.c - ${CURL_DIR}/lib/vtls/bearssl.c - ${CURL_DIR}/lib/vquic/ngtcp2.c - ${CURL_DIR}/lib/vquic/quiche.c - ${CURL_DIR}/lib/vssh/libssh2.c - ${CURL_DIR}/lib/vssh/libssh.c -) +if (ENABLE_CURL) + option (USE_INTERNAL_CURL "Use internal curl library" ${NOT_UNBUNDLED}) -add_library (curl ${SRCS}) + if (USE_INTERNAL_CURL) + set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") -target_compile_definitions(curl PRIVATE HAVE_CONFIG_H BUILDING_LIBCURL CURL_HIDDEN_SYMBOLS libcurl_EXPORTS) -target_include_directories(curl PUBLIC ${CURL_DIR}/include ${CURL_DIR}/lib .) + set (SRCS + ${LIBRARY_DIR}/lib/file.c + ${LIBRARY_DIR}/lib/timeval.c + ${LIBRARY_DIR}/lib/base64.c + ${LIBRARY_DIR}/lib/hostip.c + ${LIBRARY_DIR}/lib/progress.c + ${LIBRARY_DIR}/lib/formdata.c + ${LIBRARY_DIR}/lib/cookie.c + ${LIBRARY_DIR}/lib/http.c + ${LIBRARY_DIR}/lib/sendf.c + ${LIBRARY_DIR}/lib/url.c + ${LIBRARY_DIR}/lib/dict.c + ${LIBRARY_DIR}/lib/if2ip.c + ${LIBRARY_DIR}/lib/speedcheck.c + ${LIBRARY_DIR}/lib/ldap.c + ${LIBRARY_DIR}/lib/version.c + ${LIBRARY_DIR}/lib/getenv.c + ${LIBRARY_DIR}/lib/escape.c + ${LIBRARY_DIR}/lib/mprintf.c + ${LIBRARY_DIR}/lib/telnet.c + ${LIBRARY_DIR}/lib/netrc.c + ${LIBRARY_DIR}/lib/getinfo.c + ${LIBRARY_DIR}/lib/transfer.c + ${LIBRARY_DIR}/lib/strcase.c + ${LIBRARY_DIR}/lib/easy.c + ${LIBRARY_DIR}/lib/security.c + ${LIBRARY_DIR}/lib/curl_fnmatch.c + ${LIBRARY_DIR}/lib/fileinfo.c + ${LIBRARY_DIR}/lib/wildcard.c + ${LIBRARY_DIR}/lib/krb5.c + ${LIBRARY_DIR}/lib/memdebug.c + ${LIBRARY_DIR}/lib/http_chunks.c + ${LIBRARY_DIR}/lib/strtok.c + ${LIBRARY_DIR}/lib/connect.c + ${LIBRARY_DIR}/lib/llist.c + ${LIBRARY_DIR}/lib/hash.c + ${LIBRARY_DIR}/lib/multi.c + ${LIBRARY_DIR}/lib/content_encoding.c + ${LIBRARY_DIR}/lib/share.c + ${LIBRARY_DIR}/lib/http_digest.c + ${LIBRARY_DIR}/lib/md4.c + ${LIBRARY_DIR}/lib/md5.c + ${LIBRARY_DIR}/lib/http_negotiate.c + ${LIBRARY_DIR}/lib/inet_pton.c + ${LIBRARY_DIR}/lib/strtoofft.c + ${LIBRARY_DIR}/lib/strerror.c + ${LIBRARY_DIR}/lib/amigaos.c + ${LIBRARY_DIR}/lib/hostasyn.c + ${LIBRARY_DIR}/lib/hostip4.c + ${LIBRARY_DIR}/lib/hostip6.c + ${LIBRARY_DIR}/lib/hostsyn.c + ${LIBRARY_DIR}/lib/inet_ntop.c + ${LIBRARY_DIR}/lib/parsedate.c + ${LIBRARY_DIR}/lib/select.c + ${LIBRARY_DIR}/lib/splay.c + ${LIBRARY_DIR}/lib/strdup.c + ${LIBRARY_DIR}/lib/socks.c + ${LIBRARY_DIR}/lib/curl_addrinfo.c + ${LIBRARY_DIR}/lib/socks_gssapi.c + ${LIBRARY_DIR}/lib/socks_sspi.c + ${LIBRARY_DIR}/lib/curl_sspi.c + ${LIBRARY_DIR}/lib/slist.c + ${LIBRARY_DIR}/lib/nonblock.c + ${LIBRARY_DIR}/lib/curl_memrchr.c + ${LIBRARY_DIR}/lib/imap.c + 
${LIBRARY_DIR}/lib/pop3.c + ${LIBRARY_DIR}/lib/smtp.c + ${LIBRARY_DIR}/lib/pingpong.c + ${LIBRARY_DIR}/lib/rtsp.c + ${LIBRARY_DIR}/lib/curl_threads.c + ${LIBRARY_DIR}/lib/warnless.c + ${LIBRARY_DIR}/lib/hmac.c + ${LIBRARY_DIR}/lib/curl_rtmp.c + ${LIBRARY_DIR}/lib/openldap.c + ${LIBRARY_DIR}/lib/curl_gethostname.c + ${LIBRARY_DIR}/lib/gopher.c + ${LIBRARY_DIR}/lib/idn_win32.c + ${LIBRARY_DIR}/lib/http_proxy.c + ${LIBRARY_DIR}/lib/non-ascii.c + ${LIBRARY_DIR}/lib/asyn-thread.c + ${LIBRARY_DIR}/lib/curl_gssapi.c + ${LIBRARY_DIR}/lib/http_ntlm.c + ${LIBRARY_DIR}/lib/curl_ntlm_wb.c + ${LIBRARY_DIR}/lib/curl_ntlm_core.c + ${LIBRARY_DIR}/lib/curl_sasl.c + ${LIBRARY_DIR}/lib/rand.c + ${LIBRARY_DIR}/lib/curl_multibyte.c + ${LIBRARY_DIR}/lib/hostcheck.c + ${LIBRARY_DIR}/lib/conncache.c + ${LIBRARY_DIR}/lib/dotdot.c + ${LIBRARY_DIR}/lib/x509asn1.c + ${LIBRARY_DIR}/lib/http2.c + ${LIBRARY_DIR}/lib/smb.c + ${LIBRARY_DIR}/lib/curl_endian.c + ${LIBRARY_DIR}/lib/curl_des.c + ${LIBRARY_DIR}/lib/system_win32.c + ${LIBRARY_DIR}/lib/mime.c + ${LIBRARY_DIR}/lib/sha256.c + ${LIBRARY_DIR}/lib/setopt.c + ${LIBRARY_DIR}/lib/curl_path.c + ${LIBRARY_DIR}/lib/curl_ctype.c + ${LIBRARY_DIR}/lib/curl_range.c + ${LIBRARY_DIR}/lib/psl.c + ${LIBRARY_DIR}/lib/doh.c + ${LIBRARY_DIR}/lib/urlapi.c + ${LIBRARY_DIR}/lib/curl_get_line.c + ${LIBRARY_DIR}/lib/altsvc.c + ${LIBRARY_DIR}/lib/socketpair.c + ${LIBRARY_DIR}/lib/vauth/vauth.c + ${LIBRARY_DIR}/lib/vauth/cleartext.c + ${LIBRARY_DIR}/lib/vauth/cram.c + ${LIBRARY_DIR}/lib/vauth/digest.c + ${LIBRARY_DIR}/lib/vauth/digest_sspi.c + ${LIBRARY_DIR}/lib/vauth/krb5_gssapi.c + ${LIBRARY_DIR}/lib/vauth/krb5_sspi.c + ${LIBRARY_DIR}/lib/vauth/ntlm.c + ${LIBRARY_DIR}/lib/vauth/ntlm_sspi.c + ${LIBRARY_DIR}/lib/vauth/oauth2.c + ${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c + ${LIBRARY_DIR}/lib/vauth/spnego_sspi.c + ${LIBRARY_DIR}/lib/vtls/openssl.c + ${LIBRARY_DIR}/lib/vtls/gtls.c + ${LIBRARY_DIR}/lib/vtls/vtls.c + ${LIBRARY_DIR}/lib/vtls/nss.c + ${LIBRARY_DIR}/lib/vtls/polarssl.c + ${LIBRARY_DIR}/lib/vtls/polarssl_threadlock.c + ${LIBRARY_DIR}/lib/vtls/wolfssl.c + ${LIBRARY_DIR}/lib/vtls/schannel.c + ${LIBRARY_DIR}/lib/vtls/schannel_verify.c + ${LIBRARY_DIR}/lib/vtls/sectransp.c + ${LIBRARY_DIR}/lib/vtls/gskit.c + ${LIBRARY_DIR}/lib/vtls/mbedtls.c + ${LIBRARY_DIR}/lib/vtls/mesalink.c + ${LIBRARY_DIR}/lib/vtls/bearssl.c + ${LIBRARY_DIR}/lib/vquic/ngtcp2.c + ${LIBRARY_DIR}/lib/vquic/quiche.c + ${LIBRARY_DIR}/lib/vssh/libssh2.c + ${LIBRARY_DIR}/lib/vssh/libssh.c + ) -target_compile_definitions(curl PRIVATE OS="${CMAKE_SYSTEM_NAME}") + add_library (curl ${SRCS}) -target_link_libraries(curl PRIVATE ssl) + target_compile_definitions (curl PRIVATE + HAVE_CONFIG_H + BUILDING_LIBCURL + CURL_HIDDEN_SYMBOLS + libcurl_EXPORTS + OS="${CMAKE_SYSTEM_NAME}" + ) + target_include_directories (curl PUBLIC + ${LIBRARY_DIR}/include + ${LIBRARY_DIR}/lib + . # curl_config.h + ) + + target_link_libraries (curl PRIVATE ssl) + + # The library is large - avoid bloat (XXX: is it?) 
+ target_compile_options (curl PRIVATE -g0) + + # find_package(CURL) compatibility for the following packages that uses + # find_package(CURL)/include(FindCURL): + # - mariadb-connector-c + # - aws-s3-cmake + # - sentry-native + set (CURL_FOUND ON CACHE BOOL "") + set (CURL_ROOT_DIR ${LIBRARY_DIR} CACHE PATH "") + set (CURL_INCLUDE_DIR ${LIBRARY_DIR}/include CACHE PATH "") + set (CURL_INCLUDE_DIRS ${LIBRARY_DIR}/include CACHE PATH "") + set (CURL_LIBRARY curl CACHE STRING "") + set (CURL_LIBRARIES ${CURL_LIBRARY} CACHE STRING "") + set (CURL_VERSION_STRING 7.67.0 CACHE STRING "") + add_library (CURL::libcurl ALIAS ${CURL_LIBRARY}) + else () + find_package (CURL) + endif () +endif () + +message (STATUS "Using curl: ${CURL_INCLUDE_DIRS} : ${CURL_LIBRARIES}") diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 9c1fe66cf7b..423ecb06122 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get update \ liblua5.1-dev \ luajit \ libssl-dev \ + libcurl4-openssl-dev \ gdb \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index f5f59ce7a13..5b2129fc5bf 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libcurl4-openssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT From 4c751bf8be64120d743b3d60bf8e0b9454ec6b4d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 3 Jul 2020 03:58:57 +0300 Subject: [PATCH 183/330] Added results for AWS Lightsail --- website/benchmark/hardware/index.html | 4 +- .../results/046_aws_lightsail_4vcpu.json | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 61f9d99f5d8..88ddb2d0868 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -64,7 +64,9 @@ Results for MacBook Pro are from Denis Glazachev. MacOS Catalina Version Results for AMD EPYC 7702 are from Peng Gao in sina.com.
Results for Intel NUC are from Alexander Zaitsev, Altinity.
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.
-Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.

+Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because cache was not flushed for every next query.
+Results for AWS Lightsail is from Vamsi Krishna B. +

{% endblock %} diff --git a/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json new file mode 100644 index 00000000000..75938abc9f0 --- /dev/null +++ b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS Lightsail 4vCPU", + "system_full": "AWS Lightsail E5-2686 v4 @ 2.30GHz, 16 GiB RAM", + "time": "2020-07-02 00:00:00", + "kind": "cloud", + "result": + [ +[0.002, 0.001, 0.001], +[0.046, 0.026, 0.025], +[0.156, 0.077, 0.078], +[0.746, 0.098, 0.095], +[1.383, 0.233, 0.218], +[2.161, 0.646, 0.626], +[0.041, 0.037, 0.038], +[0.032, 0.029, 0.026], +[1.494, 1.190, 1.159], +[1.843, 1.354, 1.357], +[0.841, 0.375, 0.375], +[1.254, 0.446, 0.448], +[2.235, 1.792, 1.746], +[4.175, 2.354, 2.315], +[2.602, 2.075, 2.042], +[2.258, 2.085, 2.058], +[6.402, 5.909, 5.895], +[4.178, 3.618, 3.670], +[12.978, 12.037, 11.764], +[0.754, 0.107, 0.102], +[19.615, 1.888, 1.868], +[21.740, 2.208, 2.171], +[41.009, 5.277, 5.245], +[38.068, 2.475, 2.435], +[4.739, 0.693, 0.680], +[1.766, 0.549, 0.542], +[4.730, 0.684, 0.672], +[19.010, 1.849, 1.811], +[15.999, 3.086, 3.099], +[3.655, 3.609, 3.593], +[3.967, 1.768, 1.836], +[10.566, 3.036, 2.963], +[20.065, 19.091, null], +[21.474, 8.597, 8.501], +[21.484, 8.563, 8.533], +[3.850, 3.487, 3.477], +[0.408, 0.240, 0.239], +[0.125, 0.087, 0.084], +[0.132, 0.073, 0.073], +[0.685, 0.471, 0.480], +[0.089, 0.028, 0.025], +[0.044, 0.027, 0.018], +[0.007, 0.007, 0.006] + ] + } +] From 457f56be0c37d608e562ccf24f67f235bb2a7206 Mon Sep 17 00:00:00 2001 From: manmitya <30998567+manmitya@users.noreply.github.com> Date: Fri, 3 Jul 2020 08:04:42 +0300 Subject: [PATCH 184/330] (typo) in doc (#12099) --- docs/ru/sql-reference/functions/array-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 24b81d743f3..99eda1bf45e 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -948,7 +948,7 @@ flatten(array_of_arrays) **Параметры** -- `array_of_arrays` — [Массивов](../../sql-reference/functions/array-functions.md) массивов. Например, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Массив](../../sql-reference/functions/array-functions.md) массивов. Например, `[[1,2,3], [4,5]]`. 
**Примеры** From 68ca587a71fe6aa391b370385b524eb2949cc8d0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:11:17 +0300 Subject: [PATCH 185/330] Update polymorphic_parts_l.xml --- tests/performance/polymorphic_parts_l.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index bd3f9cd8e42..4ffffe0e539 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -15,7 +15,7 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) From d1a93523b6629aeb21665bb5a857d9ac5b84f40f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:11:46 +0300 Subject: [PATCH 186/330] Update polymorphic_parts_m.xml --- tests/performance/polymorphic_parts_m.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index f39de52e16e..251f313edc7 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -15,7 +15,7 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) From 03becdbae4697e91b179a052ab0730acbbd618cf Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:12:15 +0300 Subject: [PATCH 187/330] Update polymorphic_parts_s.xml --- tests/performance/polymorphic_parts_s.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index c1806372930..04602149da1 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -15,7 +15,7 @@ CREATE TABLE hits_buffer AS hits_10m_single - ENGINE = Buffer(default, hits_wide, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + ENGINE = Buffer(default, hits_wide, 1, 0, 0, 10000, 10000, 0, 0) From 6104872caef42f583ff2d00902d4f51f0ee62cba Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 1 Jul 2020 20:46:28 +0300 Subject: [PATCH 188/330] kafka: fix SIGSEGV on DROP TABLE After #11599 it is possible that messages of the ReadBufferFromKafkaConsumer will be cleaned-up right in read_kafka_message callback (from KafkaBlockInputStream) if the stop flag isset (i.e. 
DROP TABLE is waiting the consumer), and if read_kafka_message already processed some rows it will not return 0 and the loop after will try to get current topic from the buffer, which uses messages in the underlying and this will got SIGSEGV: 12:14:56.173262 [ 55421 ] {f7930856-d478-4e41-af56-24ce7b693e95} executeQuery: (from 0.0.0.0:0, user: ) DROP TABLE IF EXISTS data.queue 12:14:56.173285 [ 55421 ] {f7930856-d478-4e41-af56-24ce7b693e95} StorageKafka (newly_queue): Waiting for cleanup 12:14:56.180016 [ 55390 ] {} BaseDaemon: Received signal 11 12:14:56.180267 [ 4914 ] {} BaseDaemon: ######################################## 12:14:56.181879 [ 4914 ] {} BaseDaemon: (version 20.6.1.1, build id: 4CE0298F08583658) (from thread 55468) (no query) Received signal Segmentation fault (11) 12:14:56.181900 [ 4914 ] {} BaseDaemon: Address: 0x8 Access: read. Address not mapped to object. 12:14:56.181909 [ 4914 ] {} BaseDaemon: Stack trace: 12:14:56.184676 [ 4914 ] {} BaseDaemon: 3. /ch/contrib/cppkafka/include/cppkafka/message.h:111: DB::KafkaBlockInputStream::readImpl() @ 0xe343f1c in /usr/lib/debug/usr/bin/clickhouse 12:14:56.185553 [ 4914 ] {} BaseDaemon: 4. /ch/contrib/libcxx/include/vector:1003: DB::IBlockInputStream::read() @ 0xd9d95bd in /usr/lib/debug/usr/bin/clickhouse 12:14:56.188238 [ 4914 ] {} BaseDaemon: 5. /ch/src/DataStreams/copyData.cpp:26: DB::copyData() @ 0xd9f712a in /usr/lib/debug/usr/bin/clickhouse 12:14:56.188780 [ 4914 ] {} BaseDaemon: 6. /ch/contrib/libcxx/include/vector:1532: DB::StorageKafka::streamToViews() @ 0xe335e73 in /usr/lib/debug/usr/bin/clickhouse 12:14:56.189331 [ 4914 ] {} BaseDaemon: 7. /ch/src/Storages/Kafka/StorageKafka.cpp:491: DB::StorageKafka::threadFunc() @ 0xe336738 in /usr/lib/debug/usr/bin/clickhouse 55421 thread (shows that it still waiting for deactivation): 5 std::__1::lock_guard<>::lock_guard () at ../contrib/libcxx/include/__mutex_base:90 6 DB::BackgroundSchedulePoolTaskInfo::deactivate (this=0x7fc7e4465f20) at ../src/Core/BackgroundSchedulePool.cpp:59 7 DB::StorageKafka::shutdown (this=0x7fc7e45e4600) at ../contrib/libcxx/include/memory:3821 And just in case thread where read_kafka_message is called: 0 DB::ReadBufferFromKafkaConsumer::nextImpl (this=0x7fd4901d4118) at ../contrib/libcxx/include/atomic:1491 1 DB::ReadBuffer::next (this=0x7fd4901d4118) at ../src/IO/ReadBuffer.h:59 2 DB::ReadBuffer::eof (this=0x7fd4901d4118) at ../src/IO/ReadBuffer.h:81 3 DB::skipWhitespaceIfAny (buf=...) at ../src/IO/ReadHelpers.h:945 4 DB::JSONEachRowRowInputFormat::readRow (ext=..., columns=..., this=0x7fd499a7a020) at ../src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp:222 5 DB::JSONEachRowRowInputFormat::readRow (this=0x7fd499a7a020, columns=..., ext=...) 
at ../src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp:218 6 DB::IRowInputFormat::generate (this=0x7fd499a7a020) at ../src/Processors/Formats/IRowInputFormat.cpp:64 7 DB::ISource::work (this=0x7fd499a7a020) at ../src/Processors/ISource.cpp:48 8 DB::KafkaBlockInputStream::::operator()(void) const () at ../contrib/libcxx/include/memory:3826 9 DB::KafkaBlockInputStream::readImpl (this=0x7fd46e718820) at ../contrib/libcxx/include/new:340 Cc: @filimonov --- src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 3fd28cde5e5..876aa8d8c54 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -420,12 +420,9 @@ void ReadBufferFromKafkaConsumer::resetIfStopped() /// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { - /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. /// If we failed to poll any message once - don't try again. /// Otherwise, the |poll_timeout| expectations get flawn. - resetIfStopped(); - if (!allowed || !hasMorePolledMessages()) return false; From e7179c402c3b1cda8bb4e32d2d7faecd126b62a8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Jul 2020 22:45:46 +0300 Subject: [PATCH 189/330] kafka: check that the data is still usable after parsing --- src/Storages/Kafka/KafkaBlockInputStream.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 078d2e74771..ca846cc10f0 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -140,6 +140,13 @@ Block KafkaBlockInputStream::readImpl() if (new_rows) { + // In read_kafka_message(), ReadBufferFromKafkaConsumer::nextImpl() + // will be called, that may make something unusable, i.e. clean + // ReadBufferFromKafkaConsumer::messages, which is accessed from + // ReadBufferFromKafkaConsumer::currentTopic() (and other helpers). + if (buffer->isStalled()) + throw Exception("Polled messages became unusable", ErrorCodes::LOGICAL_ERROR); + buffer->storeLastReadMessageOffset(); auto topic = buffer->currentTopic(); From 850820002038d61e9668be5660a89067c0f1c29b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Jul 2020 22:25:09 +0300 Subject: [PATCH 190/330] kafka: improve logging during engine shutdown This will help with tracking possible issues, when you need to know was buffer released or not. 
--- src/Storages/Kafka/StorageKafka.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 6499941a68d..7fca745cf81 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -274,9 +274,10 @@ void StorageKafka::shutdown() LOG_TRACE(log, "Waiting for cleanup"); task->deactivate(); - // Close all consumers + LOG_TRACE(log, "Closing consumers"); for (size_t i = 0; i < num_created_consumers; ++i) auto buffer = popReadBuffer(); + LOG_TRACE(log, "Consumers closed"); rd_kafka_wait_destroyed(CLEANUP_TIMEOUT_MS); } From de8bc99d48d17e12fb2259a7a8076d02e8ffd2c5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Jul 2020 22:25:59 +0300 Subject: [PATCH 191/330] kafka: avoid superior polling after DROP/DETACH TABLE Before this patch isStalled() was checked before polledDataUnusable(), and after DROP TABLE isStalled() == true (although this looks tricky). --- src/Storages/Kafka/KafkaBlockInputStream.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index ca846cc10f0..c5f598a756c 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -193,14 +193,14 @@ Block KafkaBlockInputStream::readImpl() total_rows = total_rows + new_rows; } - else if (buffer->isStalled()) - { - ++failed_poll_attempts; - } else if (buffer->polledDataUnusable()) { break; } + else if (buffer->isStalled()) + { + ++failed_poll_attempts; + } else { LOG_WARNING(log, "Parsing of message (topic: {}, partition: {}, offset: {}) return no rows.", buffer->currentTopic(), buffer->currentPartition(), buffer->currentOffset()); From bd5e5e94620d65b574c9d2c987a15efd91ad74ab Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Jul 2020 22:51:45 +0300 Subject: [PATCH 192/330] kafka: remove outdated comment As stated by @filimonov it is not relevant (after #11599) --- src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 876aa8d8c54..5dea41d049e 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -420,9 +420,6 @@ void ReadBufferFromKafkaConsumer::resetIfStopped() /// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { - /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. - /// If we failed to poll any message once - don't try again. - /// Otherwise, the |poll_timeout| expectations get flawn. if (!allowed || !hasMorePolledMessages()) return false; From 3c717d4aca7a31fd86f2de382ca85593f60075d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 10:34:08 +0300 Subject: [PATCH 193/330] Added test. 
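To illustrate why the reordering in "avoid superior polling after DROP/DETACH TABLE" matters, here is a tiny hypothetical sketch (again, not the real ClickHouse classes): after a DROP/DETACH both predicates report true, so if the "stalled" branch is tested first the read loop just keeps counting failed poll attempts and retrying instead of stopping immediately.

    #include <cstdio>

    // Hypothetical stand-in for the buffer used by the Kafka read loop.
    struct FakeBuffer
    {
        bool dropped = true;                         // the table was DROPped/DETACHed
        bool polledDataUnusable() const { return dropped; }
        bool isStalled() const { return dropped; }   // after DROP this also reports true
    };

    int main()
    {
        FakeBuffer buffer;
        int failed_poll_attempts = 0;

        while (true)
        {
            // Fixed order: once the polled data is unusable, stop right away ...
            if (buffer.polledDataUnusable())
                break;
            // ... and only otherwise treat an empty poll as "stalled" and retry.
            if (buffer.isStalled())
            {
                if (++failed_poll_attempts >= 5)
                    break;
                continue;
            }
            // (normal row processing would go here)
        }

        std::printf("failed poll attempts: %d\n", failed_poll_attempts);  // 0 with the fixed order
        return 0;
    }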
--- tests/queries/0_stateless/01357_result_rows.reference | 2 ++ tests/queries/0_stateless/01357_result_rows.sql | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/01357_result_rows.reference create mode 100644 tests/queries/0_stateless/01357_result_rows.sql diff --git a/tests/queries/0_stateless/01357_result_rows.reference b/tests/queries/0_stateless/01357_result_rows.reference new file mode 100644 index 00000000000..cd121fd3feb --- /dev/null +++ b/tests/queries/0_stateless/01357_result_rows.reference @@ -0,0 +1,2 @@ +1 +1 1 diff --git a/tests/queries/0_stateless/01357_result_rows.sql b/tests/queries/0_stateless/01357_result_rows.sql new file mode 100644 index 00000000000..17c2d15979a --- /dev/null +++ b/tests/queries/0_stateless/01357_result_rows.sql @@ -0,0 +1,5 @@ +set log_queries = 1; +select count() > 0 from system.settings; + +system flush logs; +select result_rows, result_bytes >= 8 from system.query_log where event_date >= today() - 1 and lower(query) like '%select count() > 0 from system.settings%' and type = 'QueryFinish' order by query_start_time desc limit 1; From d5732d39729722207dffbc9074dce7615dfbe0eb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 3 Jul 2020 10:57:22 +0300 Subject: [PATCH 194/330] Create codeql-analysis.yml --- .github/workflows/codeql-analysis.yml | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000000..bd8e8deef9e --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,33 @@ +name: "CodeQL Scanning" + +on: + schedule: + - cron: '0 19 * * *' +jobs: + CodeQL-Build: + + runs-on: self-hosted + timeout-minutes: 1440 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 2 + submodules: 'recursive' + + - run: git checkout HEAD^2 + if: ${{ github.event_name == 'pull_request' }} + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + + with: + languages: cpp + + - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-9 g++-9 && mkdir build + - run: cd build && CC=gcc-9 CXX=g++-9 cmake .. 
+ - run: cd build && ninja + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 From c57edd201888c51a4ccfbcdb3bef87a50cd5d854 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 11:02:35 +0300 Subject: [PATCH 195/330] Tiny fixes --- programs/server/config.xml | 2 -- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 +-- .../configs/rabbitmq.xml | 3 ++- .../test_storage_rabbitmq/configs/users.xml | 25 ------------------- 4 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 tests/integration/test_storage_rabbitmq/configs/users.xml diff --git a/programs/server/config.xml b/programs/server/config.xml index f1feb6d4eff..24779657ced 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -66,8 +66,6 @@ 8443 9440 --> - root - clickhouse diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5b3dec65f00..9d5e7fcd652 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -77,8 +77,8 @@ StorageRabbitMQ::StorageRabbitMQ( , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( - global_context.getConfigRef().getString("rabbitmq_username", "root"), - global_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) + global_context.getConfigRef().getString("rabbitmq.username"), + global_context.getConfigRef().getString("rabbitmq.password"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) { loop = std::make_unique(); diff --git a/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml index 33a8a43fb1a..3002b6aa415 100644 --- a/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml +++ b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml @@ -1,5 +1,6 @@ - earliest + root + clickhouse diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml deleted file mode 100644 index 246e6b069ef..00000000000 --- a/tests/integration/test_storage_rabbitmq/configs/users.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - ::/0 - - default - default - - - - - - - - From d962fd931b481fce1f5d328c444cd57d53823008 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 3 Jul 2020 11:10:05 +0300 Subject: [PATCH 196/330] Create anchore-analysis.yml --- .github/workflows/anchore-analysis.yml | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/anchore-analysis.yml diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml new file mode 100644 index 00000000000..f0b54bef3e1 --- /dev/null +++ b/.github/workflows/anchore-analysis.yml @@ -0,0 +1,37 @@ +# This workflow checks out code, performs an Anchore container image +# vulnerability and compliance scan, and integrates the results with +# GitHub Advanced Security code scanning feature. For more information on +# the Anchore scan action usage and parameters, see +# https://github.com/anchore/scan-action. For more information on +# Anchore container image scanning in general, see +# https://docs.anchore.com. 
+ +name: Docker Container Scan (clickhouse-server) + +on: + pull_request: + paths: docker/server/Dockerfile + schedule: + - cron: '0 21 * * *' + +jobs: + Anchore-Build-Scan: + runs-on: ubuntu-latest + steps: + - name: Checkout the code + uses: actions/checkout@v2 + - name: Build the Docker image + run: | + cd docker/server + perl -pi -e 's|=\$version||g' Dockerfile + docker build . --file Dockerfile --tag localbuild/testimage:latest + - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled + uses: anchore/scan-action@master + with: + image-reference: "localbuild/testimage:latest" + dockerfile-path: "docker/server/Dockerfile" + acs-report-enable: true + - name: Upload Anchore Scan Report + uses: github/codeql-action/upload-sarif@v1 + with: + sarif_file: results.sarif From 1ffc4ad88a5d4f8ab29394a7121af007129c40ea Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 11:46:10 +0300 Subject: [PATCH 197/330] Update polymorphic_parts_s.xml --- tests/performance/polymorphic_parts_s.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index 04602149da1..b4dd87a7ae3 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -27,7 +27,7 @@ INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From 02ee988d80afc2719bbe79e77ed7ae47722b8e68 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 11:46:29 +0300 Subject: [PATCH 198/330] Update polymorphic_parts_m.xml --- tests/performance/polymorphic_parts_m.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index 251f313edc7..54a81def55e 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -27,7 +27,7 @@ INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From e27e5096676815cbfd159f1a92bea541a36bf774 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Fri, 3 Jul 2020 11:46:48 +0300 Subject: [PATCH 199/330] Update polymorphic_parts_l.xml --- tests/performance/polymorphic_parts_l.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index 4ffffe0e539..d2ae9417bf7 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -27,7 +27,7 @@ INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) DROP TABLE IF EXISTS hits_wide DROP TABLE 
IF EXISTS hits_compact From 2577d71ed5ac02403b2376c6d4c216909e598d1e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 3 Jul 2020 11:57:38 +0300 Subject: [PATCH 200/330] Changelog for 20.5 --- CHANGELOG.md | 353 ++++++++++++++++++++++ utils/simple-backport/changelog.sh | 4 +- utils/simple-backport/format-changelog.py | 26 +- 3 files changed, 375 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cb41d335bc..9a7b1d44b2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,356 @@ +## ClickHouse release 20.4 + +### ClickHouse release v20.5.2.7-stable 2020-07-02 + +#### Backward Incompatible Change + +* Return non-Nullable result from COUNT(DISTINCT), and `uniq` aggregate functions family. If all passed values are NULL, return zero instead. This improves SQL compatibility. [#11661](https://github.com/ClickHouse/ClickHouse/pull/11661) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a check for the case when user-level setting is specified in a wrong place. User-level settings should be specified in `users.xml` inside `` section for specific user profile (or in `` for default settings). The server won't start with exception message in log. This fixes [#9051](https://github.com/ClickHouse/ClickHouse/issues/9051). If you want to skip the check, you can either move settings to the appropriate place or add `1` to config.xml. [#11449](https://github.com/ClickHouse/ClickHouse/pull/11449) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* The setting `input_format_with_names_use_header` is enabled by default. It will affect parsing of input formats `-WithNames` and `-WithNamesAndTypes`. [#10937](https://github.com/ClickHouse/ClickHouse/pull/10937) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### New Feature + +* Added support for MySQL style global variables syntax (stub). This is needed for compatibility of MySQL protocol. [#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `minMap` and `maxMap` functions were added. [#11603](https://github.com/ClickHouse/ClickHouse/pull/11603) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Add the `system.asynchronous_metric_log` table that logs historical metrics from `system.asynchronous_metrics`. [#11588](https://github.com/ClickHouse/ClickHouse/pull/11588) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add functions `extractAllGroupsHorizontal(haystack, re)` and `extractAllGroupsVertical(haystack, re)`. [#11554](https://github.com/ClickHouse/ClickHouse/pull/11554) ([Vasily Nemkov](https://github.com/Enmk)). +* Add SHOW CLUSTER(S) queries. [#11467](https://github.com/ClickHouse/ClickHouse/pull/11467) ([hexiaoting](https://github.com/hexiaoting)). +* Add `netloc` function for extracting network location, similar to `urlparse(url)`, `netloc` in python. [#11356](https://github.com/ClickHouse/ClickHouse/pull/11356) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). +* Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). 
[#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). +* Add function `fuzzBits` that randomly flips bits in a string with given probability. [#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/xolm)). +* Use HTTP client for S3 based on Poco. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add query performance metrics based on Linux `perf_events`. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). +* - Add's a hasSubStr function that allows for look for sub sequences in arrays. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). +* Now support NULL and NOT NULL modifiers for data types in create query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Added new functions to import/export DateTime64 as Int64 with various precision: `to-/fromUnixTimestamp64Milli/-Micro/-Nano`. [#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). +* Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). +* Support `ALTER RENAME COLUMN` for the distributed table engine. Continuation of [#10727](https://github.com/ClickHouse/ClickHouse/issues/10727). Fixes [#10747](https://github.com/ClickHouse/ClickHouse/issues/10747). [#10887](https://github.com/ClickHouse/ClickHouse/pull/10887) ([alesapin](https://github.com/alesapin)). +* Added new complex key direct layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). +* OFFSET keyword can now be used without an affiliated LIMIT clause. [#10802](https://github.com/ClickHouse/ClickHouse/pull/10802) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Added `system.licenses` table. This table contains licenses of third-party libraries that are located in `contrib` directory. This closes [#2890](https://github.com/ClickHouse/ClickHouse/issues/2890). [#10795](https://github.com/ClickHouse/ClickHouse/pull/10795) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* New function function toStartOfSecond(DateTime64) -> DateTime64 that nullifies sub-second part of DateTime64 value. [#10722](https://github.com/ClickHouse/ClickHouse/pull/10722) ([Vasily Nemkov](https://github.com/Enmk)). +* Add data type Point (Tuple(Float64, Float64)) and Polygon (Array(Array(Tuple(Float64, Float64))). [#10678](https://github.com/ClickHouse/ClickHouse/pull/10678) ([Alexey Ilyukhov](https://github.com/livace)). +* Default user and database creation on docker image starting. 
[#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). +* Added a new layout ```direct``` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). +* Add new input format `JSONAsString` that accepts a sequence of JSON objects separated by newlines, spaces and/or commas. [#10607](https://github.com/ClickHouse/ClickHouse/pull/10607) ([Kruglov Pavel](https://github.com/Avogar)). +* Allowed to profile memory with finer granularity steps than 4 MiB. Added sampling memory profiler to capture random allocations/deallocations. [#10598](https://github.com/ClickHouse/ClickHouse/pull/10598) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* (Only Linux) Clickhouse server now tries to fallback to ProcfsMetricsProvider when clickhouse binary is not attributed with CAP_NET_ADMIN capability to collect per-query system metrics (for CPU and I/O). [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). +* Added ability to delete a subset of expired rows, which satisfies the condition in WHERE clause. Added ability to replace expired rows with aggregates of them specified in GROUP BY clause. [#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). +* Function that extracts from haystack all matching non-overlapping groups with regular expressions, and put those into `Array(Array(String))` column. [#10534](https://github.com/ClickHouse/ClickHouse/pull/10534) ([Vasily Nemkov](https://github.com/Enmk)). +* Selects with final are executed in parallel. Added setting `max_final_threads` to limit the number of threads used. [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses](https://github.com/MovElb)). +* Added OpenCl support and bitonic sort algorithm, which can be used for sorting integer types of data in single column. Needs to be build with flag `-DENABLE_OPENCL=1`. For using bitonic sort algorithm instead of others you need to set `bitonic_sort` for Setting's option `special_sort` and make sure that OpenCL is available. [#10232](https://github.com/ClickHouse/ClickHouse/pull/10232) ([Ri](https://github.com/margaritiko)). +* `SimpleAggregateFunction` now also supports `sumMap`. [#10000](https://github.com/ClickHouse/ClickHouse/pull/10000) ([Ildus Kurbangaliev](https://github.com/ildus)). + +#### Bug Fix + +* Fix rare crash caused by using Nullable column in prewhere condition. Continuation of [#11869](https://github.com/ClickHouse/ClickHouse/issues/11869). [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). +* Preserve column alias with optimize_aggregators_of_group_by_keys (`optimize_aggregators_of_group_by_keys` has been introduced in [#11667](https://github.com/ClickHouse/ClickHouse/issues/11667)). [#11806](https://github.com/ClickHouse/ClickHouse/pull/11806) ([Azat Khuzhin](https://github.com/azat)). +* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `Scalar doesn't exist` exception when using `WITH ...` in `SELECT ... FROM merge_tree_table ...` https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)). +* Fixes crash in special generated queries when `optimize_arithmetic_operations_in_aggregate_functions = 1`. [#11756](https://github.com/ClickHouse/ClickHouse/pull/11756) ([Ruslan](https://github.com/kamalov-ruslan)). +* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). +* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). +* Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)). +* Fixed LOGICAL_ERROR caused by wrong type deduction of complex literals in Values input format. [#11732](https://github.com/ClickHouse/ClickHouse/pull/11732) ([tavplubix](https://github.com/tavplubix)). +* Fix `ORDER BY ... WITH FILL` over const columns. [#11697](https://github.com/ClickHouse/ClickHouse/pull/11697) ([Anton Popov](https://github.com/CurtizJ)). +* Fix very rare race condition in SYSTEM SYNC REPLICA. If the replicated table is created and at the same time from the separate connection another client is issuing `SYSTEM SYNC REPLICA` command on that table (this is unlikely, because another client should be aware that the table is created), it's possible to get nullptr dereference. [#11691](https://github.com/ClickHouse/ClickHouse/pull/11691) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Pass proper timeouts when communicating with XDBC bridge. Recently timeouts were not respected when checking bridge liveness and receiving meta info. [#11690](https://github.com/ClickHouse/ClickHouse/pull/11690) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `LIMIT n WITH TIES` usage together with `ORDER BY` statement, which contains aliases. 
[#11689](https://github.com/ClickHouse/ClickHouse/pull/11689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible `Pipeline stuck` for selects with parallel `FINAL`. Fixes [#11636](https://github.com/ClickHouse/ClickHouse/issues/11636). [#11682](https://github.com/ClickHouse/ClickHouse/pull/11682) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error which leads to an incorrect state of `system.mutations`. It may show that whole mutation is already done but the server still has `MUTATE_PART` tasks in the replication queue and tries to execute them. This fixes [#11611](https://github.com/ClickHouse/ClickHouse/issues/11611). [#11681](https://github.com/ClickHouse/ClickHouse/pull/11681) ([alesapin](https://github.com/alesapin)). +* Fix syntax hilite in CREATE USER query. [#11664](https://github.com/ClickHouse/ClickHouse/pull/11664) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove trivial count query optimization if row-level security is set. In previous versions the user get total count of records in a table instead filtered. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* without -q option the database does not get created at startup. [#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). +* Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix race conditions in CREATE/DROP of different replicas of ReplicatedMergeTree. Continue to work if the table was not removed completely from ZooKeeper or not created successfully. This fixes [#11432](https://github.com/ClickHouse/ClickHouse/issues/11432). [#11592](https://github.com/ClickHouse/ClickHouse/pull/11592) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. 
[#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed rare segfault in `SHOW CREATE TABLE` Fixes [#11490](https://github.com/ClickHouse/ClickHouse/issues/11490). [#11579](https://github.com/ClickHouse/ClickHouse/pull/11579) ([tavplubix](https://github.com/tavplubix)). +* All queries in HTTP session have had the same query_id. It is fixed. [#11578](https://github.com/ClickHouse/ClickHouse/pull/11578) ([tavplubix](https://github.com/tavplubix)). +* Now clickhouse-server docker container will prefer IPv6 checking server aliveness. [#11550](https://github.com/ClickHouse/ClickHouse/pull/11550) ([Ivan Starkov](https://github.com/istarkov)). +* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). +* Fix async INSERT into Distributed for prefer_localhost_replica=0 and w/o internal_replication. [#11527](https://github.com/ClickHouse/ClickHouse/pull/11527) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `Pipeline stuck` exception for `INSERT SELECT FINAL` where `SELECT` (`max_threads`>1) has multiple streams but `INSERT` has only one (`max_insert_threads`==0). [#11455](https://github.com/ClickHouse/ClickHouse/pull/11455) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result in queries like `select count() from t, u`. [#11454](https://github.com/ClickHouse/ClickHouse/pull/11454) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). +* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in JOIN over LowCarinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). 
+* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). +* Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash in direct selects from StorageJoin (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now merges stopped before change metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). +* Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in clickhouse-copier. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. 
Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix wrong markup in documentation. [#11263](https://github.com/ClickHouse/ClickHouse/pull/11263) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Now `primary.idx` will be checked if it's defined in `CREATE` query. [#11199](https://github.com/ClickHouse/ClickHouse/pull/11199) ([alesapin](https://github.com/alesapin)). +* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). 
[#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). +* Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). +* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). +* Fixed deadlock during server startup after update with changes in structure of system log tables. [#11106](https://github.com/ClickHouse/ClickHouse/pull/11106) ([alesapin](https://github.com/alesapin)). +* Fixed memory leak in registerDiskS3. [#11074](https://github.com/ClickHouse/ClickHouse/pull/11074) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix error `No such name in Block::erase()` when JOIN appears with PREWHERE or `optimize_move_to_prewhere` makes PREWHERE from WHERE. [#11051](https://github.com/ClickHouse/ClickHouse/pull/11051) ([Artem Zuikov](https://github.com/4ertus2)). +* Fixes the potential missed data during termination of Kafka engine table. [#11048](https://github.com/ClickHouse/ClickHouse/pull/11048) ([filimonov](https://github.com/filimonov)). +* Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). +* Now it's possible to `ADD/DROP` and `RENAME` the same one column in a single `ALTER` query. Exception message for simultaneous `MODIFY` and `RENAME` became more clear. Partially fixes [#10669](https://github.com/ClickHouse/ClickHouse/issues/10669). [#11037](https://github.com/ClickHouse/ClickHouse/pull/11037) ([alesapin](https://github.com/alesapin)). +* Fixed parsing of S3 URLs. [#11036](https://github.com/ClickHouse/ClickHouse/pull/11036) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix very rare potential use-after-free error in MergeTree if table was not created successfully. [#10986](https://github.com/ClickHouse/ClickHouse/pull/10986) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix metadata (relative path for rename) and data (relative path for symlink) handling for Atomic database. [#10980](https://github.com/ClickHouse/ClickHouse/pull/10980) ([Azat Khuzhin](https://github.com/azat)). +* Fix server crash on concurrent `ALTER` and `DROP DATABASE` queries with `Atomic` database engine. [#10968](https://github.com/ClickHouse/ClickHouse/pull/10968) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect raw data size in method getRawData(). [#10964](https://github.com/ClickHouse/ClickHouse/pull/10964) ([Igr](https://github.com/ObjatieGroba)). +* Fix incompatibility of two-level aggregation between versions 20.1 and earlier. This incompatibility happens when different versions of ClickHouse are used on initiator node and remote nodes and the size of GROUP BY result is large and aggregation is performed by a single String field. It leads to several unmerged rows for a single key in result. [#10952](https://github.com/ClickHouse/ClickHouse/pull/10952) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Avoid sending partially written files by the DistributedBlockOutputStream. [#10940](https://github.com/ClickHouse/ClickHouse/pull/10940) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix crash in `SELECT count(notNullIn(NULL, []))`. [#10920](https://github.com/ClickHouse/ClickHouse/pull/10920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#10910](https://github.com/ClickHouse/ClickHouse/pull/10910) ([filimonov](https://github.com/filimonov)). +* Now it's possible to execute multiple `ALTER RENAME` like `a TO b, c TO a`. [#10895](https://github.com/ClickHouse/ClickHouse/pull/10895) ([alesapin](https://github.com/alesapin)). +* Fix possible race which could happen when you get result from aggregate function state from multiple thread for the same column. The only way (which I found) it can happen is when you use `finalizeAggregation` function while reading from table with `Memory` engine which stores `AggregateFunction` state for `quanite*` function. [#10890](https://github.com/ClickHouse/ClickHouse/pull/10890) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix backward compatibility with tuples in Distributed tables. [#10889](https://github.com/ClickHouse/ClickHouse/pull/10889) ([Anton Popov](https://github.com/CurtizJ)). +* Fix SIGSEGV in StringHashTable (if such key does not exist). [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `WATCH` hangs after `LiveView` table was dropped from database with `Atomic` engine. [#10859](https://github.com/ClickHouse/ClickHouse/pull/10859) ([tavplubix](https://github.com/tavplubix)). +* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it become inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)). +* Now constraints are updated if the column participating in `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)). +* Fix potential read of uninitialized memory in cache dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix columns order after Block::sortColumns() (also add a test that shows that it affects some real use case - Buffer engine). [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)). +* Fix the issue with ODBC bridge when no quoting of identifiers is requested. This fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan and MSan report in DateLUT. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make use of `src_type` for correct type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Get rid of old libunwind patches. https://github.com/ClickHouse-Extras/libunwind/commit/500aa227911bd185a94bfc071d68f4d3b03cb3b1#r39048012 This allows to disable `-fno-omit-frame-pointer` in `clang` builds that improves performance at least by 1% in average. 
[#10761](https://github.com/ClickHouse/ClickHouse/pull/10761) ([Amos Bird](https://github.com/amosbird)). +* Fix avgWeighted when using floating-point weight over multiple shards. [#10758](https://github.com/ClickHouse/ClickHouse/pull/10758) ([Baudouin Giard](https://github.com/bgiard)). +* Fix `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix combinator -OrNull and -OrDefault when combined with -State. [#10741](https://github.com/ClickHouse/ClickHouse/pull/10741) ([hcz](https://github.com/hczhcz)). +* Fix crash in `generateRandom` with nested types. Fixes [#10583](https://github.com/ClickHouse/ClickHouse/issues/10583). [#10734](https://github.com/ClickHouse/ClickHouse/pull/10734) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix data corruption for `LowCardinality(FixedString)` key column in `SummingMergeTree` which could have happened after merge. Fixes [#10489](https://github.com/ClickHouse/ClickHouse/issues/10489). [#10721](https://github.com/ClickHouse/ClickHouse/pull/10721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization. [#10715](https://github.com/ClickHouse/ClickHouse/pull/10715) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible buffer overflow in function `h3EdgeAngle`. [#10711](https://github.com/ClickHouse/ClickHouse/pull/10711) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix disappearing totals. Totals could have being filtered if query had had join or subquery with external where condition. Fixes [#10674](https://github.com/ClickHouse/ClickHouse/issues/10674). [#10698](https://github.com/ClickHouse/ClickHouse/pull/10698) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix atomicity of HTTP insert. This fixes [#9666](https://github.com/ClickHouse/ClickHouse/issues/9666). [#10687](https://github.com/ClickHouse/ClickHouse/pull/10687) ([Andrew Onyshchuk](https://github.com/oandrew)). +* Fix multiple usages of `IN` operator with the identical set in one query. [#10686](https://github.com/ClickHouse/ClickHouse/pull/10686) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed bug, which causes http requests stuck on client close when `readonly=2` and `cancel_http_readonly_queries_on_client_close=1`. Fixes [#7939](https://github.com/ClickHouse/ClickHouse/issues/7939), [#7019](https://github.com/ClickHouse/ClickHouse/issues/7019), [#7736](https://github.com/ClickHouse/ClickHouse/issues/7736), [#7091](https://github.com/ClickHouse/ClickHouse/issues/7091). [#10684](https://github.com/ClickHouse/ClickHouse/pull/10684) ([tavplubix](https://github.com/tavplubix)). +* Fix order of parameters in AggregateTransform constructor. [#10667](https://github.com/ClickHouse/ClickHouse/pull/10667) ([palasonic1](https://github.com/palasonic1)). +* Fix the lack of parallel execution of remote queries with `distributed_aggregation_memory_efficient` enabled. Fixes [#10655](https://github.com/ClickHouse/ClickHouse/issues/10655). [#10664](https://github.com/ClickHouse/ClickHouse/pull/10664) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible incorrect number of rows for queries with `LIMIT`. 
Fixes [#10566](https://github.com/ClickHouse/ClickHouse/issues/10566), [#10709](https://github.com/ClickHouse/ClickHouse/issues/10709). [#10660](https://github.com/ClickHouse/ClickHouse/pull/10660) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug which locks concurrent alters when table has a lot of parts. [#10659](https://github.com/ClickHouse/ClickHouse/pull/10659) ([alesapin](https://github.com/alesapin)). +* Fix nullptr dereference in StorageBuffer if server was shutdown before table startup. [#10641](https://github.com/ClickHouse/ClickHouse/pull/10641) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix predicates optimization for distributed queries (`enable_optimize_predicate_expression=1`) for queries with `HAVING` section (i.e. when filtering on the server initiator is required), by preserving the order of expressions (and this is enough to fix), and also force aggregator use column names over indexes. Fixes: [#10613](https://github.com/ClickHouse/ClickHouse/issues/10613), [#11413](https://github.com/ClickHouse/ClickHouse/issues/11413). [#10621](https://github.com/ClickHouse/ClickHouse/pull/10621) ([Azat Khuzhin](https://github.com/azat)). +* Fix optimize_skip_unused_shards with LowCardinality. [#10611](https://github.com/ClickHouse/ClickHouse/pull/10611) ([Azat Khuzhin](https://github.com/azat)). +* Fix segfault in StorageBuffer when exception on server startup. Fixes [#10550](https://github.com/ClickHouse/ClickHouse/issues/10550). [#10609](https://github.com/ClickHouse/ClickHouse/pull/10609) ([tavplubix](https://github.com/tavplubix)). +* On `SYSTEM DROP DNS CACHE` query also drop caches, which are used to check if user is allowed to connect from some IP addresses. [#10608](https://github.com/ClickHouse/ClickHouse/pull/10608) ([tavplubix](https://github.com/tavplubix)). +* Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed handling condition variable for synchronous mutations. In some cases signals to that condition variable could be lost. [#10588](https://github.com/ClickHouse/ClickHouse/pull/10588) ([Vladimir Chebotarev](https://github.com/excitoon)). +* This PR fixes possible crash when `createDictionary()` is called before `loadStoredObject()` has finished. [#10587](https://github.com/ClickHouse/ClickHouse/pull/10587) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). +* Fix SELECT of column ALIAS which default expression type different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). +* Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)). 
+* Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned of by default, due to trickery of sharding_key analyzing, simple example is `if` in sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)). +* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fails on `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)). +* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Improvement + +* Rewrite code for `optimize_arithmetic_operations_in_aggregate_functions` optimisation. [#11899](https://github.com/ClickHouse/ClickHouse/pull/11899) ([Artem Zuikov](https://github.com/4ertus2)). +* When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Slightly improve diagnostic of reading decimal from string. This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added `hostname` as an alias to function `hostName`. This feature was suggested by Victor Tarnavskiy from Yandex.Metrica. [#11821](https://github.com/ClickHouse/ClickHouse/pull/11821) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove leader election, step 3: remove yielding of leadership; remove sending queries to leader. [#11795](https://github.com/ClickHouse/ClickHouse/pull/11795) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. [#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). +* Added support for distributed `DDL` (update/delete/drop partition) on cross replication clusters. [#11703](https://github.com/ClickHouse/ClickHouse/pull/11703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Emit warning instead of error in server log at startup if we cannot listen one of the listen addresses (e.g. IPv6 is unavailable inside Docker). Note that if server fails to listen all listed addresses, it will refuse to startup as before. 
This fixes [#4406](https://github.com/ClickHouse/ClickHouse/issues/4406). [#11687](https://github.com/ClickHouse/ClickHouse/pull/11687) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* When multiline query is printed to server log, the lines are joined. Make it to work correct in case of multiline string literals, identifiers and single-line comments. This fixes [#3853](https://github.com/ClickHouse/ClickHouse/issues/3853). [#11686](https://github.com/ClickHouse/ClickHouse/pull/11686) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). +* Clear password from command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with explicit value. This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Don't use debug info from ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). [#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Skip empty parameters in requested URL. They may appear when you write `http://localhost:8123/?&a=b` or `http://localhost:8123/?a=b&&c=d`. This closes [#10749](https://github.com/ClickHouse/ClickHouse/issues/10749). [#11651](https://github.com/ClickHouse/ClickHouse/pull/11651) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow using `groupArrayArray` and `groupUniqArrayArray` as `SimpleAggregateFunction`. [#11650](https://github.com/ClickHouse/ClickHouse/pull/11650) ([Volodymyr Kuznetsov](https://github.com/ksvladimir)). +* Allow comparison with constant strings by implicit conversions when analysing index conditions on other types. This may close [#11630](https://github.com/ClickHouse/ClickHouse/issues/11630). [#11648](https://github.com/ClickHouse/ClickHouse/pull/11648) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
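To make the `parseDateTimeBestEffortOrNull`/`OrZero` entry above concrete, a small sketch of the new behaviour; the exact inputs shown here are illustrative assumptions, not taken from the PR:

```sql
-- A string that cannot be parsed completely now yields NULL ...
SELECT parseDateTimeBestEffortOrNull('2020-06-14 some trailing garbage');
-- ... and the *OrZero variant yields the zero value instead.
SELECT parseDateTimeBestEffortOrZero('2020-06-14 some trailing garbage');
```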
+* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add round_robin load_balancing. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). +* https://github.com/ClickHouse/ClickHouse/pull/7572#issuecomment-642815377 Support config default HTTPHandlers. [#11628](https://github.com/ClickHouse/ClickHouse/pull/11628) ([Winter Zhang](https://github.com/zhang2014)). +* Make more input format work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). +* Improve `multiple_joins_rewriter_version=2` logic. Fix unknown columns error for lambda aliases. [#11587](https://github.com/ClickHouse/ClickHouse/pull/11587) ([Artem Zuikov](https://github.com/4ertus2)). +* Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). +* Better exception message when cannot parse columns declaration list. This closes [#10403](https://github.com/ClickHouse/ClickHouse/issues/10403). [#11537](https://github.com/ClickHouse/ClickHouse/pull/11537) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve `enable_optimize_predicate_expression=1` logic for VIEW. [#11513](https://github.com/ClickHouse/ClickHouse/pull/11513) ([Artem Zuikov](https://github.com/4ertus2)). +* Adding support for PREWHERE in live view tables. [#11495](https://github.com/ClickHouse/ClickHouse/pull/11495) ([vzakaznikov](https://github.com/vzakaznikov)). +* Automatically update DNS cache, which is used to check if user is allowed to connect from an address. [#11487](https://github.com/ClickHouse/ClickHouse/pull/11487) ([tavplubix](https://github.com/tavplubix)). +* ON CLUSTER support for SYSTEM {FLUSH DISTRIBUTED,STOP/START DISTRIBUTED SEND}. [#11415](https://github.com/ClickHouse/ClickHouse/pull/11415) ([Azat Khuzhin](https://github.com/azat)). +* Add system.distribution_queue table. [#11394](https://github.com/ClickHouse/ClickHouse/pull/11394) ([Azat Khuzhin](https://github.com/azat)). +* Support for all format settings in Kafka, expose some setting on table level, adjust the defaults for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). +* OPTIMIZE FINAL will force merge even if concurrent merges are performed. This closes [#11309](https://github.com/ClickHouse/ClickHouse/issues/11309) and closes [#11322](https://github.com/ClickHouse/ClickHouse/issues/11322). [#11346](https://github.com/ClickHouse/ClickHouse/pull/11346) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Suppress output of cancelled queries in clickhouse-client. In previous versions result may continue to print in terminal even after you press Ctrl+C to cancel query. This closes [#9473](https://github.com/ClickHouse/ClickHouse/issues/9473). [#11342](https://github.com/ClickHouse/ClickHouse/pull/11342) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better log messages in while reloading configuration. [#11341](https://github.com/ClickHouse/ClickHouse/pull/11341) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
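The `round_robin` entry above adds a new value for the existing `load_balancing` setting; a minimal usage sketch (the Distributed table name is hypothetical):

```sql
-- Pick replicas of a Distributed table in round-robin order.
SET load_balancing = 'round_robin';
SELECT count() FROM my_distributed_table;
```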
+* Remove trailing whitespaces from formatted queries in `clickhouse-client` or `clickhouse-format` in some cases. [#11325](https://github.com/ClickHouse/ClickHouse/pull/11325) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add setting "output_format_pretty_max_value_width". If value is longer, it will be cut to avoid output of too large values in terminal. This closes [#11140](https://github.com/ClickHouse/ClickHouse/issues/11140). [#11324](https://github.com/ClickHouse/ClickHouse/pull/11324) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better exception message in case when there is shortage of memory mappings. This closes [#11027](https://github.com/ClickHouse/ClickHouse/issues/11027). [#11316](https://github.com/ClickHouse/ClickHouse/pull/11316) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support (U)Int8, (U)Int16, Date in ASOF JOIN. [#11301](https://github.com/ClickHouse/ClickHouse/pull/11301) ([Artem Zuikov](https://github.com/4ertus2)). +* Update librdkafka to version [1.4.2](https://github.com/edenhill/librdkafka/releases/tag/v1.4.2). [#11256](https://github.com/ClickHouse/ClickHouse/pull/11256) ([filimonov](https://github.com/filimonov)). +* Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). +* Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). +* When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). +* Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). +* Add port() function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). +* Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool). 
[#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)).
+* Show authentication type in the `system.users` table and in the output of the SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Apply `TTL` to old data after an `ALTER MODIFY TTL` query. This behaviour is controlled by the setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)).
+* Add `NCHAR` and `NVARCHAR` synonyms for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove data on explicit `DROP DATABASE` for the `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([tavplubix](https://github.com/tavplubix)).
+* Set thread names for internal threads of the rdkafka library. Make logs from rdkafka available in server logs. [#10983](https://github.com/ClickHouse/ClickHouse/pull/10983) ([Azat Khuzhin](https://github.com/azat)).
+* Some additions and cleanup for [#10232](https://github.com/ClickHouse/ClickHouse/issues/10232). [#10934](https://github.com/ClickHouse/ClickHouse/pull/10934) ([Artem Zuikov](https://github.com/4ertus2)).
+* Support for unicode whitespaces in queries. This helps when queries are copy-pasted from Word or from a web page. This fixes [#10896](https://github.com/ClickHouse/ClickHouse/issues/10896). [#10903](https://github.com/ClickHouse/ClickHouse/pull/10903) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow large UInt types as the index in function `tupleElement`. [#10874](https://github.com/ClickHouse/ClickHouse/pull/10874) ([hcz](https://github.com/hczhcz)).
+* Respect `prefer_localhost_replica`/`load_balancing` on INSERT into Distributed tables. [#10867](https://github.com/ClickHouse/ClickHouse/pull/10867) ([Azat Khuzhin](https://github.com/azat)).
+* Introduce the `min_insert_block_size_rows_for_materialized_views` and `min_insert_block_size_bytes_for_materialized_views` settings. These settings are similar to `min_insert_block_size_rows` and `min_insert_block_size_bytes`, but applied only to blocks inserted into a `MATERIALIZED VIEW`. They help to control block squashing while pushing to MVs and to avoid excessive memory usage (see the example below). [#10858](https://github.com/ClickHouse/ClickHouse/pull/10858) ([Azat Khuzhin](https://github.com/azat)).
+* The `clickhouse-format` tool is now able to format multiple queries when the `-n` argument is used. [#10852](https://github.com/ClickHouse/ClickHouse/pull/10852) ([Darío](https://github.com/dgrr)).
+* Get rid of an exception from the replication queue during server shutdown. Fixes [#10819](https://github.com/ClickHouse/ClickHouse/issues/10819). [#10841](https://github.com/ClickHouse/ClickHouse/pull/10841) ([alesapin](https://github.com/alesapin)).
+* Ensure that `varSamp`, `varPop` cannot return negative results due to numerical errors and that `stddevSamp`, `stddevPop` cannot be calculated from negative variance. This fixes [#10532](https://github.com/ClickHouse/ClickHouse/issues/10532). [#10829](https://github.com/ClickHouse/ClickHouse/pull/10829) ([alexey-milovidov](https://github.com/alexey-milovidov)).
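A minimal sketch of the materialized-view block-size settings mentioned above; the values and table names are assumptions for illustration only:

```sql
-- Control squashing only for blocks pushed into materialized views,
-- independently of the generic min_insert_block_size_* settings.
SET min_insert_block_size_rows_for_materialized_views = 1048576;
SET min_insert_block_size_bytes_for_materialized_views = 268435456;
-- Hypothetical source table with one or more MVs attached to it.
INSERT INTO src SELECT * FROM numbers(10000000);
```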
+* Better DNS exception message. This fixes [#10813](https://github.com/ClickHouse/ClickHouse/issues/10813). [#10828](https://github.com/ClickHouse/ClickHouse/pull/10828) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Possibility to configure proxy-resolver for DiskS3. [#10744](https://github.com/ClickHouse/ClickHouse/pull/10744) ([Pavel Kovalenko](https://github.com/Jokser)). +* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Change HTTP response code in case of some parse errors to 400 Bad Request. This fix [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). [#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make pointInPolygon work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)). +* Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). +* - Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). +* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement + +* This optimization eliminates min/max/any aggregators of GROUP BY keys in SELECT section. [#11667](https://github.com/ClickHouse/ClickHouse/pull/11667) ([xPoSx](https://github.com/xPoSx)). +* Allow multiple replicas to assign merges, mutations, partition drop, move and replace concurrently. This closes [#10367](https://github.com/ClickHouse/ClickHouse/issues/10367). [#11639](https://github.com/ClickHouse/ClickHouse/pull/11639) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* New optimization that takes all operations out of "any" function. [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). +* Improve performance of `clickhouse-client` in interactive mode when Pretty formats are used. In previous versions, significant amount of time can be spent calculating visible width of UTF-8 string. This closes [#11323](https://github.com/ClickHouse/ClickHouse/issues/11323). 
[#11323](https://github.com/ClickHouse/ClickHouse/pull/11323) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve performance for INSERT queries via INSERT SELECT or INSERT with clickhouse-client when small blocks are generated (typical case with parallel parsing). This fixes [#11275](https://github.com/ClickHouse/ClickHouse/issues/11275). Fix the issue that CONSTRAINTs were not working for DEFAULT fields. This fixes [#11273](https://github.com/ClickHouse/ClickHouse/issues/11273). Fix the issue that CONSTRAINTS were ignored for TEMPORARY tables. This fixes [#11274](https://github.com/ClickHouse/ClickHouse/issues/11274). [#11276](https://github.com/ClickHouse/ClickHouse/pull/11276) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improved performance for queries with `ORDER BY` and small `LIMIT` (less, then `max_block_size`). [#11171](https://github.com/ClickHouse/ClickHouse/pull/11171) ([Albert Kidrachev](https://github.com/Provet)). +* Enable mlock of clickhouse binary by default. It will prevent clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make queries with `sum` aggregate function and without GROUP BY keys to run multiple times faster. [#10992](https://github.com/ClickHouse/ClickHouse/pull/10992) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improving radix sort by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). +* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). +* Sort bigger parts of the left table in MergeJoin. Buffer left blocks in memory. Add `partial_merge_join_left_table_buffer_bytes` setting to manage the left blocks buffers sizes. [#10601](https://github.com/ClickHouse/ClickHouse/pull/10601) ([Artem Zuikov](https://github.com/4ertus2)). +* Remove duplicate ORDER BY and DISTINCT from subqueries. [#10067](https://github.com/ClickHouse/ClickHouse/pull/10067) ([Mikhail Malafeev](https://github.com/demo-99)). +* This feature eliminates functions of other keys in GROUP BY section. [#10051](https://github.com/ClickHouse/ClickHouse/pull/10051) ([xPoSx](https://github.com/xPoSx)). +* New optimization that takes arithmetic operations out of aggregate functions. [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). +* Optimization of GROUP BY with respect to table sorting key. [#9113](https://github.com/ClickHouse/ClickHouse/pull/9113) ([dimarub2000](https://github.com/dimarub2000)). + +#### Build/Testing/Packaging Improvement + +* Remove dependency on `tzdata`: do not fail if `/usr/share/zoneinfo` directory does not exist. Note that all timezones work in ClickHouse even without tzdata installed in system. [#11827](https://github.com/ClickHouse/ClickHouse/pull/11827) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Print compiler build id in crash messages. It will make us slightly more certain about what binary has crashed. Added new function `buildId`. [#11824](https://github.com/ClickHouse/ClickHouse/pull/11824) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a test to ensure that mutations continue to work after FREEZE query. 
[#11820](https://github.com/ClickHouse/ClickHouse/pull/11820) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Don't allow tests with the "fail" substring in their names because it makes looking at the test results in a browser less convenient when you type Ctrl+F and search for "fail". [#11817](https://github.com/ClickHouse/ClickHouse/pull/11817) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove unused imports from HTTPHandlerFactory. [#11660](https://github.com/ClickHouse/ClickHouse/pull/11660) ([Bharat Nallan](https://github.com/bharatnc)).
+* Added random sampling of instances where the copier is executed. It is needed to avoid the `Too many simultaneous queries` error. Also increased the timeout and decreased the fault probability. [#11573](https://github.com/ClickHouse/ClickHouse/pull/11573) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix a missing include for `std::move` used at line 17. [#11525](https://github.com/ClickHouse/ClickHouse/pull/11525) ([Matwey V. Kornilov](https://github.com/matwey)).
+* Speed up the build by removing old example programs. Also found some orphan functional tests. [#11486](https://github.com/ClickHouse/ClickHouse/pull/11486) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Increase ccache size for builds in CI. [#11450](https://github.com/ClickHouse/ClickHouse/pull/11450) ([alesapin](https://github.com/alesapin)).
+* Leave only unit_tests_dbms in deb build. [#11429](https://github.com/ClickHouse/ClickHouse/pull/11429) ([Ilya Yatsishin](https://github.com/qoega)).
+* Refactor CMake build files. [#11390](https://github.com/ClickHouse/ClickHouse/pull/11390) ([Ivan](https://github.com/abyss7)).
+* Fix several flaky integration tests. [#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)).
+* Add support for unit tests run with UBSan. [#11345](https://github.com/ClickHouse/ClickHouse/pull/11345) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove redundant timeout from integration test `test_insertion_sync_fails_with_timeout`. [#11343](https://github.com/ClickHouse/ClickHouse/pull/11343) ([alesapin](https://github.com/alesapin)).
+* Better check for hung queries in clickhouse-test. [#11321](https://github.com/ClickHouse/ClickHouse/pull/11321) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Emit a warning if the server was built in debug mode or with sanitizers. [#11304](https://github.com/ClickHouse/ClickHouse/pull/11304) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Now clickhouse-test checks that the server is alive before tests run. [#11285](https://github.com/ClickHouse/ClickHouse/pull/11285) ([alesapin](https://github.com/alesapin)).
+* Fix potentially flaky test `00731_long_merge_tree_select_opened_files.sh`. It does not fail frequently, but we have discovered a potential race condition in this test while experimenting with ThreadFuzzer: [#9814](https://github.com/ClickHouse/ClickHouse/issues/9814) See [link](https://clickhouse-test-reports.s3.yandex.net/9814/40e3023e215df22985d275bf85f4d2290897b76b/functional_stateless_tests_(unbundled).html#fail1) for the example. [#11270](https://github.com/ClickHouse/ClickHouse/pull/11270) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Repeat a test in CI if the `curl` invocation timed out. This is possible due to system hangups for 10+ seconds that are typical in our CI infrastructure. This fixes [#11267](https://github.com/ClickHouse/ClickHouse/issues/11267).
[#11268](https://github.com/ClickHouse/ClickHouse/pull/11268) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add a test for Join table engine from @donmikel. This closes [#9158](https://github.com/ClickHouse/ClickHouse/issues/9158). [#11265](https://github.com/ClickHouse/ClickHouse/pull/11265) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix several non significant errors in unit tests. [#11262](https://github.com/ClickHouse/ClickHouse/pull/11262) ([alesapin](https://github.com/alesapin)). +* Now parts of linker command for `cctz` library will not be shuffled with other libraries. [#11213](https://github.com/ClickHouse/ClickHouse/pull/11213) ([alesapin](https://github.com/alesapin)). +* Split /programs/server into actual program and library. [#11186](https://github.com/ClickHouse/ClickHouse/pull/11186) ([Ivan](https://github.com/abyss7)). +* Improve build scripts for protobuf & gRPC. [#11172](https://github.com/ClickHouse/ClickHouse/pull/11172) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable performance test that was not working. [#11158](https://github.com/ClickHouse/ClickHouse/pull/11158) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Create root S3 bucket for tests before any CH instance is started. [#11142](https://github.com/ClickHouse/ClickHouse/pull/11142) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add performance test for non-constant polygons. [#11141](https://github.com/ClickHouse/ClickHouse/pull/11141) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make `system_tables_lazy_load` false by default. [#11029](https://github.com/ClickHouse/ClickHouse/pull/11029) ([Azat Khuzhin](https://github.com/azat)). +* Fixing 00979_live_view_watch_continuous_aggregates test. [#11024](https://github.com/ClickHouse/ClickHouse/pull/11024) ([vzakaznikov](https://github.com/vzakaznikov)). +* Add ability to run zookeeper in integration tests over tmpfs. [#11002](https://github.com/ClickHouse/ClickHouse/pull/11002) ([alesapin](https://github.com/alesapin)). +* Enable clang-tidy for programs and utils. [#10991](https://github.com/ClickHouse/ClickHouse/pull/10991) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Wait for odbc-bridge with exponential backoff. Previous wait time of 200 ms was not enough in our CI environment. [#10990](https://github.com/ClickHouse/ClickHouse/pull/10990) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix non-deterministic test. [#10989](https://github.com/ClickHouse/ClickHouse/pull/10989) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Return tzdata to build images and as dependency to .deb package. [#10929](https://github.com/ClickHouse/ClickHouse/pull/10929) ([alesapin](https://github.com/alesapin)). +* Added a test for empty external data. [#10926](https://github.com/ClickHouse/ClickHouse/pull/10926) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Database is recreated for every test. This improves separation of tests. [#10902](https://github.com/ClickHouse/ClickHouse/pull/10902) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Prepare to add MSan and UBSan stress tests. [#10871](https://github.com/ClickHouse/ClickHouse/pull/10871) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added more asserts in columns code. [#10833](https://github.com/ClickHouse/ClickHouse/pull/10833) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better cooperation with sanitizers. 
Print information about query_id in the message of sanitizer failure. [#10832](https://github.com/ClickHouse/ClickHouse/pull/10832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix obvious race condition in "Split build smoke test" check. [#10820](https://github.com/ClickHouse/ClickHouse/pull/10820) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add MSan suppression for MariaDB Client library. [#10800](https://github.com/ClickHouse/ClickHouse/pull/10800) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* GRPC make couldn't find protobuf files, changed make file by adding the right link. [#10794](https://github.com/ClickHouse/ClickHouse/pull/10794) ([mnkonkova](https://github.com/mnkonkova)). +* Enable extra warnings for base, utils, programs. [#10779](https://github.com/ClickHouse/ClickHouse/pull/10779) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Suppressions of warnings from libraries was mistakenly declared as public in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10776](https://github.com/ClickHouse/ClickHouse/pull/10776) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Restore a patch that was accidentially deleted in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10774](https://github.com/ClickHouse/ClickHouse/pull/10774) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix performance tests errors, part 2. [#10773](https://github.com/ClickHouse/ClickHouse/pull/10773) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix performance test errors. [#10766](https://github.com/ClickHouse/ClickHouse/pull/10766) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update cross-builds to use clang-10 compiler. [#10724](https://github.com/ClickHouse/ClickHouse/pull/10724) ([Ivan](https://github.com/abyss7)). +* Update instruction to install RPM packages. This was suggested by Denis (TG login @ldviolet) and implemented by Arkady Shejn. [#10707](https://github.com/ClickHouse/ClickHouse/pull/10707) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Trying to fix tests/queries/0_stateless/01246_insert_into_watch_live_view.py test. [#10670](https://github.com/ClickHouse/ClickHouse/pull/10670) ([vzakaznikov](https://github.com/vzakaznikov)). +* Volumes and storages refactoring. [#10666](https://github.com/ClickHouse/ClickHouse/pull/10666) ([Gleb Novikov](https://github.com/NanoBjorn)). +* Update zstd to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixing and re-enabling 00979_live_view_watch_continuous_aggregates.py test. [#10658](https://github.com/ClickHouse/ClickHouse/pull/10658) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fix OOM in ASan stress test. [#10646](https://github.com/ClickHouse/ClickHouse/pull/10646) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
+* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove external call to `ld` (bfd) linker during tzdata processing in compile time. [#10634](https://github.com/ClickHouse/ClickHouse/pull/10634) ([alesapin](https://github.com/alesapin)). +* Allow to use lld to link blobs (resources). [#10632](https://github.com/ClickHouse/ClickHouse/pull/10632) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update LZ4 to the latest dev branch. It may fix the error under UBSan. [#10630](https://github.com/ClickHouse/ClickHouse/pull/10630) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added auto-generated machine-readable file with list of stable versions. [#10628](https://github.com/ClickHouse/ClickHouse/pull/10628) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `capnproto` version check for `capnp::UnalignedFlatArrayMessageReader`. [#10618](https://github.com/ClickHouse/ClickHouse/pull/10618) ([Matwey V. Kornilov](https://github.com/matwey)). +* Lower memory usage in tests. It may fix the issue that "address sanitizer is out of memory" in stress test. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)). +* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)). +* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)). +* Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)). +* Fix UBSan report in Decimal parse. This fixes [#7540](https://github.com/ClickHouse/ClickHouse/issues/7540). [#10512](https://github.com/ClickHouse/ClickHouse/pull/10512) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)). +* Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)). + ## ClickHouse release v20.4 ### ClickHouse release v20.4.3.16-stable 2020-05-23 diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 4b898f4c75c..c49783519c7 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -26,7 +26,9 @@ find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq > "changelog-prs.txt" +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. 
+"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." diff --git a/utils/simple-backport/format-changelog.py b/utils/simple-backport/format-changelog.py index 705d1903c78..ccda88c6809 100755 --- a/utils/simple-backport/format-changelog.py +++ b/utils/simple-backport/format-changelog.py @@ -1,12 +1,13 @@ #!/usr/bin/python3 -import os -import sys -import itertools import argparse -import json import collections +import fuzzywuzzy.fuzz +import itertools +import json +import os import re +import sys parser = argparse.ArgumentParser(description='Format changelog for given PRs.') parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs='?', default=sys.stdin, help='File with PR numbers, one per line.') @@ -26,7 +27,7 @@ def parse_one_pull_request(item): if lines: i = 0 while i < len(lines): - if re.match(r'(?i).*category.*:$', lines[i]): + if re.match(r'(?i)^[>*_ ]*change\s*log\s*category', lines[i]): i += 1 if i >= len(lines): break @@ -37,7 +38,7 @@ def parse_one_pull_request(item): break category = re.sub(r'^[-*\s]*', '', lines[i]) i += 1 - elif re.match(r'(?i)^\**\s*(Short description|Change\s*log entry)', lines[i]): + elif re.match(r'(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)', lines[i]): i += 1 # Can have one empty line between header and the entry itself. Filter it out. if i < len(lines) and not lines[i]: @@ -74,6 +75,11 @@ def parse_one_pull_request(item): return True +# This array gives the preferred category order, and is also used to +# normalize category names. +categories_preferred_order = ['Backward Incompatible Change', + 'New Feature', 'Bug Fix', 'Improvement', 'Performance Improvement', + 'Build/Testing/Packaging Improvement', 'Other'] category_to_pr = collections.defaultdict(lambda: []) users = {} @@ -84,6 +90,13 @@ for line in args.file: continue assert(pr['category']) + + # Normalize category name + for c in categories_preferred_order: + if fuzzywuzzy.fuzz.ratio(pr['category'], c) >= 90: + pr['category'] = c + break + category_to_pr[pr['category']].append(pr) user_id = pr['user']['id'] users[user_id] = json.loads(open(f'user{user_id}.json').read()) @@ -103,7 +116,6 @@ def print_category(category): print() # Print categories in preferred order -categories_preferred_order = ['Backward Incompatible Change', 'New Feature', 'Bug Fix', 'Improvement', 'Performance Improvement', 'Build/Testing/Packaging Improvement', 'Other'] for category in categories_preferred_order: if category in category_to_pr: print_category(category) From c9a41ba4ba3f0faa29a9f9b227ae805caa736080 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 12:09:27 +0300 Subject: [PATCH 201/330] Update CHANGELOG.md --- CHANGELOG.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a7b1d44b2c..23a5aa68116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ * Use HTTP client for S3 based on Poco. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). * Add query performance metrics based on Linux `perf_events`. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Default S3 credentials and custom auth headers. 
[#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). -* - Add's a hasSubStr function that allows for look for sub sequences in arrays. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). +* Add's a hasSubStr function that allows for look for sub sequences in arrays. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). * Now support NULL and NOT NULL modifiers for data types in create query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). * Added new functions to import/export DateTime64 as Int64 with various precision: `to-/fromUnixTimestamp64Milli/-Micro/-Nano`. [#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). * Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). @@ -48,9 +48,8 @@ #### Bug Fix -* Fix rare crash caused by using Nullable column in prewhere condition. Continuation of [#11869](https://github.com/ClickHouse/ClickHouse/issues/11869). [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix rare crash caused by using `Nullable` column in prewhere condition. Continuation of [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608). [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix rare crash caused by using `Nullable` column in prewhere condition. [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608) [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). @@ -72,7 +71,6 @@ * Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove trivial count query optimization if row-level security is set. In previous versions the user get total count of records in a table instead filtered. 
This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). -* Fix rare crash caused by using `Nullable` column in prewhere condition. (Probably it is connected with [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572) somehow). [#11608](https://github.com/ClickHouse/ClickHouse/pull/11608) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * without -q option the database does not get created at startup. [#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). * Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). From e7acda764299bb121de6b8c503d98b31966675e4 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 3 Jul 2020 12:16:44 +0300 Subject: [PATCH 202/330] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ef39a163807..46154ea3af8 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,7 @@ ClickHouse is an open-source column-oriented database management system that all * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. + +## Upcoming Events + +* [ClickHouse at Yandex Cloud Webinar (in Russian)](https://cloud.yandex.ru/events/144) on July 7, 2020. 
From b3ec449e352c3f74e4491d86ee233936d464a8ed Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 12:18:59 +0300 Subject: [PATCH 203/330] Fix test (#12088) --- ...=> 01267_alter_default_key_columns_zookeeper.reference} | 0 ...s.sql => 01267_alter_default_key_columns_zookeeper.sql} | 7 ++----- 2 files changed, 2 insertions(+), 5 deletions(-) rename tests/queries/0_stateless/{01267_alter_default_key_columns.reference => 01267_alter_default_key_columns_zookeeper.reference} (100%) rename tests/queries/0_stateless/{01267_alter_default_key_columns.sql => 01267_alter_default_key_columns_zookeeper.sql} (79%) diff --git a/tests/queries/0_stateless/01267_alter_default_key_columns.reference b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.reference similarity index 100% rename from tests/queries/0_stateless/01267_alter_default_key_columns.reference rename to tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.reference diff --git a/tests/queries/0_stateless/01267_alter_default_key_columns.sql b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql similarity index 79% rename from tests/queries/0_stateless/01267_alter_default_key_columns.sql rename to tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql index 16d6065516f..827161d4d57 100644 --- a/tests/queries/0_stateless/01267_alter_default_key_columns.sql +++ b/tests/queries/0_stateless/01267_alter_default_key_columns_zookeeper.sql @@ -11,11 +11,8 @@ DROP TABLE IF EXISTS test_alter_r2; CREATE TABLE test_alter_r1 (x Date, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter', 'r1') ORDER BY s PARTITION BY x; CREATE TABLE test_alter_r2 (x Date, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter', 'r2') ORDER BY s PARTITION BY x; -ALTER TABLE test_alter_r1 MODIFY COLUMN s DEFAULT 'Hello'; -ALTER TABLE test_alter_r2 MODIFY COLUMN x DEFAULT '2000-01-01'; - -SYSTEM SYNC REPLICA test_alter_r1; -SYSTEM SYNC REPLICA test_alter_r2; +ALTER TABLE test_alter_r1 MODIFY COLUMN s DEFAULT 'Hello' SETTINGS replication_alter_partitions_sync = 2; +ALTER TABLE test_alter_r2 MODIFY COLUMN x DEFAULT '2000-01-01' SETTINGS replication_alter_partitions_sync = 2; DESCRIBE TABLE test_alter_r1; DESCRIBE TABLE test_alter_r2; From 0ce2d48af633e8b58bce175d881ada99d20d125a Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 3 Jul 2020 11:24:03 +0200 Subject: [PATCH 204/330] Test for a fixed issue #10668, related to input_format_allow_errors_num in CSV --- ...55_CSV_input_format_allow_errors.reference | 40 +++++++++++++++++ .../01355_CSV_input_format_allow_errors.sh | 45 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference create mode 100755 tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference new file mode 100644 index 00000000000..5d20150fdc1 --- /dev/null +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference @@ -0,0 +1,40 @@ +File generated: +0,0 +1,0 +2,0 +3,0 +4,AAAAAAA +5,0 +6,AAAAAAA +****************** +attempt to parse w/o flags +Return code: 117 +OK: stderr contains a message 'is not like Int64' +****************** +attempt to parse with input_format_allow_errors_ratio=0.1 +Return code: 117 +OK: stderr contains a message 'Already have 1 errors out of 5 rows, which 
is 0.2' +****************** +attempt to parse with input_format_allow_errors_ratio=0.3 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +Return code: 0 +****************** +attempt to parse with input_format_allow_errors_num=1 +Return code: 117 +OK: stderr contains a message 'Already have 2 errors out of 7 rows' +****************** +attempt to parse with input_format_allow_errors_num=2 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +Return code: 0 diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh new file mode 100755 index 00000000000..300292dd08d --- /dev/null +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR/../shell_config.sh" + +SAMPLE_FILE="$CURDIR/01355_sample_data.csv" +STD_ERROR_CAPTURED="$CURDIR/01355_std_error_captured.log" + +echo 'File generated:' +${CLICKHOUSE_LOCAL} -q "SELECT number, if(number in (4,6), 'AAAAAAA', '0') from numbers(7) FORMAT TSV" | tr '\t' ',' >"$SAMPLE_FILE" +cat "$SAMPLE_FILE" + +echo '******************' +echo 'attempt to parse w/o flags' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +expected_error_message='is not like Int64' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_ratio=0.1' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +expected_error_message='Already have 1 errors out of 5 rows, which is 0.2' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_ratio=0.3' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +cat "$STD_ERROR_CAPTURED" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_num=1' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" +expected_error_message='Already have 2 errors out of 7 rows' +cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" + +echo '******************' +echo 'attempt to parse with input_format_allow_errors_num=2' +cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED" +echo "Return code: $?" 
+cat "$STD_ERROR_CAPTURED" + +rm "$STD_ERROR_CAPTURED" "$SAMPLE_FILE" \ No newline at end of file From 0a6f3ca9fd30101c380f37d870a9136a436d3152 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 13:11:16 +0300 Subject: [PATCH 205/330] Add unbundled mode flag --- src/Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 97998e11ea5..3c3b96b9cff 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -32,6 +32,7 @@ const char * auto_config_build[] "BUILD_INCLUDE_DIRECTORIES", "@BUILD_INCLUDE_DIRECTORIES@", "STATIC", "@USE_STATIC_LIBRARIES@", "SPLIT_BINARY", "@CLICKHOUSE_SPLIT_BINARY@", + "UNBUNDLED", "@UNBUNDLED@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", From b3f1842035edab7639597ef55c4fdc2c4b59328e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 13:35:44 +0300 Subject: [PATCH 206/330] Remove LC converting to Arrow. --- src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 29268d3894c..cec722d51bc 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -220,7 +221,7 @@ namespace DB { // TODO: constructed every iteration ColumnWithTypeAndName column = header.safeGetByPosition(column_i); - column.column = chunk.getColumns()[column_i]; + column.column = recursiveRemoveLowCardinality(chunk.getColumns()[column_i]); const bool is_column_nullable = column.type->isNullable(); const auto & column_nested_type From 3d37a632474fbc3de11b0ff885d8a12089f81db7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 13:37:55 +0300 Subject: [PATCH 207/330] Remove LC converting to Arrow. 
--- src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index cec722d51bc..c1fce04f3b2 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -222,6 +222,7 @@ namespace DB // TODO: constructed every iteration ColumnWithTypeAndName column = header.safeGetByPosition(column_i); column.column = recursiveRemoveLowCardinality(chunk.getColumns()[column_i]); + column.type = recursiveRemoveLowCardinality(column.type); const bool is_column_nullable = column.type->isNullable(); const auto & column_nested_type From d015a4d64685bf79e149a2a730ab91a4c5a1174f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 13:57:16 +0300 Subject: [PATCH 208/330] Move skip lists to clickhouse-test --- tests/clickhouse-test | 93 +++++++++++++++++++++++++ tests/queries/skip_list.json | 130 +++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+) create mode 100644 tests/queries/skip_list.json diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 79fa2c15068..71e93e7b1bb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -4,6 +4,7 @@ import sys import os import os.path import re +import json from argparse import ArgumentParser from argparse import FileType @@ -377,6 +378,76 @@ def check_server_started(client, retry_count): return False +class BuildFlags(object): + THREAD = 'thread-sanitizer' + ADDRESS = 'address-sanitizer' + UNDEFINED = 'ub-sanitizer' + MEMORY = 'memory-sanitizer' + DEBUG = 'debug-build' + UNBUNDLED = 'unbundled-build' + RELEASE = 'release-build' + DATABASE_ATOMIC = 'database-atomic' + POLYMORPHIC_PARTS = 'polymorphic-parts' + + +def collect_build_flags(client): + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'") + result = [] + + if clickhouse_proc.returncode == 0: + if '-fsanitize=thread' in stdout: + result.append(BuildFlags.THREAD) + elif '-fsanitize=address' in stdout: + result.append(BuildFlags.ADDRESS) + elif '-fsanitize=undefined' in stdout: + result.append(BuildFlags.UNDEFINED) + elif '-fsanitize=memory' in stdout: + result.append(BuildFlags.MEMORY) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE'") + + if clickhouse_proc.returncode == 0: + if 'Debug' in stdout: + result.append(BuildFlags.DEBUG) + elif 'RelWithDebInfo' in stdout or 'Release' in stdout: + result.append(BuildFlags.RELEASE) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'") + + if clickhouse_proc.returncode == 0: + if 'ON' in stdout: + result.append(BuildFlags.UNBUNDLED) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + 
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.settings WHERE name = 'default_database_engine'") + + if clickhouse_proc.returncode == 0: + if 'Atomic' in stdout: + result.append(BuildFlags.DATABASE_ATOMIC) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE) + (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'") + + if clickhouse_proc.returncode == 0: + if '10485760' in stdout: + result.append(BuildFlags.POLYMORPHIC_PARTS) + else: + raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) + + return result + + def main(args): global SERVER_DIED global exit_code @@ -392,6 +463,12 @@ def main(args): if not check_server_started(args.client, args.server_check_retries): raise Exception("clickhouse-server is not responding. Cannot execute 'SELECT 1' query.") + build_flags = collect_build_flags(args.client) + tests_to_skip_from_list = collect_tests_to_skip(args.skip_list, build_flags) + if args.skip: + args.skip = set(args.skip) | tests_to_skip_from_list + else: + args.skip = tests_to_skip_from_list base_dir = os.path.abspath(args.queries) tmp_dir = os.path.abspath(args.tmp) @@ -604,6 +681,17 @@ def get_additional_client_options_url(args): return '' +def collect_tests_to_skip(skip_list_path, build_flags): + result = set([]) + if not os.path.exists(skip_list_path): + return result + + with open(skip_list_path, 'r') as skip_list_file: + skip_dict = json.load(skip_list_file) + for build_flag in build_flags: + result |= set(skip_dict[build_flag]) + return result + if __name__ == '__main__': parser=ArgumentParser(description='ClickHouse functional tests') parser.add_argument('-q', '--queries', help='Path to queries dir') @@ -627,6 +715,7 @@ if __name__ == '__main__': parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started') + parser.add_argument('--skip-list', help="Path to skip-list file") parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests') parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests') @@ -655,6 +744,10 @@ if __name__ == '__main__': if args.queries is None: print("Failed to detect path to the queries directory. 
Please specify it with '--queries' option.", file=sys.stderr) exit(1) + + if args.skip_list is None: + args.skip_list = os.path.join(args.queries, 'skip_list.json') + if args.tmp is None: args.tmp = args.queries if args.client is None: diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json new file mode 100644 index 00000000000..9ad291928c0 --- /dev/null +++ b/tests/queries/skip_list.json @@ -0,0 +1,130 @@ +{ + "thread-sanitizer": [ + "00281", + "00877", + "00985", + "avx2", + "query_profiler", + "memory_profiler", + "01083_expressions_in_engine_arguments", + "00505_shard_secure", + "00505_secure", + "01103_check_cpu_instructions_at_startup", + "01098_temporary_and_external_tables", + "00152_insert_different_granularity", + "00151_replace_partition_with_different_granularity" + ], + "address-sanitizer": [ + "00281", + "00877", + "avx2", + "query_profiler", + "memory_profiler", + "odbc_roundtrip", + "01103_check_cpu_instructions_at_startup" + ], + "ub-sanitizer": [ + "00281", + "capnproto", + "avx2", + "query_profiler", + "memory_profiler", + "01103_check_cpu_instructions_at_startup", + "00900_orc_load" + ], + "memory-sanitizer": [ + "00281", + "capnproto", + "avx2", + "query_profiler", + "memory_profiler", + "01103_check_cpu_instructions_at_startup", + "01086_odbc_roundtrip", + "00877_memory_limit_for_new_delete", + "01114_mysql_database_engine_segfault" + ], + "debug-build": [ + "00281", + "avx2", + "query_profiler", + "memory_profiler", + "00899_long_attach", + "00980_alter_settings_race", + "00834_kill_mutation_replicated_zookeeper", + "00834_kill_mutation", + "01200_mutations_memory_consumption", + "01103_check_cpu_instructions_at_startup", + "01037_polygon_dicts_", + "hyperscan" + ], + "unbundled-build": [ + "00429", + "00428", + "00877", + "pocopatch", + "parquet", + "xxhash", + "avx2", + "_h3", + "query_profiler", + "memory_profiler", + "orc_load", + "01033_storage_odbc_parsing_exception_check", + "avro", + "01072_optimize_skip_unused_shards_const_expr_eval", + "00505_secure", + "00505_shard_secure", + "odbc_roundtrip", + "01103_check_cpu_instructions_at_startup", + "01114_mysql_database_engine_segfault", + "00834_cancel_http_readonly_queries_on_client_close", + "_arrow", + "01099_parallel_distributed_insert_select", + "01300_client_save_history_when_terminated", + "orc_output" + ], + "release-build": [ + "avx2" + ], + "database-atomic": [ + "00065_loyalty_with_storage_join", + "avx", + "00738_lock_for_inner_table", + "00699_materialized_view_mutations", + "00609_mv_index_in_in", + "00510_materizlized_view_and_deduplication_zookeeper", + "00604_show_create_database", + "00080_show_tables_and_system_tables", + "01272_suspicious_codecs", + "01249_bad_arguments_for_bloom_filter", + "00423_storage_log_single_thread", + "00311_array_primary_key", + "00226_zookeeper_deduplication_and_unexpected_parts", + "00180_attach_materialized_view", + "00116_storage_set", + "00816_long_concurrent_alter_column", + "00992_system_parts_race_condition_zookeeper" + ], + "polymorphic-parts": [ + "avx", + "01045_order_by_pk_special_storages", + "01042_check_query_and_last_granule_size", + "00961_checksums_in_system_parts_columns_table", + "00933_test_fix_extra_seek_on_compressed_cache", + "00926_adaptive_index_granularity_collapsing_merge_tree", + "00926_adaptive_index_granularity_merge_tree", + "00926_adaptive_index_granularity_replacing_merge_tree", + "00926_adaptive_index_granularity_versioned_collapsing_merge_tree", + "00804_test_delta_codec_compression", + 
"00731_long_merge_tree_select_opened_files", + "00653_verification_monotonic_data_load", + "00484_preferred_max_column_in_block_size_bytes", + "00446_clear_column_in_partition_zookeeper", + "00443_preferred_block_size_bytes", + "00160_merge_and_index_in_in", + "01055_compact_parts", + "01039_mergetree_exec_time", + "00933_ttl_simple", + "00753_system_columns_and_system_tables" + ] +} From 6d08b7b45a34c0db415fd3b1bc86954c1a59c2a4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 14:04:36 +0300 Subject: [PATCH 209/330] Remove LC converting to Arrow. --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index a8aac2d51ee..a998378125f 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -263,6 +264,7 @@ namespace DB for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { ColumnWithTypeAndName header_column = header.getByPosition(column_i); + const auto column_type = recursiveRemoveLowCardinality(header_column.type); if (name_to_column_ptr.find(header_column.name) == name_to_column_ptr.end()) // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable? @@ -273,13 +275,13 @@ namespace DB arrow::Type::type arrow_type = arrow_column->type()->id(); // TODO: check if a column is const? - if (!header_column.type->isNullable() && arrow_column->null_count()) + if (!column_type->isNullable() && arrow_column->null_count()) { throw Exception{"Can not insert NULL data into non-nullable column \"" + header_column.name + "\"", ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN}; } - const bool target_column_is_nullable = header_column.type->isNullable() || arrow_column->null_count(); + const bool target_column_is_nullable = column_type->isNullable() || arrow_column->null_count(); DataTypePtr internal_nested_type; @@ -304,15 +306,6 @@ namespace DB const DataTypePtr internal_type = target_column_is_nullable ? makeNullable(internal_nested_type) : internal_nested_type; - const std::string internal_nested_type_name = internal_nested_type->getName(); - - const DataTypePtr column_nested_type = header_column.type->isNullable() - ? 
static_cast(header_column.type.get())->getNestedType() - : header_column.type; - - const DataTypePtr column_type = header_column.type; - - const std::string column_nested_type_name = column_nested_type->getName(); ColumnWithTypeAndName column; column.name = header_column.name; @@ -373,8 +366,8 @@ namespace DB else column.column = std::move(read_column); - column.column = castColumn(column, column_type); - column.type = column_type; + column.column = castColumn(column, header_column.type); + column.type = header_column.type; num_rows = column.column->size(); columns_list.push_back(std::move(column.column)); } From 07cbb427a8b301527fef0d70cf6d0b8de083b462 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 14:05:27 +0300 Subject: [PATCH 210/330] More verbose message about skip --- tests/clickhouse-test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 71e93e7b1bb..0e4e20dfe21 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -690,6 +690,10 @@ def collect_tests_to_skip(skip_list_path, build_flags): skip_dict = json.load(skip_list_file) for build_flag in build_flags: result |= set(skip_dict[build_flag]) + + if len(result) > 0: + print("Found file with skip-list {}, {} test will be skipped".format(skip_list_path, len(result))) + return result if __name__ == '__main__': From 5098df8d0bb4ba08b4ab0fad35575a3308aa3051 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 14:15:30 +0300 Subject: [PATCH 211/330] Make skip-list optional --- docker/test/stateful/Dockerfile | 2 +- docker/test/stateful_with_coverage/run.sh | 2 +- docker/test/stateless/Dockerfile | 2 +- docker/test/stateless_with_coverage/run.sh | 2 +- docker/test/stress/stress | 4 ++-- tests/clickhouse-test | 13 +++++++++---- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 3aff49bf5a1..bdf397a70e0 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -53,4 +53,4 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ && clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" \ && clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" \ && clickhouse-client --query "SHOW TABLES FROM test" \ - && clickhouse-test --testname --shard --zookeeper --no-stateless $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + && clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index b946f5b187d..13b69c73b89 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -105,7 +105,7 @@ LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABL LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee 
test_output/test_result.txt +LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 41a53f8a3f5..eee493d4430 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -83,4 +83,4 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/; fi; \ ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml; \ service zookeeper start; sleep 5; \ - service clickhouse-server start && sleep 5 && clickhouse-test --testname --shard --zookeeper $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + service clickhouse-server start && sleep 5 && clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 185dc95c783..37ad286e004 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -76,7 +76,7 @@ start_clickhouse sleep 10 -LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 92dac8bc2d1..46fa3e95f55 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -17,13 +17,13 @@ def run_perf_test(cmd, xmls_path, output_folder): def run_func_test(cmd, output_prefix, num_processes, skip_tests_option): output_paths = [os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) for i in range(num_processes)] f = open(output_paths[0], 'w') - main_command = "{} {}".format(cmd, skip_tests_option) + main_command = "{} --use-skip-list {}".format(cmd, skip_tests_option) logging.info("Run func tests main cmd '%s'", main_command) pipes = [Popen(main_command, shell=True, stdout=f, stderr=f)] for output_path in output_paths[1:]: time.sleep(0.5) f = open(output_path, 'w') - full_command = "{} --order=random {}".format(cmd, skip_tests_option) + full_command = "{} --use-skip-list --order=random {}".format(cmd, skip_tests_option) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) pipes.append(p) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 0e4e20dfe21..64ca3031fb6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -464,7 +464,11 @@ def main(args): if not check_server_started(args.client, args.server_check_retries): raise Exception("clickhouse-server is not responding. 
Cannot execute 'SELECT 1' query.") build_flags = collect_build_flags(args.client) - tests_to_skip_from_list = collect_tests_to_skip(args.skip_list, build_flags) + if args.use_skip_list: + tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags) + else: + tests_to_skip_from_list = {} + if args.skip: args.skip = set(args.skip) | tests_to_skip_from_list else: @@ -719,7 +723,8 @@ if __name__ == '__main__': parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started') - parser.add_argument('--skip-list', help="Path to skip-list file") + parser.add_argument('--skip-list-path', help="Path to skip-list file") + parser.add_argument('--use-skip-list', action='store_true', default=False, help="Use skip list to skip tests if found") parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests') parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests') @@ -749,8 +754,8 @@ if __name__ == '__main__': print("Failed to detect path to the queries directory. Please specify it with '--queries' option.", file=sys.stderr) exit(1) - if args.skip_list is None: - args.skip_list = os.path.join(args.queries, 'skip_list.json') + if args.skip_list_path is None: + args.skip_list_path = os.path.join(args.queries, 'skip_list.json') if args.tmp is None: args.tmp = args.queries From 8ec70c2c3c0b7a5a0e0ae0f420f80865c0bb0b7c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Jul 2020 14:45:57 +0300 Subject: [PATCH 212/330] Added test. --- .../0_stateless/01358_lc_parquet.reference | 3 +++ tests/queries/0_stateless/01358_lc_parquet.sh | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/01358_lc_parquet.reference create mode 100755 tests/queries/0_stateless/01358_lc_parquet.sh diff --git a/tests/queries/0_stateless/01358_lc_parquet.reference b/tests/queries/0_stateless/01358_lc_parquet.reference new file mode 100644 index 00000000000..450af4cf648 --- /dev/null +++ b/tests/queries/0_stateless/01358_lc_parquet.reference @@ -0,0 +1,3 @@ +abc +ghi +\N diff --git a/tests/queries/0_stateless/01358_lc_parquet.sh b/tests/queries/0_stateless/01358_lc_parquet.sh new file mode 100755 index 00000000000..859bb2ebac7 --- /dev/null +++ b/tests/queries/0_stateless/01358_lc_parquet.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (a String) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "create table test_lc (a LowCardinality(String)) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "select 'abc' as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test_lc format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test order by a" +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" + +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (a Nullable(String)) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "create table test_lc (a LowCardinality(Nullable(String))) Engine = MergeTree order by tuple()" +$CLICKHOUSE_CLIENT -q "select 'ghi' as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select cast(Null as Nullable(String)) as a format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test_lc format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test_lc format Parquet" | $CLICKHOUSE_CLIENT -q "insert into test format Parquet" +$CLICKHOUSE_CLIENT -q "select a from test order by a" +$CLICKHOUSE_CLIENT -q "drop table if exists test_lc" +$CLICKHOUSE_CLIENT -q "drop table if exists test" From 027c6b705f9691fe25dbbe55b60744417e0cc0cf Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 3 Jul 2020 19:18:47 +0700 Subject: [PATCH 213/330] Create initializeAggregation to initialize an aggregation function based on a value --- src/Functions/initializeAggregation.cpp | 161 ++++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + .../01356_initialize_aggregation.reference | 4 + .../01356_initialize_aggregation.sql | 4 + 4 files changed, 171 insertions(+) create mode 100644 src/Functions/initializeAggregation.cpp create mode 100644 tests/queries/0_stateless/01356_initialize_aggregation.reference create mode 100644 tests/queries/0_stateless/01356_initialize_aggregation.sql diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp new file mode 100644 index 00000000000..81bfa19a55a --- /dev/null +++ b/src/Functions/initializeAggregation.cpp @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + + +class FunctionInitializeAggregation : public IFunction +{ +public: + static constexpr auto name = "initializeAggregation"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + + void executeImpl(Block & block, const ColumnNumbers & arguments, 
size_t result, size_t input_rows_count) override; + +private: + mutable AggregateFunctionPtr aggregate_function; +}; + + +DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.size() < 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be at least 2.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const ColumnConst * aggregate_function_name_column = checkAndGetColumnConst(arguments[0].column.get()); + if (!aggregate_function_name_column) + throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypes argument_types(arguments.size() - 1); + for (size_t i = 1, size = arguments.size(); i < size; ++i) + { + argument_types[i - 1] = arguments[i].type; + } + + if (!aggregate_function) + { + String aggregate_function_name_with_params = aggregate_function_name_column->getValue(); + + if (aggregate_function_name_with_params.empty()) + throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", + ErrorCodes::BAD_ARGUMENTS); + + String aggregate_function_name; + Array params_row; + getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, + aggregate_function_name, params_row, "function " + getName()); + + AggregateFunctionProperties properties; + aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); + } + + return aggregate_function->getReturnType(); +} + + +void FunctionInitializeAggregation::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) +{ + IAggregateFunction & agg_func = *aggregate_function; + std::unique_ptr arena = std::make_unique(); + + const size_t num_arguments_columns = arguments.size() - 1; + + std::vector materialized_columns(num_arguments_columns); + std::vector aggregate_arguments_vec(num_arguments_columns); + + for (size_t i = 0; i < num_arguments_columns; ++i) + { + const IColumn * col = block.getByPosition(arguments[i + 1]).column.get(); + materialized_columns.emplace_back(col->convertToFullColumnIfConst()); + aggregate_arguments_vec[i] = &(*materialized_columns.back()); + } + + const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); + + MutableColumnPtr result_holder = block.getByPosition(result).type->createColumn(); + IColumn & res_col = *result_holder; + + /// AggregateFunction's states should be inserted into column using specific way + auto * res_col_aggregate_function = typeid_cast(&res_col); + + if (!res_col_aggregate_function && agg_func.isState()) + throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " + + block.getByPosition(result).type->getName(), ErrorCodes::ILLEGAL_COLUMN); + + PODArray places(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) + { + places[i] = arena->alignedAlloc(agg_func.sizeOfData(), agg_func.alignOfData()); + try + { + agg_func.create(places[i]); + } + catch (...) 
+ { + for (size_t j = 0; j < i; ++j) + agg_func.destroy(places[j]); + throw; + } + } + + SCOPE_EXIT({ + for (size_t i = 0; i < input_rows_count; ++i) + agg_func.destroy(places[i]); + }); + + { + auto * that = &agg_func; + /// Unnest consecutive trailing -State combinators + while (auto * func = typeid_cast(that)) + that = func->getNestedFunction().get(); + that->addBatch(input_rows_count, places.data(), 0, aggregate_arguments, arena.get()); + } + + for (size_t i = 0; i < input_rows_count; ++i) + if (!res_col_aggregate_function) + agg_func.insertResultInto(places[i], res_col, arena.get()); + else + res_col_aggregate_function->insertFrom(places[i]); + block.getByPosition(result).column = std::move(result_holder); +} + + +void registerFunctionInitializeAggregation(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 5eb1e3e47c0..697eb5ecb64 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -58,6 +58,7 @@ void registerFunctionGetMacro(FunctionFactory &); void registerFunctionGetScalar(FunctionFactory &); void registerFunctionIsConstant(FunctionFactory &); void registerFunctionGlobalVariable(FunctionFactory &); +void registerFunctionInitializeAggregation(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -116,6 +117,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetScalar(factory); registerFunctionIsConstant(factory); registerFunctionGlobalVariable(factory); + registerFunctionInitializeAggregation(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/tests/queries/0_stateless/01356_initialize_aggregation.reference b/tests/queries/0_stateless/01356_initialize_aggregation.reference new file mode 100644 index 00000000000..63ebb1717d6 --- /dev/null +++ b/tests/queries/0_stateless/01356_initialize_aggregation.reference @@ -0,0 +1,4 @@ +3 +[999,998,997,996,995,994,993,992,991,990] +[1] +[990,991,992,993,994,995,996,997,998,999] diff --git a/tests/queries/0_stateless/01356_initialize_aggregation.sql b/tests/queries/0_stateless/01356_initialize_aggregation.sql new file mode 100644 index 00000000000..07a5ca1892b --- /dev/null +++ b/tests/queries/0_stateless/01356_initialize_aggregation.sql @@ -0,0 +1,4 @@ +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, number) AS state FROM system.numbers LIMIT 1000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', 1, number) AS state FROM system.numbers LIMIT 1000); +SELECT topKWeightedMerge(10)(state) FROM (SELECT initializeAggregation('topKWeightedState(10)', number, 1) AS state FROM system.numbers LIMIT 1000); From 23b44ca6fe6f374d0e4deb3962e7f8ab65f79411 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Fri, 3 Jul 2020 15:45:01 +0300 Subject: [PATCH 214/330] Add type column in system.disks --- src/Disks/DiskLocal.h | 2 + src/Disks/DiskMemory.h | 2 + src/Disks/IDisk.h | 3 ++ src/Disks/S3/DiskS3.h | 2 + src/Storages/System/StorageSystemDisks.cpp | 4 ++ tests/integration/test_disk_types/__init__.py | 0 .../test_disk_types/configs/config.xml | 42 +++++++++++++++++++ .../test_disk_types/configs/users.xml | 23 ++++++++++ 
tests/integration/test_disk_types/test.py | 36 ++++++++++++++++ 9 files changed, 114 insertions(+) create mode 100644 tests/integration/test_disk_types/__init__.py create mode 100644 tests/integration/test_disk_types/configs/config.xml create mode 100644 tests/integration/test_disk_types/configs/users.xml create mode 100644 tests/integration/test_disk_types/test.py diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61a3994b655..3dab4614d5d 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -99,6 +99,8 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + const String getType() const override { return "local"; } + private: bool tryReserve(UInt64 bytes); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index b0c1d30c61d..f7948019fe8 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -90,6 +90,8 @@ public: void createHardLink(const String & src_path, const String & dst_path) override; + const String getType() const override { return "memory"; } + private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 011c75402f4..77a52a7a5d6 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -171,6 +171,9 @@ public: /// Create hardlink from `src_path` to `dst_path`. virtual void createHardLink(const String & src_path, const String & dst_path) = 0; + + /// Return disk type - "local", "s3", etc. + virtual const String getType() const = 0; }; using DiskPtr = std::shared_ptr; diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 5fa8e8358a6..82168c55bb5 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -96,6 +96,8 @@ public: void setReadOnly(const String & path) override; + const String getType() const override { return "s3"; } + private: bool tryReserve(UInt64 bytes); diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index cf00bbb5254..554b8cfd1eb 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -22,6 +22,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) {"free_space", std::make_shared()}, {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, + {"type", std::make_shared()}, })); setInMemoryMetadata(storage_metadata); } @@ -42,6 +43,7 @@ Pipes StorageSystemDisks::read( MutableColumnPtr col_free = ColumnUInt64::create(); MutableColumnPtr col_total = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); + MutableColumnPtr col_type = ColumnString::create(); for (const auto & [disk_name, disk_ptr] : context.getDisksMap()) { @@ -50,6 +52,7 @@ Pipes StorageSystemDisks::read( col_free->insert(disk_ptr->getAvailableSpace()); col_total->insert(disk_ptr->getTotalSpace()); col_keep->insert(disk_ptr->getKeepingFreeSpace()); + col_type->insert(disk_ptr->getType()); } Columns res_columns; @@ -58,6 +61,7 @@ Pipes StorageSystemDisks::read( res_columns.emplace_back(std::move(col_free)); res_columns.emplace_back(std::move(col_total)); res_columns.emplace_back(std::move(col_keep)); + res_columns.emplace_back(std::move(col_type)); UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); diff --git a/tests/integration/test_disk_types/__init__.py b/tests/integration/test_disk_types/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/integration/test_disk_types/configs/config.xml b/tests/integration/test_disk_types/configs/config.xml new file mode 100644 index 00000000000..a3ec8b3a58a --- /dev/null +++ b/tests/integration/test_disk_types/configs/config.xml @@ -0,0 +1,42 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + memory + + + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_disk_types/configs/users.xml b/tests/integration/test_disk_types/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/tests/integration/test_disk_types/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py new file mode 100644 index 00000000000..04346388b47 --- /dev/null +++ b/tests/integration/test_disk_types/test.py @@ -0,0 +1,36 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +disk_types = { + "default" : "local", + "disk_s3" : "s3", + "disk_memory" : "memory", +} + +@pytest.fixture(scope="module") + +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node", config_dir="configs", with_minio=True) + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_different_types(cluster): + node = cluster.instances["node"] + responce = node.query("SELECT * FROM system.disks") + disks = responce.split("\n") + for disk in disks: + if disk == '': # skip empty line (after split at last position) + continue + fields = disk.split("\t") + assert len(fields) >= 6 + assert disk_types.get(fields[0], "UNKNOWN") == fields[5] + +def test_select_by_type(cluster): + node = cluster.instances["node"] + for name, disk_type in disk_types.items(): + assert node.query("SELECT name FROM system.disks WHERE type='" + disk_type + "'") == name + "\n" + From cd9a71b19cc3c4e6257c0d415623d7c4102f4b3a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Jul 2020 16:01:02 +0300 Subject: [PATCH 215/330] Update clickhouse-test --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 64ca3031fb6..fc3cfb0b432 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -422,7 +422,7 @@ def collect_build_flags(client): (stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.build_options WHERE name = 'UNBUNDLED'") if clickhouse_proc.returncode == 0: - if 'ON' in stdout: + if 'ON' in stdout or '1' in stdout: result.append(BuildFlags.UNBUNDLED) else: raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr)) From fba02badcb8a95eb40724edcea34b0201d3be23d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 3 Jul 2020 17:53:54 +0300 Subject: [PATCH 216/330] fix test with UBSan --- src/Processors/Formats/Impl/ORCBlockInputFormat.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a12ca09eec0..12f1a27cf36 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -23,7 +23,9 @@ 
ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_) : IInp { } -Chunk ORCBlockInputFormat::generate() +/// Attribute is used to avoid an error with undefined behaviour sanitizer +/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer +__attribute__((__no_sanitize__("undefined"))) Chunk ORCBlockInputFormat::generate() { Chunk res; const Block & header = getPort().getHeader(); From 6e599533a72fc6766fa1df7e262911ff5f5bf85f Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 3 Jul 2020 19:19:32 +0200 Subject: [PATCH 217/330] Add missing query context for system logs Needed to allow attaching materialized views with joins or with subqueries to system.logs. --- src/Interpreters/SystemLog.h | 7 ++- ...ized_view_with_join_on_query_log.reference | 10 ++++ ...terialized_view_with_join_on_query_log.sql | 52 +++++++++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference create mode 100644 tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 48cff8bf061..1b5bcbacc6d 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -62,7 +63,6 @@ namespace ErrorCodes #define DBMS_SYSTEM_LOG_QUEUE_SIZE 1048576 -class Context; class QueryLog; class QueryThreadLog; class PartLog; @@ -161,6 +161,7 @@ protected: private: /* Saving thread data */ Context & context; + Context insert_context; const StorageID table_id; const String storage_def; StoragePtr table; @@ -207,11 +208,13 @@ SystemLog::SystemLog(Context & context_, const String & storage_def_, size_t flush_interval_milliseconds_) : context(context_) + , insert_context(Context(context_)) , table_id(database_name_, table_name_) , storage_def(storage_def_) , flush_interval_milliseconds(flush_interval_milliseconds_) { assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); + insert_context.makeQueryContext(); // we need query context to do inserts to target table with MV containing subqueries or joins log = &Poco::Logger::get("SystemLog (" + database_name_ + "." 
+ table_name_ + ")"); } @@ -425,7 +428,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert->table_id = table_id; ASTPtr query_ptr(insert.release()); - InterpreterInsertQuery interpreter(query_ptr, context); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); io.out->writePrefix(); diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference new file mode 100644 index 00000000000..dfc14d94653 --- /dev/null +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +=== system.query_log === +main_dashboard_bottom_query 2 +main_dashboard_top_query 2 +=== slowlog === +main_dashboard_bottom_query 1 +main_dashboard_top_query 1 diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql new file mode 100644 index 00000000000..3d1b464f164 --- /dev/null +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql @@ -0,0 +1,52 @@ +DROP TABLE IF EXISTS slow_log; +DROP TABLE IF EXISTS expected_times; + +CREATE TABLE expected_times (QUERY_GROUP_ID String, max_query_duration_ms UInt64) Engine=Memory; +INSERT INTO expected_times VALUES('main_dashboard_top_query', 100), ('main_dashboard_bottom_query', 100); + +CREATE MATERIALIZED VIEW slow_log Engine=Memory AS +( + SELECT * FROM + ( + SELECT + extract(query,'/\\*\\s*QUERY_GROUP_ID:(.*?)\\s*\\*/') as QUERY_GROUP_ID, + * + FROM system.query_log + WHERE type<>1 and event_date >= yesterday() and event_time > now() - 120 + ) as ql + INNER JOIN expected_times USING (QUERY_GROUP_ID) + WHERE query_duration_ms > max_query_duration_ms +); + +SET log_queries=1; + +SELECT 1 /* QUERY_GROUP_ID:main_dashboard_top_query */; +SELECT 1 /* QUERY_GROUP_ID:main_dashboard_bottom_query */; + +SELECT 1 WHERE not ignore(sleep(0.105)) /* QUERY_GROUP_ID:main_dashboard_top_query */; +SELECT 1 WHERE not ignore(sleep(0.105)) /* QUERY_GROUP_ID:main_dashboard_bottom_query */; + +SET log_queries=0; +SYSTEM FLUSH LOGS; + +SELECT '=== system.query_log ==='; + +SELECT + extract(query,'/\\*\\s*QUERY_GROUP_ID:(.*?)\\s*\\*/') as QUERY_GROUP_ID, + count() +FROM system.query_log +WHERE type<>1 and event_date >= yesterday() and event_time > now() - 20 and QUERY_GROUP_ID<>'' +GROUP BY QUERY_GROUP_ID +ORDER BY QUERY_GROUP_ID; + +SELECT '=== slowlog ==='; + +SELECT + QUERY_GROUP_ID, + count() +FROM slow_log +GROUP BY QUERY_GROUP_ID +ORDER BY QUERY_GROUP_ID; + +DROP TABLE slow_log; +DROP TABLE expected_times; From ab343132de4dd258d6bd2ab1f7b18069659e8e3d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 3 Jul 2020 16:36:08 +0300 Subject: [PATCH 218/330] Fix handling dependency of table with ENGINE=Dictionary on dictionary. 
--- src/Databases/DatabaseDictionary.cpp | 2 +- src/Databases/DatabaseWithDictionaries.cpp | 3 ++- src/Storages/StorageDictionary.cpp | 27 ++++++++++++++++------ src/Storages/StorageDictionary.h | 12 ++++++++-- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 6c5433cab38..c4df7b0be05 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -28,7 +28,7 @@ namespace if (!load_result.config) return nullptr; DictionaryStructure dictionary_structure = ExternalDictionariesLoader::getDictionaryStructure(*load_result.config); - return StorageDictionary::create(StorageID(database_name, load_result.name), load_result.name, dictionary_structure); + return StorageDictionary::create(StorageID(database_name, load_result.name), load_result.name, dictionary_structure, /* internal */ true); } catch (Exception & e) { diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index e0f2aa9286b..0f597b7b0b2 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -49,7 +49,8 @@ void DatabaseWithDictionaries::attachDictionary(const String & dictionary_name, StorageDictionary::create( StorageID(getDatabaseName(), dictionary_name), full_name, - ExternalDictionariesLoader::getDictionaryStructure(*attach_info.config)), + ExternalDictionariesLoader::getDictionaryStructure(*attach_info.config), + /* internal */ true), lock); } catch (...) diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 83a093d5635..ede0e10614b 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -96,19 +96,29 @@ String StorageDictionary::generateNamesAndTypesDescription(const NamesAndTypesLi StorageDictionary::StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, - const DictionaryStructure & dictionary_structure_) + const ColumnsDescription & columns_, + bool internal_) : IStorage(table_id_) , dictionary_name(dictionary_name_) + , internal(internal_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); } +StorageDictionary::StorageDictionary( + const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure_, bool internal_) + : StorageDictionary(table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, internal_) +{ +} + + void StorageDictionary::checkTableCanBeDropped() const { - throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query.", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); + if (internal) + throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query.", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); } Pipes StorageDictionary::read( @@ -141,11 +151,14 @@ void registerStorageDictionary(StorageFactory & factory) args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], args.local_context); String dictionary_name = args.engine_args[0]->as().value.safeGet(); - const auto & dictionary = args.context.getExternalDictionariesLoader().getDictionary(dictionary_name); - const DictionaryStructure & dictionary_structure = 
dictionary->getStructure(); - checkNamesAndTypesCompatibleWithDictionary(dictionary_name, args.columns, dictionary_structure); + if (!args.attach) + { + const auto & dictionary = args.context.getExternalDictionariesLoader().getDictionary(dictionary_name); + const DictionaryStructure & dictionary_structure = dictionary->getStructure(); + checkNamesAndTypesCompatibleWithDictionary(dictionary_name, args.columns, dictionary_structure); + } - return StorageDictionary::create(args.table_id, dictionary_name, dictionary_structure); + return StorageDictionary::create(args.table_id, dictionary_name, args.columns); }); } diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 6175902381b..e0d4803644b 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -31,13 +31,21 @@ public: const String & dictionaryName() const { return dictionary_name; } private: - String dictionary_name; + const String dictionary_name; + const bool internal = false; protected: StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, - const DictionaryStructure & dictionary_structure); + const ColumnsDescription & columns_, + bool internal_ = false); + + StorageDictionary( + const StorageID & table_id_, + const String & dictionary_name_, + const DictionaryStructure & dictionary_structure, + bool internal_ = false); }; } From dad96beb7200ad5d0a23c2cf1b76921b299f571f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 3 Jul 2020 16:36:23 +0300 Subject: [PATCH 219/330] Add test. --- .../__init__.py | 0 .../configs/disable_lazy_load.xml | 4 + .../test_dictionaries_dependency/test.py | 109 ++++++++++++++++++ .../__init__.py | 0 .../configs/config.xml | 0 .../configs/dictionaries/dep_x.xml | 0 .../configs/dictionaries/dep_y.xml | 0 .../configs/dictionaries/dep_z.xml | 0 .../configs/users.xml | 0 .../test.py | 0 10 files changed, 113 insertions(+) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency}/__init__.py (100%) create mode 100644 tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml create mode 100644 tests/integration/test_dictionaries_dependency/test.py create mode 100644 tests/integration/test_dictionaries_dependency_xml/__init__.py rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/configs/config.xml (100%) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/configs/dictionaries/dep_x.xml (100%) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/configs/dictionaries/dep_y.xml (100%) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/configs/dictionaries/dep_z.xml (100%) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/configs/users.xml (100%) rename tests/integration/{test_dictionaries_depend_on_dictionaries => test_dictionaries_dependency_xml}/test.py (100%) diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py b/tests/integration/test_dictionaries_dependency/__init__.py similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/__init__.py rename to tests/integration/test_dictionaries_dependency/__init__.py diff --git a/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml 
b/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml new file mode 100644 index 00000000000..d01f7a0155b --- /dev/null +++ b/tests/integration/test_dictionaries_dependency/configs/disable_lazy_load.xml @@ -0,0 +1,4 @@ + + false + + diff --git a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py new file mode 100644 index 00000000000..31c5a6c549a --- /dev/null +++ b/tests/integration/test_dictionaries_dependency/test.py @@ -0,0 +1,109 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', stay_alive=True) +node2 = cluster.add_instance('node2', stay_alive=True, main_configs=['configs/disable_lazy_load.xml']) +nodes = [node1, node2] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + for node in nodes: + node.query("CREATE DATABASE IF NOT EXISTS test") + node.query("CREATE DATABASE IF NOT EXISTS atest") + node.query("CREATE DATABASE IF NOT EXISTS ztest") + node.query("CREATE TABLE test.source(x UInt64, y UInt64) ENGINE=Log") + node.query("INSERT INTO test.source VALUES (5,6)") + + node.query("CREATE DICTIONARY test.dict(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'source' DB 'test')) "\ + "LAYOUT(FLAT()) LIFETIME(0)") + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + for node in nodes: + node.query("DROP DICTIONARY IF EXISTS test.adict") + node.query("DROP DICTIONARY IF EXISTS test.zdict") + node.query("DROP DICTIONARY IF EXISTS atest.dict") + node.query("DROP DICTIONARY IF EXISTS ztest.dict") + node.query("DROP TABLE IF EXISTS test.atbl") + node.query("DROP TABLE IF EXISTS test.ztbl") + node.query("DROP TABLE IF EXISTS atest.tbl") + node.query("DROP TABLE IF EXISTS ztest.tbl") + node.query("DROP DATABASE IF EXISTS dict_db") + + +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_implicit_table(node): + d_names = ["test.adict", "test.zdict", "atest.dict", "ztest.dict"] + for d_name in d_names: + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict' DB 'test')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. + node.restart_clickhouse() + check() + + +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_explicit_table(node): + tbl_names = ["test.atbl", "test.ztbl", "atest.tbl", "ztest.tbl"] + d_names = ["test.other_{}".format(i) for i in range(0, len(tbl_names))] + for i in range(0, len(tbl_names)): + tbl_name = tbl_names[i] + tbl_database, tbl_shortname = tbl_name.split('.') + d_name = d_names[i] + node.query("CREATE TABLE {}(x UInt64, y UInt64) ENGINE=Dictionary('test.dict')".format(tbl_name)) + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE '{}' DB '{}')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name, tbl_shortname, tbl_database)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. 
+ node.restart_clickhouse() + check() + + +@pytest.mark.skip(reason="TODO: should be fixed") +@pytest.mark.parametrize("node", nodes) +def test_dependency_via_dictionary_database(node): + node.query("CREATE DATABASE dict_db ENGINE=Dictionary") + + d_names = ["test.adict", "test.zdict", "atest.dict", "ztest.dict"] + for d_name in d_names: + node.query("CREATE DICTIONARY {}(x UInt64, y UInt64) PRIMARY KEY x "\ + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'test.dict' DB 'dict_db')) "\ + "LAYOUT(FLAT()) LIFETIME(0)".format(d_name)) + + def check(): + for d_name in d_names: + assert node.query("SELECT dictGet({}, 'y', toUInt64(5))".format(d_name)) == "6\n" + + check() + + # Restart must not break anything. + node.restart_clickhouse() + check() diff --git a/tests/integration/test_dictionaries_dependency_xml/__init__.py b/tests/integration/test_dictionaries_dependency_xml/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml b/tests/integration/test_dictionaries_dependency_xml/configs/config.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/config.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/config.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_x.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_x.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_y.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/dictionaries/dep_z.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml b/tests/integration/test_dictionaries_dependency_xml/configs/users.xml similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/configs/users.xml rename to tests/integration/test_dictionaries_dependency_xml/configs/users.xml diff --git a/tests/integration/test_dictionaries_depend_on_dictionaries/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py similarity index 100% rename from tests/integration/test_dictionaries_depend_on_dictionaries/test.py rename to tests/integration/test_dictionaries_dependency_xml/test.py From 4d9cfe4cf2b77991a0672f8abcd5e46695f29b65 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 3 Jul 2020 22:37:10 +0300 Subject: [PATCH 220/330] Update 01355_CSV_input_format_allow_errors.sh --- .../queries/0_stateless/01355_CSV_input_format_allow_errors.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh index 300292dd08d..0bd575fad3f 100755 --- a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh @@ -42,4 +42,4 @@ cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64 echo "Return code: $?" cat "$STD_ERROR_CAPTURED" -rm "$STD_ERROR_CAPTURED" "$SAMPLE_FILE" \ No newline at end of file +rm "$STD_ERROR_CAPTURED" "$SAMPLE_FILE" From f6656029d4a5f2674f40e0f6578de8946e8cea6f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 4 Jul 2020 00:11:42 +0300 Subject: [PATCH 221/330] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23a5aa68116..3ec54e6e3ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## ClickHouse release 20.4 +## ClickHouse release 20.5 ### ClickHouse release v20.5.2.7-stable 2020-07-02 From 99443057b63c46ee0f0fdfd1a7e172c6d289ebe9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 4 Jul 2020 00:55:00 +0300 Subject: [PATCH 222/330] Update Dockerfile --- docker/server/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 001d09520ad..4d6b37aac16 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -14,6 +14,8 @@ RUN apt-get update \ && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && apt-get update \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get -y -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --allow-unauthenticated --yes --no-install-recommends \ clickhouse-common-static=$version \ From 08dfc140a3becc3e85d5be6f79519c5738de75ed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 01:45:17 +0300 Subject: [PATCH 223/330] Added a showcase of minimal Docker image --- docker/bare/Dockerfile | 2 ++ docker/bare/README.md | 35 +++++++++++++++++++++++++++++++++++ docker/bare/prepare | 21 +++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 docker/bare/Dockerfile create mode 100644 docker/bare/README.md create mode 100755 docker/bare/prepare diff --git a/docker/bare/Dockerfile b/docker/bare/Dockerfile new file mode 100644 index 00000000000..d0ee8661cad --- /dev/null +++ b/docker/bare/Dockerfile @@ -0,0 +1,2 @@ +FROM scratch +ADD root / diff --git a/docker/bare/README.md b/docker/bare/README.md new file mode 100644 index 00000000000..e698bc654ab --- /dev/null +++ b/docker/bare/README.md @@ -0,0 +1,35 @@ +## The bare minimum ClickHouse Docker image. + +It is intented as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel. + +Example usage: + +``` +./prepare +docker build --tag clickhouse-bare . +``` + +Run clickhouse-local: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse local --query "SELECT 1" +``` + +Run clickhouse-client in interactive mode: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse client +``` + +Run clickhouse-server: +``` +docker run -it --rm --network host clickhouse-bare /clickhouse server +``` + +It can be also run in chroot instead of Docker (first edit the `prepare` script to enable `proc`): + +``` +sudo chroot . 
/clickhouse server +``` + +## What does it miss? + +- creation of `clickhouse` user to run the server; diff --git a/docker/bare/prepare b/docker/bare/prepare new file mode 100755 index 00000000000..c3002935313 --- /dev/null +++ b/docker/bare/prepare @@ -0,0 +1,21 @@ +#!/bin/bash + +set -e + +SRC_DIR=../.. +BUILD_DIR=${SRC_DIR}/build + +mkdir root +pushd root +mkdir lib lib64 etc tmp root +cp ${BUILD_DIR}/programs/clickhouse . +cp ${SRC_DIR}/programs/server/{config,users}.xml . +cp /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} lib +cp /lib64/ld-linux-x86-64.so.2 lib64 +cp /etc/resolv.conf ./etc +strip clickhouse + +# This is needed for chroot but not needed for Docker: + +# mkdir proc +# sudo mount --bind /proc proc From 5d68bc57b94571dfed56d483faa97a73632c92e5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 01:52:15 +0300 Subject: [PATCH 224/330] Added a comment --- docker/bare/prepare | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/bare/prepare b/docker/bare/prepare index c3002935313..7d9cc4aaa94 100755 --- a/docker/bare/prepare +++ b/docker/bare/prepare @@ -5,6 +5,8 @@ set -e SRC_DIR=../.. BUILD_DIR=${SRC_DIR}/build +# BTW, .so files are acceptable from any Linux distribution for the last 12 years at least. + mkdir root pushd root mkdir lib lib64 etc tmp root From a41e5740a95347f7d69e747f032dce831615a5d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 01:52:19 +0300 Subject: [PATCH 225/330] Added a comment --- docker/bare/prepare | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/bare/prepare b/docker/bare/prepare index 7d9cc4aaa94..10d791cac73 100755 --- a/docker/bare/prepare +++ b/docker/bare/prepare @@ -5,7 +5,8 @@ set -e SRC_DIR=../.. BUILD_DIR=${SRC_DIR}/build -# BTW, .so files are acceptable from any Linux distribution for the last 12 years at least. +# BTW, .so files are acceptable from any Linux distribution for the last 12 years (at least). +# See https://presentations.clickhouse.tech/cpp_russia_2020/ for the details. mkdir root pushd root From e922fb9f67a5873eba15cb9edbcff61519187c8b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 02:01:33 +0300 Subject: [PATCH 226/330] Added a comment --- docker/bare/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/bare/README.md b/docker/bare/README.md index e698bc654ab..7b5ab6f5ea9 100644 --- a/docker/bare/README.md +++ b/docker/bare/README.md @@ -33,3 +33,5 @@ sudo chroot . /clickhouse server ## What does it miss? - creation of `clickhouse` user to run the server; +- VOLUME for server; +- most of the details, see other docker images for comparison. 
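The bare-image README above lists a missing VOLUME as one of its gaps, so anything the server writes is lost when the container exits. Below is a minimal sketch of one way to keep the data anyway, assuming the stock config.xml shipped by the `prepare` script still points at the default `/var/lib/clickhouse/` data path; the host directory name is purely illustrative and is not part of these patches:

```
# Persist server data by bind-mounting the assumed default data path from the host.
mkdir -p /tmp/clickhouse-bare-data
docker run -it --rm --network host \
    --volume /tmp/clickhouse-bare-data:/var/lib/clickhouse \
    clickhouse-bare /clickhouse server
```
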
From 89034ed565f3c4aecbe770606e4147e8d96a4e57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 02:03:07 +0300 Subject: [PATCH 227/330] Formatting --- .../results/046_aws_lightsail_4vcpu.json | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json index 75938abc9f0..ab55813b8e8 100644 --- a/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json +++ b/website/benchmark/hardware/results/046_aws_lightsail_4vcpu.json @@ -6,49 +6,49 @@ "kind": "cloud", "result": [ -[0.002, 0.001, 0.001], -[0.046, 0.026, 0.025], -[0.156, 0.077, 0.078], -[0.746, 0.098, 0.095], -[1.383, 0.233, 0.218], -[2.161, 0.646, 0.626], -[0.041, 0.037, 0.038], -[0.032, 0.029, 0.026], -[1.494, 1.190, 1.159], -[1.843, 1.354, 1.357], -[0.841, 0.375, 0.375], -[1.254, 0.446, 0.448], -[2.235, 1.792, 1.746], -[4.175, 2.354, 2.315], -[2.602, 2.075, 2.042], -[2.258, 2.085, 2.058], -[6.402, 5.909, 5.895], -[4.178, 3.618, 3.670], -[12.978, 12.037, 11.764], -[0.754, 0.107, 0.102], -[19.615, 1.888, 1.868], -[21.740, 2.208, 2.171], -[41.009, 5.277, 5.245], -[38.068, 2.475, 2.435], -[4.739, 0.693, 0.680], -[1.766, 0.549, 0.542], -[4.730, 0.684, 0.672], -[19.010, 1.849, 1.811], -[15.999, 3.086, 3.099], -[3.655, 3.609, 3.593], -[3.967, 1.768, 1.836], -[10.566, 3.036, 2.963], -[20.065, 19.091, null], -[21.474, 8.597, 8.501], -[21.484, 8.563, 8.533], -[3.850, 3.487, 3.477], -[0.408, 0.240, 0.239], -[0.125, 0.087, 0.084], -[0.132, 0.073, 0.073], -[0.685, 0.471, 0.480], -[0.089, 0.028, 0.025], -[0.044, 0.027, 0.018], -[0.007, 0.007, 0.006] + [0.002, 0.001, 0.001], + [0.046, 0.026, 0.025], + [0.156, 0.077, 0.078], + [0.746, 0.098, 0.095], + [1.383, 0.233, 0.218], + [2.161, 0.646, 0.626], + [0.041, 0.037, 0.038], + [0.032, 0.029, 0.026], + [1.494, 1.190, 1.159], + [1.843, 1.354, 1.357], + [0.841, 0.375, 0.375], + [1.254, 0.446, 0.448], + [2.235, 1.792, 1.746], + [4.175, 2.354, 2.315], + [2.602, 2.075, 2.042], + [2.258, 2.085, 2.058], + [6.402, 5.909, 5.895], + [4.178, 3.618, 3.670], + [12.978, 12.037, 11.764], + [0.754, 0.107, 0.102], + [19.615, 1.888, 1.868], + [21.740, 2.208, 2.171], + [41.009, 5.277, 5.245], + [38.068, 2.475, 2.435], + [4.739, 0.693, 0.680], + [1.766, 0.549, 0.542], + [4.730, 0.684, 0.672], + [19.010, 1.849, 1.811], + [15.999, 3.086, 3.099], + [3.655, 3.609, 3.593], + [3.967, 1.768, 1.836], + [10.566, 3.036, 2.963], + [20.065, 19.091, null], + [21.474, 8.597, 8.501], + [21.484, 8.563, 8.533], + [3.850, 3.487, 3.477], + [0.408, 0.240, 0.239], + [0.125, 0.087, 0.084], + [0.132, 0.073, 0.073], + [0.685, 0.471, 0.480], + [0.089, 0.028, 0.025], + [0.044, 0.027, 0.018], + [0.007, 0.007, 0.006] ] } ] From 338e7723935df112ff130c0a323ab1148ac25931 Mon Sep 17 00:00:00 2001 From: "Matwey V. 
Kornilov" Date: Sat, 4 Jul 2020 10:21:56 +0300 Subject: [PATCH 228/330] contrib/unixodbc-cmake: Fix build when UNBUNDLED target_compile_definitions may only set INTERFACE properties on IMPORTED targets --- contrib/unixodbc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 6d1922075a6..658fa3329d3 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -307,7 +307,7 @@ if (ENABLE_ODBC) set_target_properties (unixodbc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_ODBC}) endif () - target_compile_definitions (unixodbc PUBLIC USE_ODBC=1) + target_compile_definitions (unixodbc INTERFACE USE_ODBC=1) message (STATUS "Using unixodbc") else () From 8dc204350fb89a4a104bd6c3e0eb999dee84d229 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 4 Jul 2020 10:35:17 +0300 Subject: [PATCH 229/330] Fix version column in replicated version collapsing merge tree (#12121) --- .../ReplicatedMergeTreeTableMetadata.cpp | 9 +++---- ...llapsing_attach_detach_zookeeper.reference | 3 +++ ...ion_collapsing_attach_detach_zookeeper.sql | 26 +++++++++++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference create mode 100644 tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 50a234b18f6..194614cf421 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -40,13 +40,10 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// So rules in zookeeper metadata is following: /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - if (!metadata_snapshot->isPrimaryKeyDefined()) - primary_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); - else - { - primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); + + primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); + if (metadata_snapshot->isPrimaryKeyDefined()) sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); - } data_format_version = data.format_version; diff --git a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference new file mode 100644 index 00000000000..353c70aec11 --- /dev/null +++ b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference @@ -0,0 +1,3 @@ +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 7\nsign column: sign\nprimary key: key1, key2\ndata format version: 1\npartition key: d\ngranularity bytes: 10485760\n +1 +1 diff --git a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql new file mode 100644 index 00000000000..0086ec5c2a3 --- /dev/null +++ b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.sql @@ -0,0 +1,26 @@ +DROP TABLE 
IF EXISTS versioned_collapsing_table; + +CREATE TABLE versioned_collapsing_table( + d Date, + key1 UInt64, + key2 UInt32, + value String, + sign Int8, + version UInt16 +) +ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/versioned_collapsing_table', '1', sign, version) +PARTITION BY d +ORDER BY (key1, key2); + +INSERT INTO versioned_collapsing_table VALUES (toDate('2019-10-10'), 1, 1, 'Hello', -1, 1); + +SELECT value FROM system.zookeeper WHERE path = '/clickhouse/versioned_collapsing_table' and name = 'metadata'; + +SELECT COUNT() FROM versioned_collapsing_table; + +DETACH TABLE versioned_collapsing_table; +ATTACH TABLE versioned_collapsing_table; + +SELECT COUNT() FROM versioned_collapsing_table; + +DROP TABLE IF EXISTS versioned_collapsing_table; From 20c8e2294271886fa587bf5ff87718cba74998cd Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" Date: Sat, 4 Jul 2020 13:58:53 +0300 Subject: [PATCH 230/330] poco-cmake: Fix Poco::Data::ODBC target when UNBUNDLED By default IMPORTED target has a scope in the directory in which it is created and below. This leads to the following issues when building UNBUNDLED: Target "clickhouse" links to target "Poco::Data::ODBC" but the target was not found. Perhaps a find_package() call is missing for an IMPORTED target, or an ALIAS target is missing? --- contrib/poco-cmake/Data/ODBC/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt index a0e4f83a7cc..b53b58b0d54 100644 --- a/contrib/poco-cmake/Data/ODBC/CMakeLists.txt +++ b/contrib/poco-cmake/Data/ODBC/CMakeLists.txt @@ -24,7 +24,7 @@ if (ENABLE_ODBC) target_include_directories (_poco_data_odbc SYSTEM PUBLIC ${LIBRARY_DIR}/Data/ODBC/include) target_link_libraries (_poco_data_odbc PUBLIC Poco::Data unixodbc) else () - add_library (Poco::Data::ODBC UNKNOWN IMPORTED) + add_library (Poco::Data::ODBC UNKNOWN IMPORTED GLOBAL) find_library(LIBRARY_POCO_DATA_ODBC PocoDataODBC) find_path(INCLUDE_POCO_DATA_ODBC Poco/Data/ODBC/ODBC.h) From caafbe19a40364c3b776c071e8778973329274e3 Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" Date: Sat, 4 Jul 2020 15:32:02 +0300 Subject: [PATCH 231/330] Add missed include in ProxyListConfiguration --- src/Disks/S3/ProxyListConfiguration.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Disks/S3/ProxyListConfiguration.h b/src/Disks/S3/ProxyListConfiguration.h index a3fe83bfc49..14e23eb04d0 100644 --- a/src/Disks/S3/ProxyListConfiguration.h +++ b/src/Disks/S3/ProxyListConfiguration.h @@ -1,5 +1,7 @@ #pragma once +#include // for std::atomic + #include "ProxyConfiguration.h" namespace DB::S3 From 73676f5022a9f7d06f9fc046bd12ce527179a7be Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 4 Jul 2020 15:48:51 +0300 Subject: [PATCH 232/330] Improve performace of reading in order of sorting key. (#11696) * simplify reading in order of sorting key * add perf test for reading many parts * Revert "simplify reading in order of sorting key" This reverts commit 7267d7c46ee65a8fcd342a8ccd158a4144a46fdb. 
* add threshold for preliminary merge for reading in order * better threshold * limit threads in test --- src/Core/Settings.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +++-- .../performance/read_in_order_many_parts.xml | 32 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 tests/performance/read_in_order_many_parts.xml diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0c20a3c6040..9e8f96aa520 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -336,6 +336,7 @@ struct Settings : public SettingsCollection M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \ M(SettingBool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(SettingBool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(SettingUInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only by 'mysql' and 'odbc' table functions.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index fefd9dc6e15..69e819a3cf5 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -968,6 +968,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( }; const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; + bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold); for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { @@ -1069,7 +1070,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( } } - if (pipes.size() > 1) + if (pipes.size() > 1 && need_preliminary_merge) { SortDescription sort_description; for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) @@ -1087,7 +1088,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( res.emplace_back(std::move(pipes), std::move(merging_sorted)); } else - res.emplace_back(std::move(pipes.front())); + { + for (auto && pipe : pipes) + res.emplace_back(std::move(pipe)); + } } return res; diff --git a/tests/performance/read_in_order_many_parts.xml b/tests/performance/read_in_order_many_parts.xml new file mode 100644 index 00000000000..bb6004d061e --- /dev/null +++ b/tests/performance/read_in_order_many_parts.xml @@ -0,0 +1,32 @@ + + + 1 + 1 + 200 + 8 + + + + + table + + mt_20_parts + mt_200_parts + + + + + CREATE TABLE mt_20_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 20 + CREATE TABLE mt_200_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 200 + + INSERT INTO mt_20_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000) + INSERT INTO mt_200_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000) + OPTIMIZE TABLE mt_20_parts FINAL + OPTIMIZE TABLE mt_200_parts FINAL + + SELECT val2 FROM {table} ORDER BY val1 LIMIT 100 FORMAT Null + SELECT val2 FROM {table} ORDER BY val1 LIMIT 100000 FORMAT Null + SELECT sum(val2) FROM {table} GROUP BY val1 FORMAT Null + + DROP TABLE IF EXISTS {table} + From e6aacaac5c69bf682e24c2521033f6f97bc3fbec Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 4 Jul 2020 16:40:32 +0300 Subject: [PATCH 233/330] Do not enable sentry if ENABLE_LIBRARIES is not set --- cmake/find/sentry.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index f202c9100a8..84425220f12 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -6,7 +6,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") endif () if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) - option (USE_SENTRY "Use Sentry" ON) + option (USE_SENTRY "Use Sentry" ${ENABLE_LIBRARIES}) set (SENTRY_TRANSPORT "curl" CACHE STRING "") set (SENTRY_BACKEND "none" CACHE STRING "") set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") From 9258368de28d12348dc732adea392357aec1dff3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 4 Jul 2020 16:48:25 +0300 Subject: [PATCH 234/330] Fail if curl library was enabled and was not found in case of unbundled build --- contrib/curl-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/curl-cmake/CMakeLists.txt 
b/contrib/curl-cmake/CMakeLists.txt index 9edec1633c0..3c3226cae9e 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -180,7 +180,7 @@ if (ENABLE_CURL) set (CURL_VERSION_STRING 7.67.0 CACHE STRING "") add_library (CURL::libcurl ALIAS ${CURL_LIBRARY}) else () - find_package (CURL) + find_package (CURL REQUIRED) endif () endif () From 176a7f2f722d86d0c27f10d1bfd56e3b9c0bbefe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 4 Jul 2020 16:54:24 +0300 Subject: [PATCH 235/330] Normalize "pid" file handling #3501 --- base/daemon/BaseDaemon.cpp | 59 +--------------------------- base/daemon/BaseDaemon.h | 12 +----- programs/copier/ClusterCopierApp.cpp | 4 +- programs/copier/Internals.h | 1 - programs/local/LocalServer.cpp | 2 +- programs/server/Server.cpp | 2 +- src/Common/StatusFile.cpp | 26 +++++++----- src/Common/StatusFile.h | 12 +++++- 8 files changed, 36 insertions(+), 82 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index e7ccf84d7da..711bbd0290a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -628,7 +628,7 @@ void BaseDaemon::initialize(Application & self) /// Create pid file. if (config().has("pid")) - pid.emplace(config().getString("pid")); + pid.emplace(config().getString("pid"), DB::StatusFile::write_pid); /// Change path for logging. if (!log_path.empty()) @@ -812,63 +812,6 @@ void BaseDaemon::defineOptions(Poco::Util::OptionSet & new_options) Poco::Util::ServerApplication::defineOptions(new_options); } -bool isPidRunning(pid_t pid) -{ - return getpgid(pid) >= 0; -} - -BaseDaemon::PID::PID(const std::string & file_) -{ - file = Poco::Path(file_).absolute().toString(); - Poco::File poco_file(file); - - if (poco_file.exists()) - { - pid_t pid_read = 0; - { - std::ifstream in(file); - if (in.good()) - { - in >> pid_read; - if (pid_read && isPidRunning(pid_read)) - throw Poco::Exception("Pid file exists and program running with pid = " + std::to_string(pid_read) + ", should not start daemon."); - } - } - std::cerr << "Old pid file exists (with pid = " << pid_read << "), removing." << std::endl; - poco_file.remove(); - } - - int fd = open(file.c_str(), - O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); - - if (-1 == fd) - { - if (EEXIST == errno) - throw Poco::Exception("Pid file exists, should not start daemon."); - throw Poco::CreateFileException("Cannot create pid file."); - } - - SCOPE_EXIT({ close(fd); }); - - std::stringstream s; - s << getpid(); - if (static_cast(s.str().size()) != write(fd, s.str().c_str(), s.str().size())) - throw Poco::Exception("Cannot write to pid file."); -} - -BaseDaemon::PID::~PID() -{ - try - { - Poco::File(file).remove(); - } - catch (...) - { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - void BaseDaemon::handleSignal(int signal_id) { if (signal_id == SIGINT || diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 2a3262dd26f..41d4ad58869 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -163,16 +164,7 @@ protected: std::unique_ptr task_manager; - /// RAII wrapper for pid file. 
- struct PID - { - std::string file; - - PID(const std::string & file_); - ~PID(); - }; - - std::optional pid; + std::optional pid; std::atomic_bool is_cancelled{false}; diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index ce4bf94589e..e702d2f6353 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -1,4 +1,6 @@ #include "ClusterCopierApp.h" +#include + namespace DB { @@ -91,7 +93,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) void ClusterCopierApp::mainImpl() { - StatusFile status_file(process_path + "/status"); + StatusFile status_file(process_path + "/status", StatusFile::write_full_info); ThreadStatus thread_status; auto * log = &logger(); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 8e4f7afeb6e..b1a94e1a5ca 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -66,7 +66,6 @@ #include #include #include -#include #include "Aliases.h" diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8641287c3ec..c19495a0bb0 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -248,7 +248,7 @@ try if (!context->getPath().empty()) { /// Lock path directory before read - status.emplace(context->getPath() + "status"); + status.emplace(context->getPath() + "status", StatusFile::write_full_info); LOG_DEBUG(log, "Loading metadata from {}", context->getPath()); loadMetadataSystem(*context); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8789a3b7416..037996ae43a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -378,7 +378,7 @@ int Server::main(const std::vector & /*args*/) global_context->setPath(path); - StatusFile status{path + "status"}; + StatusFile status{path + "status", StatusFile::write_full_info}; SCOPE_EXIT({ /** Ask to cancel background jobs all table engines, diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index d228fdb42b6..4dd9c2544ed 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -30,8 +30,21 @@ namespace ErrorCodes } -StatusFile::StatusFile(const std::string & path_) - : path(path_) +StatusFile::FillFunction StatusFile::write_pid = [](WriteBuffer & out) +{ + out << getpid() << "\n"; +}; + +StatusFile::FillFunction StatusFile::write_full_info = [](WriteBuffer & out) +{ + out << "PID: " << getpid() << "\n" + << "Started at: " << LocalDateTime(time(nullptr)) << "\n" + << "Revision: " << ClickHouseRevision::get() << "\n"; +}; + + +StatusFile::StatusFile(std::string path_, FillFunction fill_) + : path(std::move(path_)), fill(std::move(fill_)) { /// If file already exists. NOTE Minor race condition. if (Poco::File(path).exists()) @@ -72,13 +85,8 @@ StatusFile::StatusFile(const std::string & path_) throwFromErrnoWithPath("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); /// Write information about current server instance to the file. - { - WriteBufferFromFileDescriptor out(fd, 1024); - out - << "PID: " << getpid() << "\n" - << "Started at: " << LocalDateTime(time(nullptr)) << "\n" - << "Revision: " << ClickHouseRevision::get() << "\n"; - } + WriteBufferFromFileDescriptor out(fd, 1024); + fill(out); } catch (...) 
{ diff --git a/src/Common/StatusFile.h b/src/Common/StatusFile.h index 0dde3e3d16f..5115e54428f 100644 --- a/src/Common/StatusFile.h +++ b/src/Common/StatusFile.h @@ -1,23 +1,33 @@ #pragma once #include +#include #include namespace DB { +class WriteBuffer; + /** Provides that no more than one server works with one data directory. */ class StatusFile : private boost::noncopyable { public: - explicit StatusFile(const std::string & path_); + using FillFunction = std::function; + + StatusFile(std::string path_, FillFunction fill_); ~StatusFile(); + /// You can use one of these functions to fill the file or provide your own. + static FillFunction write_pid; + static FillFunction write_full_info; + private: const std::string path; + FillFunction fill; int fd = -1; }; From d9ea8cfc42ac7b914b9760e9872d2454cfa50783 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 4 Jul 2020 16:57:04 +0300 Subject: [PATCH 236/330] Update StatusFile.cpp --- src/Common/StatusFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 4dd9c2544ed..3766908f9cd 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes StatusFile::FillFunction StatusFile::write_pid = [](WriteBuffer & out) { - out << getpid() << "\n"; + out << getpid(); }; StatusFile::FillFunction StatusFile::write_full_info = [](WriteBuffer & out) From d387fa719eed731eb3dda1bca2cc365e3eb07d23 Mon Sep 17 00:00:00 2001 From: vivarum Date: Sat, 4 Jul 2020 21:54:42 +0300 Subject: [PATCH 237/330] New ISO8601 year modificators for formatDateTime --- src/Functions/formatDateTime.cpp | 22 +++++++++++++++++++ ..._modificators_for_formatDateTime.reference | 10 +++++++++ ...1_week_modificators_for_formatDateTime.sql | 10 +++++++++ 3 files changed, 42 insertions(+) create mode 100644 tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference create mode 100644 tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 080c1108deb..66130fe2618 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -188,6 +188,16 @@ private: writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); } + static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) + { + writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); + } + + static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) + { + writeNumber4(target, ToISOYearImpl::execute(source, timezone)); + } + static void year2(char * target, Time source, const DateLUTImpl & timezone) { writeNumber2(target, ToYearImpl::execute(source, timezone) % 100); @@ -459,6 +469,18 @@ public: result.append("0000-00-00"); break; + // Last two digits of year of ISO 8601 week number (see %G) + case 'g': + instructions.emplace_back(&Action::ISO8601Year2, 2); + result.append("00"); + break; + + // Year of ISO 8601 week number (see %V) + case 'G': + instructions.emplace_back(&Action::ISO8601Year4, 4); + result.append("0000"); + break; + // Day of the year (001-366) 235 case 'j': instructions.emplace_back(&Action::dayOfYear, 3); diff --git a/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference b/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference new file mode 100644 index 
00000000000..7ac92da1e7d --- /dev/null +++ b/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference @@ -0,0 +1,10 @@ +2009 +09 +2009 +09 +2010 +10 +2019 +19 +2019 +19 diff --git a/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql b/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql new file mode 100644 index 00000000000..0a7cc047c1f --- /dev/null +++ b/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql @@ -0,0 +1,10 @@ +SELECT formatDateTime(toDate('2010-01-01'), '%G'); -- Friday (first day of the year) attributed to week 53 of the previous year (2009) +SELECT formatDateTime(toDate('2010-01-01'), '%g'); +SELECT formatDateTime(toDate('2010-01-03'), '%G'); -- Sunday, last day attributed to week 53 of the previous year (2009) +SELECT formatDateTime(toDate('2010-01-03'), '%g'); +SELECT formatDateTime(toDate('2010-01-04'), '%G'); -- Monday, first day in the year attributed to week 01 of the current year (2010) +SELECT formatDateTime(toDate('2010-01-04'), '%g'); +SELECT formatDateTime(toDate('2018-12-31'), '%G'); -- Monday (last day of the year) attributed to 01 week of next year (2019) +SELECT formatDateTime(toDate('2018-12-31'), '%g'); +SELECT formatDateTime(toDate('2019-01-01'), '%G'); -- Tuesday (first day of the year) attributed to 01 week of this year (2019) +SELECT formatDateTime(toDate('2019-01-01'), '%g'); From d10856cf07831135fa451926678b36e5922a1fa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 00:44:29 +0300 Subject: [PATCH 238/330] Added a test --- .../0_stateless/01358_constexpr_constraint.reference | 0 .../0_stateless/01358_constexpr_constraint.sql | 12 ++++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/01358_constexpr_constraint.reference create mode 100644 tests/queries/0_stateless/01358_constexpr_constraint.sql diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.reference b/tests/queries/0_stateless/01358_constexpr_constraint.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.sql b/tests/queries/0_stateless/01358_constexpr_constraint.sql new file mode 100644 index 00000000000..799f6f32259 --- /dev/null +++ b/tests/queries/0_stateless/01358_constexpr_constraint.sql @@ -0,0 +1,12 @@ +CREATE TEMPORARY TABLE constrained +( + `URL` String, + CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), + CONSTRAINT is_utf8 CHECK isValidUTF8(URL) +); + +insert into constrained values ('a'); + +DROP TEMPORARY TABLE constrained; +CREATE TEMPORARY TABLE constrained (x UInt8, CONSTRAINT bogus CHECK 0); +INSERT INTO constrained VALUES (1); -- { serverError 469 } From f91e34b2e80001f0c3b2eec55d65ee8c5aa3431f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 5 Jul 2020 00:56:48 +0300 Subject: [PATCH 239/330] Include libcurl4-openssl-dev into yandex/clickhouse-deb-builder --- docker/packager/deb/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4f1be197668..48e90d16f5d 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -68,6 +68,7 @@ RUN apt-get --allow-unauthenticated update -y \ libre2-dev \ libjemalloc-dev \ libmsgpack-dev \ + libcurl4-openssl-dev \ opencl-headers \ ocl-icd-libopencl1 \ intel-opencl-icd \ From 731d92657d0412d4fb8c15e6005002838e7f3530 Mon 
Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 00:57:57 +0300 Subject: [PATCH 240/330] Remove harmful code from "geoDistance" #12117 --- src/Functions/greatCircleDistance.cpp | 2 -- src/Interpreters/InterpreterCreateQuery.h | 3 ++- tests/queries/0_stateless/01359_geodistance_loop.reference | 1 + tests/queries/0_stateless/01359_geodistance_loop.sql | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01359_geodistance_loop.reference create mode 100644 tests/queries/0_stateless/01359_geodistance_loop.sql diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index bff92d7738d..5707326c60a 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -102,8 +102,6 @@ void geodistInit() inline float geodistDegDiff(float f) { f = fabsf(f); - while (f > 360) - f -= 360; if (f > 180) f = 360 - f; return f; diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index cf27f68ad73..30f18aa4134 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -45,7 +45,8 @@ public: internal = internal_; } - /// Obtain information about columns, their types, default values and column comments, for case when columns in CREATE query is specified explicitly. + /// Obtain information about columns, their types, default values and column comments, + /// for case when columns in CREATE query is specified explicitly. static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context, bool sanity_check_compression_codecs); static ConstraintsDescription getConstraintsDescription(const ASTExpressionList * constraints); diff --git a/tests/queries/0_stateless/01359_geodistance_loop.reference b/tests/queries/0_stateless/01359_geodistance_loop.reference new file mode 100644 index 00000000000..8484d062f57 --- /dev/null +++ b/tests/queries/0_stateless/01359_geodistance_loop.reference @@ -0,0 +1 @@ +inf diff --git a/tests/queries/0_stateless/01359_geodistance_loop.sql b/tests/queries/0_stateless/01359_geodistance_loop.sql new file mode 100644 index 00000000000..4c555a2538e --- /dev/null +++ b/tests/queries/0_stateless/01359_geodistance_loop.sql @@ -0,0 +1 @@ +SELECT geoDistance(0., 0., -inf, 1.); From f8a43d5e07a4ee60edde669b610eeda54840d18a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 5 Jul 2020 01:00:30 +0300 Subject: [PATCH 241/330] Update formatDateTime.cpp --- src/Functions/formatDateTime.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 66130fe2618..ad69d07c337 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -188,12 +188,12 @@ private: writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); } - static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) + static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); } - static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) + static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) // NOLINT { writeNumber4(target, ToISOYearImpl::execute(source, timezone)); } From 96e8e93c6485e9f9d89038d29dc04771e481f10c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 
02:16:16 +0300 Subject: [PATCH 242/330] Fix warnings from CodeQL --- base/common/CMakeLists.txt | 4 +--- base/ext/chrono_io.h | 7 ++----- src/AggregateFunctions/AggregateFunctionAvg.h | 2 ++ src/AggregateFunctions/AggregateFunctionAvgWeighted.h | 2 +- src/Disks/S3/registerDiskS3.cpp | 3 ++- src/Functions/GeoHash.cpp | 1 + src/Functions/PolygonUtils.h | 2 +- src/Functions/array/arraySum.cpp | 4 +++- 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index 074f73b158b..f09335f0ca0 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -77,10 +77,8 @@ target_link_libraries (common Poco::Util Poco::Foundation replxx - fmt - - PRIVATE cctz + fmt ) if (ENABLE_TESTS) diff --git a/base/ext/chrono_io.h b/base/ext/chrono_io.h index 392ec25d526..0b1c47d3874 100644 --- a/base/ext/chrono_io.h +++ b/base/ext/chrono_io.h @@ -1,19 +1,16 @@ #pragma once #include -#include #include -#include #include +#include namespace ext { inline std::string to_string(const std::time_t & time) { - std::stringstream ss; - ss << std::put_time(std::localtime(&time), "%Y-%m-%d %X"); - return ss.str(); + return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone()); } template diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 1f3426160cb..95b4836c336 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -19,6 +19,8 @@ namespace ErrorCodes template struct AggregateFunctionAvgData { + using NumeratorType = T; + T numerator = 0; Denominator denominator = 0; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index a3d3b9958db..8eb619585c7 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -16,7 +16,7 @@ public: const auto & values = static_cast(*columns[0]); const auto & weights = static_cast(*columns[1]); - this->data(place).numerator += values.getData()[row_num] * weights.getData()[row_num]; + this->data(place).numerator += static_cast(values.getData()[row_num]) * weights.getData()[row_num]; this->data(place).denominator += weights.getData()[row_num]; } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 999a81bd413..119ba037c96 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -46,7 +46,8 @@ namespace throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS); auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port"); - LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", endpoint.toString(), proxy_scheme, proxy_port); + LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}", + endpoint.toString(), proxy_scheme, proxy_port); return std::make_shared(endpoint, proxy_scheme, proxy_port); } diff --git a/src/Functions/GeoHash.cpp b/src/Functions/GeoHash.cpp index 7fdeb52b15c..679c0b87975 100644 --- a/src/Functions/GeoHash.cpp +++ b/src/Functions/GeoHash.cpp @@ -115,6 +115,7 @@ inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, uin result.fill(0); const auto bits = (precision * BITS_PER_SYMBOL) / 2; + assert(bits <= 255); uint8_t i = 0; for (; i < bits; ++i) { diff --git 
a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 6e1b03a47bd..d2843e3ec6a 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -315,7 +315,7 @@ void PointInPolygonWithGrid::buildGrid() if (has_empty_bound) return; - cells.assign(grid_size * grid_size, {}); + cells.assign(size_t(grid_size) * grid_size, {}); const Point & min_corner = box.min_corner(); diff --git a/src/Functions/array/arraySum.cpp b/src/Functions/array/arraySum.cpp index 1c9a4853a16..42cded14689 100644 --- a/src/Functions/array/arraySum.cpp +++ b/src/Functions/array/arraySum.cpp @@ -51,6 +51,7 @@ struct ArraySumImpl const ColVecType * column = checkAndGetColumn(&*mapped); + /// Constant case. if (!column) { const ColumnConst * column_const = checkAndGetColumnConst(&*mapped); @@ -75,7 +76,8 @@ struct ArraySumImpl size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) { - res[i] = x * (offsets[i] - pos); + /// Just multiply the value by array size. + res[i] = Result(x) * (offsets[i] - pos); pos = offsets[i]; } From 55f9ddbeb4455adff4eb991bc06e9353bb068ac9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 02:39:13 +0300 Subject: [PATCH 243/330] Added a test --- tests/queries/0_stateless/01359_codeql.reference | 1 + tests/queries/0_stateless/01359_codeql.sql | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/01359_codeql.reference create mode 100644 tests/queries/0_stateless/01359_codeql.sql diff --git a/tests/queries/0_stateless/01359_codeql.reference b/tests/queries/0_stateless/01359_codeql.reference new file mode 100644 index 00000000000..8b68939b011 --- /dev/null +++ b/tests/queries/0_stateless/01359_codeql.reference @@ -0,0 +1 @@ +4294967291 diff --git a/tests/queries/0_stateless/01359_codeql.sql b/tests/queries/0_stateless/01359_codeql.sql new file mode 100644 index 00000000000..9f68661eef5 --- /dev/null +++ b/tests/queries/0_stateless/01359_codeql.sql @@ -0,0 +1,2 @@ +-- In previous ClickHouse versions, the multiplications was made in a wrong type leading to overflow. +SELECT round(avgWeighted(x, y)) FROM (SELECT 0xFFFFFFFF AS x, 1000000000 AS y UNION ALL SELECT 1 AS x, 1 AS y); From d2e8c0df795af017916fe839c8be8fc489f89f2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 02:39:27 +0300 Subject: [PATCH 244/330] Minor modification --- src/Functions/array/arraySum.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arraySum.cpp b/src/Functions/array/arraySum.cpp index 42cded14689..34e3cafb390 100644 --- a/src/Functions/array/arraySum.cpp +++ b/src/Functions/array/arraySum.cpp @@ -59,7 +59,7 @@ struct ArraySumImpl if (!column_const) return false; - const Element x = column_const->template getValue(); + const Result x = column_const->template getValue(); typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) @@ -77,7 +77,7 @@ struct ArraySumImpl for (size_t i = 0; i < offsets.size(); ++i) { /// Just multiply the value by array size. 
- res[i] = Result(x) * (offsets[i] - pos); + res[i] = x * (offsets[i] - pos); pos = offsets[i]; } From c5cf7199d8333d9b80bb3575b29d24d985c2df87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 02:58:10 +0300 Subject: [PATCH 245/330] Update libdivide to the latest master --- contrib/libdivide/libdivide.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h index a153e7f9c5e..84adbf0db19 100644 --- a/contrib/libdivide/libdivide.h +++ b/contrib/libdivide/libdivide.h @@ -290,10 +290,17 @@ static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { } return 0; #else - int32_t result = 0; - uint32_t hi = 1U << 31; - for (; ~val & hi; hi >>= 1) { - result++; + if (val == 0) + return 32; + int32_t result = 8; + uint32_t hi = 0xFFU << 24; + while ((val & hi) == 0) { + hi >>= 8; + result += 8; + } + while (val & hi) { + result -= 1; + hi <<= 1; } return result; #endif From 351eda5d8a7ba95e4437497aff9cf184b771cd86 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:02:50 +0300 Subject: [PATCH 246/330] Replace exit to abort in libdivide --- contrib/libdivide/libdivide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h index 84adbf0db19..81057b7b43d 100644 --- a/contrib/libdivide/libdivide.h +++ b/contrib/libdivide/libdivide.h @@ -76,7 +76,7 @@ do { \ fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", \ __LINE__, LIBDIVIDE_FUNCTION, msg); \ - exit(-1); \ + abort(); \ } while (0) #if defined(LIBDIVIDE_ASSERTIONS_ON) @@ -85,7 +85,7 @@ if (!(x)) { \ fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", \ __LINE__, LIBDIVIDE_FUNCTION, #x); \ - exit(-1); \ + abort(); \ } \ } while (0) #else From 73a5c3839866179ac7cd606751634b995eadc52f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:28:08 +0300 Subject: [PATCH 247/330] Fix potential overflow in integer division #12119 --- src/Functions/intDiv.cpp | 17 ++++++++++++++--- src/Functions/modulo.cpp | 16 +++++++++++++--- src/Interpreters/createBlockSelector.cpp | 2 ++ .../DistributedBlockOutputStream.cpp | 1 - 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index d21e779045f..7e34f106147 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -26,12 +26,11 @@ struct DivideIntegralByConstantImpl static NO_INLINE void vectorConstant(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) { - if (unlikely(b == 0)) - throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" + /// Division by -1. By the way, we avoid FPE by division of the largest negative number by -1. + /// And signed integer overflow is well defined in C++20. if (unlikely(is_signed_v && b == -1)) { for (size_t i = 0; i < size; ++i) @@ -39,8 +38,20 @@ struct DivideIntegralByConstantImpl return; } + /// Division with too large divisor. 
+ if (unlikely(b > std::numeric_limits::max() + || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest()))) + { + for (size_t i = 0; i < size; ++i) + c_pos[i] = 0; + return; + } + #pragma GCC diagnostic pop + if (unlikely(static_cast(b) == 0)) + throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); + libdivide::divider divider(b); const A * a_end = a_pos + size; diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 631b7d12263..c90a590da61 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -27,12 +27,10 @@ struct ModuloByConstantImpl static NO_INLINE void vectorConstant(const A * __restrict src, B b, ResultType * __restrict dst, size_t size) { - if (unlikely(b == 0)) - throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); - #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" + /// Modulo with too small divisor. if (unlikely((std::is_signed_v && b == -1) || b == 1)) { for (size_t i = 0; i < size; ++i) @@ -40,8 +38,20 @@ struct ModuloByConstantImpl return; } + /// Modulo with too large divisor. + if (unlikely(b > std::numeric_limits::max() + || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest()))) + { + for (size_t i = 0; i < size; ++i) + dst[i] = src[i]; + return; + } + #pragma GCC diagnostic pop + if (unlikely(static_cast(b) == 0)) + throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); + libdivide::divider divider(b); /// Here we failed to make the SSE variant from libdivide give an advantage. diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 0759b9d9601..c3d4bcdda48 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -21,6 +21,8 @@ IColumn::Selector createBlockSelector( const std::vector & slots) { const auto total_weight = slots.size(); + assert(total_weight != 0); + size_t num_rows = column.size(); IColumn::Selector selector(num_rows); diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index bf9efef1ba6..546eb5a15a4 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include From 63a88de1618be3104b50a270c8eb8b09f354dd73 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:28:48 +0300 Subject: [PATCH 248/330] Added a test --- tests/queries/0_stateless/01360_division_overflow.reference | 6 ++++++ tests/queries/0_stateless/01360_division_overflow.sql | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/01360_division_overflow.reference create mode 100644 tests/queries/0_stateless/01360_division_overflow.sql diff --git a/tests/queries/0_stateless/01360_division_overflow.reference b/tests/queries/0_stateless/01360_division_overflow.reference new file mode 100644 index 00000000000..a4acafeee60 --- /dev/null +++ b/tests/queries/0_stateless/01360_division_overflow.reference @@ -0,0 +1,6 @@ +0 +0 +0 +1 +3 +5 diff --git a/tests/queries/0_stateless/01360_division_overflow.sql b/tests/queries/0_stateless/01360_division_overflow.sql new file mode 100644 index 00000000000..75601a36536 --- /dev/null +++ b/tests/queries/0_stateless/01360_division_overflow.sql @@ -0,0 +1,5 @@ +select intDiv(materialize(toInt32(1)), 0x100000000); +select intDiv(materialize(toInt32(1)), 
-0x100000000); +select intDiv(materialize(toInt32(1)), -9223372036854775808); +select materialize(toInt32(1)) % -9223372036854775808; +select value % -9223372036854775808 from (select toInt32(arrayJoin([3, 5])) value); From 6e1c1b0dbb6c7913142be6074ac2689ba4173f04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:35:57 +0300 Subject: [PATCH 249/330] Whitespace --- src/Interpreters/Cluster.cpp | 5 +++-- src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index c01d0188e5c..4ee92e9fa89 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -72,8 +72,9 @@ bool Cluster::Address::isLocal(UInt16 clickhouse_port) const } -Cluster::Address::Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, UInt32 shard_index_, UInt32 replica_index_) : - shard_index(shard_index_), replica_index(replica_index_) +Cluster::Address::Address( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, UInt32 shard_index_, UInt32 replica_index_) + : shard_index(shard_index_), replica_index(replica_index_) { host_name = config.getString(config_prefix + ".host"); port = static_cast(config.getInt(config_prefix + ".port")); diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index bfa6fae0977..51f7e93552a 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -170,7 +170,9 @@ void SelectStreamFactory::createForShard( ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable); if (shard_info.hasRemoteConnections()) { - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), "There is no table {} on local replica of shard {}, will try remote replicas.", main_table.getNameForLogs(), shard_info.shard_num); + LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + "There is no table {} on local replica of shard {}, will try remote replicas.", + main_table.getNameForLogs(), shard_info.shard_num); emplace_remote_stream(); } else @@ -254,7 +256,8 @@ void SelectStreamFactory::createForShard( catch (const Exception & ex) { if (ex.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED) - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); + LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); else throw; } From 66e810fc23b261873fc30948d8ea1a28f11892b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:44:26 +0300 Subject: [PATCH 250/330] Added a test for #4211 --- .../0_stateless/01361_failover_remote_num_tries.reference | 1 + .../queries/0_stateless/01361_failover_remote_num_tries.sh | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/01361_failover_remote_num_tries.reference create mode 100755 tests/queries/0_stateless/01361_failover_remote_num_tries.sh diff --git a/tests/queries/0_stateless/01361_failover_remote_num_tries.reference b/tests/queries/0_stateless/01361_failover_remote_num_tries.reference new file mode 100644 index 00000000000..64bb6b746dc --- /dev/null +++ 
b/tests/queries/0_stateless/01361_failover_remote_num_tries.reference @@ -0,0 +1 @@ +30 diff --git a/tests/queries/0_stateless/01361_failover_remote_num_tries.sh b/tests/queries/0_stateless/01361_failover_remote_num_tries.sh new file mode 100755 index 00000000000..eb30a9b10ee --- /dev/null +++ b/tests/queries/0_stateless/01361_failover_remote_num_tries.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o 'connect timed out' | wc -l From 10c40f4b11efe53a5d5a80f7e83802350ea692f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 03:48:00 +0300 Subject: [PATCH 251/330] Attempt to fix "Arcadia" build --- base/common/ya.make | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/common/ya.make b/base/common/ya.make index d40b1f5abfd..b64ab93f2fc 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -2,7 +2,7 @@ LIBRARY() ADDINCL( GLOBAL clickhouse/base - contrib/libs/cctz/include + GLOBAL contrib/libs/cctz/include ) CFLAGS (GLOBAL -DARCADIA_BUILD) From e83df36bc076e6b173d59ec1a88da60d9bee5c87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jul 2020 04:34:45 +0300 Subject: [PATCH 252/330] Rename test --- ...num_tries.reference => 01361_fover_remote_num_tries.reference} | 0 ...ilover_remote_num_tries.sh => 01361_fover_remote_num_tries.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01361_failover_remote_num_tries.reference => 01361_fover_remote_num_tries.reference} (100%) rename tests/queries/0_stateless/{01361_failover_remote_num_tries.sh => 01361_fover_remote_num_tries.sh} (100%) diff --git a/tests/queries/0_stateless/01361_failover_remote_num_tries.reference b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference similarity index 100% rename from tests/queries/0_stateless/01361_failover_remote_num_tries.reference rename to tests/queries/0_stateless/01361_fover_remote_num_tries.reference diff --git a/tests/queries/0_stateless/01361_failover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh similarity index 100% rename from tests/queries/0_stateless/01361_failover_remote_num_tries.sh rename to tests/queries/0_stateless/01361_fover_remote_num_tries.sh From 32a5dcd4834ce040e5cb16466056eef8248120f1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 5 Jul 2020 06:48:49 +0300 Subject: [PATCH 253/330] Update arraySum.cpp --- src/Functions/array/arraySum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arraySum.cpp b/src/Functions/array/arraySum.cpp index 34e3cafb390..1aedcb6ef92 100644 --- a/src/Functions/array/arraySum.cpp +++ b/src/Functions/array/arraySum.cpp @@ -59,7 +59,7 @@ struct ArraySumImpl if (!column_const) return false; - const Result x = column_const->template getValue(); + const Result x = column_const->template getValue(); // NOLINT typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) From 307c3c92a586e0cde9f5522607bb5d951df32103 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 5 Jul 2020 12:41:58 +0300 Subject: [PATCH 254/330] Run perf tests with memory sampling (for allocations >1M) This is to know the memory allocation size distribution, that can be obtained later from left-metric-log.tsv. 
This is an attempt to tune tcmalloc (new CPP version by google) to use lock-free part of the allocator for typical allocations (and it is a bad idea just to increase kMaxSize there, since number of allocation for each size class is also important). P.S. hope that this file will be applied, if no, then the same effect can be reached by tunning defaults in Settings.h Refs: #11590 Cc: @akuzm --- .../config/users.d/perf-comparison-tweaks-users.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 6e3e3df5d39..8173934fa3e 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -6,6 +6,8 @@ 1 1 1 + 1 + 1048576 From 99e9b15e00695cbb8532b56cf4fd57c271cddccf Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 5 Jul 2020 17:14:20 +0300 Subject: [PATCH 255/330] Make code clearer: use enum instead of `bool internal`. --- src/Databases/DatabaseDictionary.cpp | 6 +++++- src/Databases/DatabaseWithDictionaries.cpp | 2 +- src/Storages/StorageDictionary.cpp | 16 ++++++++------- src/Storages/StorageDictionary.h | 24 +++++++++++++++++++--- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index c4df7b0be05..986e36de8cf 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -28,7 +28,11 @@ namespace if (!load_result.config) return nullptr; DictionaryStructure dictionary_structure = ExternalDictionariesLoader::getDictionaryStructure(*load_result.config); - return StorageDictionary::create(StorageID(database_name, load_result.name), load_result.name, dictionary_structure, /* internal */ true); + return StorageDictionary::create( + StorageID(database_name, load_result.name), + load_result.name, + dictionary_structure, + StorageDictionary::Location::DictionaryDatabase); } catch (Exception & e) { diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index 0f597b7b0b2..9be7e4d8b3e 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -50,7 +50,7 @@ void DatabaseWithDictionaries::attachDictionary(const String & dictionary_name, StorageID(getDatabaseName(), dictionary_name), full_name, ExternalDictionariesLoader::getDictionaryStructure(*attach_info.config), - /* internal */ true), + StorageDictionary::Location::SameDatabaseAndNameAsDictionary), lock); } catch (...) 
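The diff below replaces the old `bool internal` flag in StorageDictionary with a three-valued `Location` enum. As a rough sketch only, the three situations that enum is meant to distinguish can be written in ClickHouse SQL roughly as follows; database, dictionary and table names here are illustrative assumptions, not taken from the patch:

```
-- Location::DictionaryDatabase: tables that appear implicitly inside a
-- database created with the Dictionary engine.
CREATE DATABASE dict_db ENGINE = Dictionary;

-- Location::SameDatabaseAndNameAsDictionary: CREATE DICTIONARY also exposes a
-- table with the same name, which cannot be detached or dropped on its own.
CREATE DICTIONARY my_dict (key UInt64, value UInt64) PRIMARY KEY key
    SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'src' DB 'default'))
    LAYOUT(FLAT()) LIFETIME(0);

-- Location::Custom: a table the user creates (and may later drop) explicitly.
CREATE TABLE my_dict_as_table (key UInt64, value UInt64)
    ENGINE = Dictionary(my_dict);
```
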
diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index ede0e10614b..4ea028c7ca8 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -97,10 +97,10 @@ StorageDictionary::StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, const ColumnsDescription & columns_, - bool internal_) + Location location_) : IStorage(table_id_) , dictionary_name(dictionary_name_) - , internal(internal_) + , location(location_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -109,16 +109,18 @@ StorageDictionary::StorageDictionary( StorageDictionary::StorageDictionary( - const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure_, bool internal_) - : StorageDictionary(table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, internal_) + const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure_, Location location_) + : StorageDictionary(table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, location_) { } void StorageDictionary::checkTableCanBeDropped() const { - if (internal) - throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query.", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); + if (location == Location::SameDatabaseAndNameAsDictionary) + throw Exception("Cannot detach dictionary " + backQuote(dictionary_name) + " as table, use DETACH DICTIONARY query", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); + if (location == Location::DictionaryDatabase) + throw Exception("Cannot detach table " + getStorageID().getFullTableName() + " from a database with DICTIONARY engine", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE); } Pipes StorageDictionary::read( @@ -158,7 +160,7 @@ void registerStorageDictionary(StorageFactory & factory) checkNamesAndTypesCompatibleWithDictionary(dictionary_name, args.columns, dictionary_structure); } - return StorageDictionary::create(args.table_id, dictionary_name, args.columns); + return StorageDictionary::create(args.table_id, dictionary_name, args.columns, StorageDictionary::Location::Custom); }); } diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index e0d4803644b..89dd7258f3d 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -30,22 +30,40 @@ public: const String & dictionaryName() const { return dictionary_name; } + /// Specifies where the table is located relative to the dictionary. + enum class Location + { + /// Table was created automatically as an element of a database with the DICTIONARY engine. + DictionaryDatabase, + + /// Table was created automatically along with a dictionary + /// and has the same database and name as the dictionary. + /// It provides table-like access to the dictionary. + /// User cannot drop that table. + SameDatabaseAndNameAsDictionary, + + /// Table was created explicitly by a statement like + /// CREATE TABLE ... ENGINE=Dictionary + /// User choosed the table's database and name and can drop that table. 
+ Custom, + }; + private: const String dictionary_name; - const bool internal = false; + const Location location; protected: StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, const ColumnsDescription & columns_, - bool internal_ = false); + Location location_); StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure, - bool internal_ = false); + Location location_); }; } From 1a760e18a55ec5f17beee07682d84d517d520135 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 5 Jul 2020 18:44:28 +0300 Subject: [PATCH 256/330] Update StorageDictionary.h --- src/Storages/StorageDictionary.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 89dd7258f3d..e1f31e0424b 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -33,7 +33,7 @@ public: /// Specifies where the table is located relative to the dictionary. enum class Location { - /// Table was created automatically as an element of a database with the DICTIONARY engine. + /// Table was created automatically as an element of a database with the Dictionary engine. DictionaryDatabase, /// Table was created automatically along with a dictionary From cb126a23ef1c31e67d45c724e7ca93f1599cd0fc Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 5 Jul 2020 18:45:05 +0300 Subject: [PATCH 257/330] Update StorageDictionary.h --- src/Storages/StorageDictionary.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index e1f31e0424b..f152f8c9932 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -44,7 +44,7 @@ public: /// Table was created explicitly by a statement like /// CREATE TABLE ... ENGINE=Dictionary - /// User choosed the table's database and name and can drop that table. + /// User chose the table's database and name and can drop that table. 
Custom, }; From 8c3417fbf74aed2f7bbfbc7688fb233a1660a3f4 Mon Sep 17 00:00:00 2001 From: myrrc Date: Sun, 5 Jul 2020 18:57:59 +0300 Subject: [PATCH 258/330] ILIKE operator (#12125) * Integrated CachingAllocator into MarkCache * fixed build errors * reset func hotfix * upd: Fixing build * updated submodules links * fix 2 * updating grabber allocator proto * updating lost work * updating CMake to use concepts * some other changes to get it building (integration into MarkCache) * further integration into caches * updated Async metrics, fixed some build errors * and some other errors revealing * added perfect forwarding to some functions * fix: forward template * fix: constexpr modifier * fix: FakePODAllocator missing member func * updated PODArray constructor taking alloc params * fix: PODArray overload with n restored * fix: FakePODAlloc duplicating alloc() func * added constexpr variable for alloc_tag_t * split cache values by allocators, provided updates * fix: memcpy * fix: constexpr modifier * fix: noexcept modifier * fix: alloc_tag_t for PODArray constructor * fix: PODArray copy ctor with different alloc * fix: resize() signature * updating to lastest working master * syncing with 273267 * first draft version * fix: update Searcher to case-insensitive * added ILIKE test * fixed style errors, updated test, split like and ilike, added notILike * replaced inconsistent comments * fixed show tables ilike * updated missing test cases * regenerated ya.make * Update 01355_ilike.sql Co-authored-by: myrrc Co-authored-by: alexey-milovidov --- programs/client/Suggest.cpp | 2 +- src/Functions/MatchImpl.h | 27 +++++--- src/Functions/Regexps.h | 11 ++-- src/Functions/ilike.cpp | 24 ++++++++ src/Functions/like.cpp | 8 ++- src/Functions/likePatternToRegexp.h | 3 +- src/Functions/notILike.cpp | 24 ++++++++ .../registerFunctionsStringRegexp.cpp | 6 +- src/Functions/ya.make | 4 +- src/Interpreters/CrossToInnerJoinVisitor.cpp | 2 +- src/Interpreters/misc.h | 2 +- src/Parsers/ASTFunction.cpp | 6 +- src/Parsers/ASTShowTablesQuery.cpp | 14 ++++- src/Parsers/ASTShowTablesQuery.h | 4 ++ src/Parsers/ExpressionElementParsers.cpp | 1 + src/Parsers/ExpressionListParsers.cpp | 2 + src/Parsers/ParserShowTablesQuery.cpp | 12 +++- src/Parsers/ParserShowTablesQuery.h | 4 +- src/Storages/MergeTree/KeyCondition.cpp | 1 + src/Storages/MergeTree/KeyCondition.h | 2 +- .../queries/0_stateless/01355_ilike.reference | 44 +++++++++++++ tests/queries/0_stateless/01355_ilike.sql | 61 +++++++++++++++++++ 22 files changed, 232 insertions(+), 32 deletions(-) create mode 100644 src/Functions/ilike.cpp create mode 100644 src/Functions/notILike.cpp create mode 100644 tests/queries/0_stateless/01355_ilike.reference create mode 100644 tests/queries/0_stateless/01355_ilike.sql diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 4ac5e735fd5..713aa82bb3e 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -79,7 +79,7 @@ Suggest::Suggest() "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP"}; + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"}; } void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) diff --git 
a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index a851fe3dd58..70c8419fcc9 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -27,7 +28,7 @@ namespace ErrorCodes } -/// Is the LIKE expression reduced to finding a substring in a string? +/// Is the [I]LIKE expression reduced to finding a substring in a string? static inline bool likePatternIsStrstr(const String & pattern, String & res) { res = ""; @@ -67,17 +68,21 @@ static inline bool likePatternIsStrstr(const String & pattern, String & res) return true; } -/** 'like' - if true, treat pattern as SQL LIKE; if false - treat pattern as re2 regexp. +/** 'like' - if true, treat pattern as SQL LIKE or ILIKE; if false - treat pattern as re2 regexp. * NOTE: We want to run regexp search for whole block by one call (as implemented in function 'position') * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. */ -template +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; using ResultType = UInt8; + using Searcher = std::conditional_t; + static void vectorConstant( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray & res) { @@ -85,7 +90,8 @@ struct MatchImpl return; String strstr_pattern; - /// A simple case where the LIKE expression reduces to finding a substring in a string + + /// A simple case where the [I]LIKE expression reduces to finding a substring in a string if (like && likePatternIsStrstr(pattern, strstr_pattern)) { const UInt8 * begin = data.data(); @@ -96,7 +102,7 @@ struct MatchImpl size_t i = 0; /// TODO You need to make that `searcher` is common to all the calls of the function. - Volnitsky searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); + Searcher searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -126,7 +132,10 @@ struct MatchImpl { size_t size = offsets.size(); - auto regexp = Regexps::get(pattern); + constexpr int flags = case_insensitive ? + Regexps::Regexp::RE_CASELESS : 0; + + auto regexp = Regexps::get(pattern, flags); std::string required_substring; bool is_trivial; @@ -170,7 +179,7 @@ struct MatchImpl /// The current index in the array of strings. size_t i = 0; - Volnitsky searcher(required_substring.data(), required_substring.size(), end - pos); + Searcher searcher(required_substring.data(), required_substring.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -248,7 +257,7 @@ struct MatchImpl /// If pattern is larger than string size - it cannot be found. if (strstr_pattern.size() <= n) { - Volnitsky searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); + Searcher searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) @@ -328,7 +337,7 @@ struct MatchImpl /// If required substring is larger than string size - it cannot be found. 
if (strstr_pattern.size() <= n) { - Volnitsky searcher(required_substring.data(), required_substring.size(), end - pos); + Searcher searcher(required_substring.data(), required_substring.size(), end - pos); /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index a9b1b336c79..cbfbbf7107d 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -59,19 +59,20 @@ namespace Regexps * In destructor, it returns the object back to the Pool for further reuse. */ template - inline Pool::Pointer get(const std::string & pattern) + inline Pool::Pointer get(const std::string & pattern, int flags = 0) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. - return known_regexps.get(pattern, [&pattern] + return known_regexps.get(pattern, [flags, &pattern] { - int flags = OptimizedRegularExpression::RE_DOT_NL; + int flags_final = flags | OptimizedRegularExpression::RE_DOT_NL; + if (no_capture) - flags |= OptimizedRegularExpression::RE_NO_CAPTURE; + flags_final |= OptimizedRegularExpression::RE_NO_CAPTURE; ProfileEvents::increment(ProfileEvents::RegexpCreated); - return new Regexp{createRegexp(pattern, flags)}; + return new Regexp{createRegexp(pattern, flags_final)}; }); } } diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp new file mode 100644 index 00000000000..a39a907eff2 --- /dev/null +++ b/src/Functions/ilike.cpp @@ -0,0 +1,24 @@ +#include "FunctionsStringSearch.h" +#include "FunctionFactory.h" +#include "MatchImpl.h" + +namespace DB +{ + +struct NameILike +{ + static constexpr auto name = "ilike"; +}; + +namespace +{ + using ILikeImpl = MatchImpl; +} + +using FunctionILike = FunctionsStringSearch; + +void registerFunctionILike(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index c07f68dfb56..f334cef7917 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -11,11 +11,15 @@ struct NameLike static constexpr auto name = "like"; }; -using FunctionLike = FunctionsStringSearch, NameLike>; +namespace +{ + using LikeImpl = MatchImpl; +} + +using FunctionLike = FunctionsStringSearch; void registerFunctionLike(FunctionFactory & factory) { factory.registerFunction(); } - } diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h index 3a078b468c2..24cb6ea78c7 100644 --- a/src/Functions/likePatternToRegexp.h +++ b/src/Functions/likePatternToRegexp.h @@ -4,7 +4,8 @@ namespace DB { -/// Transforms the LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ + +/// Transforms the [I]LIKE expression into regexp re2. 
For example, abc%def -> ^abc.*def$ inline String likePatternToRegexp(const String & pattern) { String res; diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp new file mode 100644 index 00000000000..0f95564b09e --- /dev/null +++ b/src/Functions/notILike.cpp @@ -0,0 +1,24 @@ +#include "FunctionsStringSearch.h" +#include "FunctionFactory.h" +#include "MatchImpl.h" + +namespace DB +{ + +struct NameNotILike +{ + static constexpr auto name = "notILike"; +}; + +namespace +{ + using NotILikeImpl = MatchImpl; +} + +using FunctionNotILike = FunctionsStringSearch; + +void registerFunctionNotILike(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/registerFunctionsStringRegexp.cpp b/src/Functions/registerFunctionsStringRegexp.cpp index 2a0a3c0ea1f..61853b19d11 100644 --- a/src/Functions/registerFunctionsStringRegexp.cpp +++ b/src/Functions/registerFunctionsStringRegexp.cpp @@ -4,7 +4,9 @@ namespace DB class FunctionFactory; void registerFunctionLike(FunctionFactory &); +void registerFunctionILike(FunctionFactory &); void registerFunctionNotLike(FunctionFactory &); +void registerFunctionNotILike(FunctionFactory &); void registerFunctionMatch(FunctionFactory &); void registerFunctionExtract(FunctionFactory &); void registerFunctionReplaceOne(FunctionFactory &); @@ -24,7 +26,9 @@ void registerFunctionExtractAllGroupsHorizontal(FunctionFactory &); void registerFunctionsStringRegexp(FunctionFactory & factory) { registerFunctionLike(factory); + registerFunctionILike(factory); registerFunctionNotLike(factory); + registerFunctionNotILike(factory); registerFunctionMatch(factory); registerFunctionExtract(factory); registerFunctionReplaceOne(factory); @@ -41,6 +45,4 @@ void registerFunctionsStringRegexp(FunctionFactory & factory) registerFunctionExtractAllGroupsVertical(factory); registerFunctionExtractAllGroupsHorizontal(factory); } - } - diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 0c1a181471d..86d2425eac4 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -92,8 +92,8 @@ SRCS( array/emptyArrayToSingle.cpp array/hasAll.cpp array/hasAny.cpp - array/hasSubstr.cpp array/has.cpp + array/hasSubstr.cpp array/indexOf.cpp array/length.cpp array/range.cpp @@ -228,6 +228,7 @@ SRCS( ifNull.cpp IFunction.cpp ignore.cpp + ilike.cpp in.cpp intDiv.cpp intDivOrZero.cpp @@ -288,6 +289,7 @@ SRCS( neighbor.cpp notEmpty.cpp notEquals.cpp + notILike.cpp notLike.cpp now64.cpp now.cpp diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 5ebebae2578..604bfc7774f 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -132,7 +132,7 @@ public: { /// leave other comparisons as is } - else if (functionIsLikeOperator(node.name) || /// LIKE, NOT LIKE + else if (functionIsLikeOperator(node.name) || /// LIKE, NOT LIKE, ILIKE, NOT ILIKE functionIsInOperator(node.name)) /// IN, NOT IN { /// leave as is. It's not possible to make push down here cause of unknown aliases and not implemented JOIN predicates. 
diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index bc994194336..30379567366 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -17,7 +17,7 @@ inline bool functionIsInOrGlobalInOperator(const std::string & name) inline bool functionIsLikeOperator(const std::string & name) { - return name == "like" || name == "notLike"; + return name == "like" || name == "ilike" || name == "notLike" || name == "notILike"; } inline bool functionIsJoinGet(const std::string & name) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 988ac66916d..ec46eb4ac37 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -54,7 +54,7 @@ ASTPtr ASTFunction::clone() const } -/** A special hack. If it's LIKE or NOT LIKE expression and the right hand side is a string literal, +/** A special hack. If it's [I]LIKE or NOT [I]LIKE expression and the right hand side is a string literal, * we will highlight unescaped metacharacters % and _ in string literal for convenience. * Motivation: most people are unaware that _ is a metacharacter and forgot to properly escape it with two backslashes. * With highlighting we make it clearly obvious. @@ -168,7 +168,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format "greater", " > ", "equals", " = ", "like", " LIKE ", + "ilike", " ILIKE ", "notLike", " NOT LIKE ", + "notILike", " NOT ILIKE ", "in", " IN ", "notIn", " NOT IN ", "globalIn", " GLOBAL IN ", @@ -186,7 +188,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); bool special_hilite = settings.hilite - && (name == "like" || name == "notLike") + && (name == "like" || name == "notLike" || name == "ilike" || name == "notILike") && highlightStringLiteralWithMetacharacters(arguments->children[1], settings, "%_"); /// Format x IN 1 as x IN (1): put parens around rhs even if there is a single element in set. diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 25a638c77d4..5a284109cf2 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -22,8 +22,13 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format else if (clusters) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : ""); + if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << (not_like ? " NOT" : "") << " LIKE " << (settings.hilite ? hilite_none : "") + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (not_like ? " NOT" : "") + << (case_insensitive_like ? " ILIKE " : " LIKE ") + << (settings.hilite ? hilite_none : "") << std::quoted(like, '\''); if (limit_length) @@ -47,8 +52,13 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format << backQuoteIfNeed(from); if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << (not_like ? " NOT" : "") << " LIKE " << (settings.hilite ? hilite_none : "") + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (not_like ? " NOT" : "") + << (case_insensitive_like ? " ILIKE " : " LIKE ") + << (settings.hilite ? hilite_none : "") << std::quoted(like, '\''); + else if (where_expression) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index f14d6e7bd33..acf365be91a 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -19,10 +19,14 @@ public: bool cluster{false}; bool dictionaries{false}; bool temporary{false}; + String cluster_str; String from; String like; + bool not_like{false}; + bool case_insensitive_like{false}; + ASTPtr where_expression; ASTPtr limit_length; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1b8dbccdcc1..563dd376ac3 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1115,6 +1115,7 @@ const char * ParserAlias::restricted_keywords[] = "NOT", "BETWEEN", "LIKE", + "ILIKE", nullptr }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e33e80f1f18..778ebe9f5d3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -47,7 +47,9 @@ const char * ParserComparisonExpression::operators[] = ">", "greater", "=", "equals", "LIKE", "like", + "ILIKE", "ilike", "NOT LIKE", "notLike", + "NOT ILIKE", "notILike", "IN", "in", "NOT IN", "notIn", "GLOBAL IN", "globalIn", diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index ee50d23ffc8..66ecdf61c58 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -28,6 +28,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_in("IN"); ParserKeyword s_not("NOT"); ParserKeyword s_like("LIKE"); + ParserKeyword s_ilike("ILIKE"); ParserKeyword s_where("WHERE"); ParserKeyword s_limit("LIMIT"); ParserStringLiteral like_p; @@ -53,8 +54,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_not.ignore(pos, expected)) query->not_like = true; - if (s_like.ignore(pos, expected)) + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) { + if (insensitive) + query->case_insensitive_like = true; + if (!like_p.parse(pos, like, expected)) return false; } @@ -98,8 +102,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_not.ignore(pos, expected)) query->not_like = true; - if (s_like.ignore(pos, expected)) + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) { + if (insensitive) + query->case_insensitive_like = true; + if (!like_p.parse(pos, like, expected)) return false; } @@ -119,6 +126,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } tryGetIdentifierNameInto(database, query->from); + if (like) query->like = safeGet(like->as().value); diff --git a/src/Parsers/ParserShowTablesQuery.h b/src/Parsers/ParserShowTablesQuery.h index 4fd11d8e2a0..3b8bb033275 100644 --- a/src/Parsers/ParserShowTablesQuery.h +++ b/src/Parsers/ParserShowTablesQuery.h @@ -7,14 +7,14 @@ namespace DB { /** Query like this: - * SHOW TABLES [FROM db] [[NOT] LIKE 'str'] [LIMIT expr] + * SHOW TABLES [FROM db] [[NOT] [I]LIKE 'str'] [LIMIT expr] * or * SHOW DATABASES. 
*/ class ParserShowTablesQuery : public IParserBase { protected: - const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] LIKE 'str'] [LIMIT expr]"; } + const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d85a44a2f6a..929242b2815 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -841,6 +841,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont func_name = "greaterOrEquals"; else if (func_name == "in" || func_name == "notIn" || func_name == "like" || func_name == "notLike" || + func_name == "ilike" || func_name == "notIlike" || func_name == "startsWith") { /// "const IN data_column" doesn't make sense (unlike "data_column IN const") diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index f12acdbf7bf..16197b9fa69 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -20,7 +20,7 @@ using FunctionBasePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -/** A field, that can be stored in two reperesenations: +/** A field, that can be stored in two representations: * - A standalone field. * - A field with reference to its position in a block. * It's needed for execution of functions on ranges during diff --git a/tests/queries/0_stateless/01355_ilike.reference b/tests/queries/0_stateless/01355_ilike.reference new file mode 100644 index 00000000000..8b2bd84f27a --- /dev/null +++ b/tests/queries/0_stateless/01355_ilike.reference @@ -0,0 +1,44 @@ +0 +1 +1 +1 +1 +0 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +1 +1 +1 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 +test1 +test2 diff --git a/tests/queries/0_stateless/01355_ilike.sql b/tests/queries/0_stateless/01355_ilike.sql new file mode 100644 index 00000000000..6f08be2dab2 --- /dev/null +++ b/tests/queries/0_stateless/01355_ilike.sql @@ -0,0 +1,61 @@ +SELECT 'Hello' ILIKE ''; +SELECT 'Hello' ILIKE '%'; +SELECT 'Hello' ILIKE '%%'; +SELECT 'Hello' ILIKE '%%%'; +SELECT 'Hello' ILIKE '%_%'; +SELECT 'Hello' ILIKE '_'; +SELECT 'Hello' ILIKE '_%'; +SELECT 'Hello' ILIKE '%_'; + +SELECT 'Hello' ILIKE 'H%o'; +SELECT 'hello' ILIKE 'H%o'; +SELECT 'hello' ILIKE 'h%o'; +SELECT 'Hello' ILIKE 'h%o'; + +SELECT 'Hello' NOT ILIKE 'H%o'; +SELECT 'hello' NOT ILIKE 'H%o'; +SELECT 'hello' NOT ILIKE 'h%o'; +SELECT 'Hello' NOT ILIKE 'h%o'; + +SELECT 'OHello' ILIKE '%lhell%'; +SELECT 'Ohello' ILIKE '%hell%'; +SELECT 'hEllo' ILIKE '%HEL%'; + +SELECT 'OHello' NOT ILIKE '%lhell%'; +SELECT 'Ohello' NOT ILIKE '%hell%'; +SELECT 'hEllo' NOT ILIKE '%HEL%'; + +SELECT materialize('prepre_f') ILIKE '%pre_f%'; + +SELECT 'abcdef' ILIKE '%aBc%def%'; +SELECT 'ABCDDEF' ILIKE '%abc%def%'; +SELECT 'Abc\nDef' ILIKE '%abc%def%'; +SELECT 'abc\ntdef' ILIKE '%abc%def%'; +SELECT 'abct\ndef' ILIKE '%abc%dEf%'; +SELECT 'abc\n\ndeF' ILIKE '%abc%def%'; +SELECT 'abc\n\ntdef' ILIKE '%abc%deF%'; +SELECT 'Abc\nt\ndef' ILIKE '%abc%def%'; +SELECT 'abct\n\ndef' ILIKE '%abc%def%'; +SELECT 'ab\ndef' ILIKE '%Abc%def%'; +SELECT 'aBc\nef' ILIKE '%ABC%DEF%'; + +SELECT CAST('hello' AS FixedString(5)) ILIKE '%he%o%'; + +SELECT 'ёЁё' ILIKE 'Ё%Ё'; +SELECT 'ощщЁё' ILIKE 'Щ%Ё'; 
+SELECT 'ощЩЁё' ILIKE '%Щ%Ё'; + +SELECT 'Щущпандер' ILIKE '%щп%е%'; +SELECT 'Щущпандер' ILIKE '%щП%е%'; +SELECT 'ощщЁё' ILIKE '%щ%'; +SELECT 'ощЩЁё' ILIKE '%ё%'; + +SHOW TABLES NOT ILIKE '%'; +DROP DATABASE IF EXISTS test_01355; +CREATE DATABASE test_01355; +USE test_01355; +CREATE TABLE test1 (x UInt8) ENGINE = Memory; +CREATE TABLE test2 (x UInt8) ENGINE = Memory; +SHOW TABLES ILIKE 'tES%'; +SHOW TABLES NOT ILIKE 'TeS%'; +DROP DATABASE test_01355; From 5816858403b116c7df3c9179c2b917bafdc96859 Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 6 Jul 2020 02:09:20 +0800 Subject: [PATCH 259/330] add parseDateTimeBestEffortUS function (#12028) * add function parseDateTimeBestEffortUS * add test * add doc Co-authored-by: alexey-milovidov --- .../functions/type-conversion-functions.md | 74 +++++++++++++ src/Functions/FunctionsConversion.cpp | 1 + src/Functions/FunctionsConversion.h | 14 ++- src/IO/parseDateTimeBestEffort.cpp | 102 ++++++++++++------ src/IO/parseDateTimeBestEffort.h | 1 + ...0569_parse_date_time_best_effort.reference | 2 +- ...1_parse_date_time_best_effort_us.reference | 14 +++ .../01351_parse_date_time_best_effort_us.sql | 20 ++++ 8 files changed, 194 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference create mode 100644 tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 254dceef29b..929c861281b 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -521,6 +521,80 @@ Result: - [toDate](#todate) - [toDateTime](#todatetime) +## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS} + +This function is similar to ['parseDateTimeBestEffort'](#parsedatetimebesteffort), the only difference is that this function prefers US style (`MM/DD/YYYY` etc) in case of ambiguouty. + +**Syntax** + +``` sql +parseDateTimeBestEffortUS(time_string [, time_zone]); +``` + +**Parameters** + +- `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). + +**Supported non-standard formats** + +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time component: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`. +- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. + +**Returned value** + +- `time_string` converted to the `DateTime` data type. 
+ +**Examples** + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└─────────────────────────——┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└─────────────────────────——┘ +``` + +Query: + +``` sql +SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57') +AS parseDateTimeBestEffortUS; +``` + +Result: + +``` text +┌─parseDateTimeBestEffortUS─┐ +│ 2020-09-12 12:12:57 │ +└─────────────────────────——┘ +``` + ## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull} Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed. diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ef1e78baa16..da42c8a2623 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -78,6 +78,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 0dee48709b9..932a8cd5bed 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -534,7 +534,8 @@ enum class ConvertFromStringExceptionMode enum class ConvertFromStringParsingMode { Normal, - BestEffort /// Only applicable for DateTime. Will use sophisticated method, that is slower. + BestEffort, /// Only applicable for DateTime. Will use sophisticated method, that is slower. 
+ BestEffortUS }; template , NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; +struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; }; struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; }; @@ -1587,6 +1595,8 @@ struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parse using FunctionParseDateTimeBestEffort = FunctionConvertFromString< DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 4b6183e9c0b..e98dbbc0480 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -88,7 +88,7 @@ struct DateTimeSubsecondPart UInt8 digits; }; -template +template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, @@ -264,11 +264,11 @@ ReturnType parseDateTimeBestEffortImpl( /// DD-MM-YY /// DD - UInt8 hour_or_day_of_month = 0; + UInt8 hour_or_day_of_month_or_month = 0; if (num_digits == 2) - readDecimalNumber<2>(hour_or_day_of_month, digits); + readDecimalNumber<2>(hour_or_day_of_month_or_month, digits); else if (num_digits == 1) //-V547 - readDecimalNumber<1>(hour_or_day_of_month, digits); + readDecimalNumber<1>(hour_or_day_of_month_or_month, digits); else return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR); @@ -277,7 +277,7 @@ ReturnType parseDateTimeBestEffortImpl( if (has_time) return on_error("Cannot read DateTime: time component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); - hour = hour_or_day_of_month; + hour = hour_or_day_of_month_or_month; has_time = true; num_digits = readDigits(digits, sizeof(digits), in); @@ -309,29 +309,48 @@ ReturnType parseDateTimeBestEffortImpl( if (month) return on_error("Cannot read DateTime: month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); - day_of_month = hour_or_day_of_month; - - num_digits = readDigits(digits, sizeof(digits), in); - - if (num_digits == 2) - readDecimalNumber<2>(month, digits); - else if (num_digits == 1) - readDecimalNumber<1>(month, digits); - else if (num_digits == 0) + if constexpr (is_us_style) { - /// Month in alphabetical form - - char alpha[9]; /// The longest month name: September - size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); - - if (num_alpha < 3) - return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); - - if (!read_alpha_month(alpha)) - return on_error("Cannot read DateTime: alphabetical characters after day of month don't 
look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + month = hour_or_day_of_month_or_month; + num_digits = readDigits(digits, sizeof(digits), in); + if (num_digits == 2) + readDecimalNumber<2>(day_of_month, digits); + else if (num_digits == 1) + readDecimalNumber<1>(day_of_month, digits); + else + return on_error("Cannot read DateTime: unexpected number of decimal digits after month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } else - return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + { + day_of_month = hour_or_day_of_month_or_month; + + num_digits = readDigits(digits, sizeof(digits), in); + + if (num_digits == 2) + readDecimalNumber<2>(month, digits); + else if (num_digits == 1) + readDecimalNumber<1>(month, digits); + else if (num_digits == 0) + { + /// Month in alphabetical form + + char alpha[9]; /// The longest month name: September + size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); + + if (num_alpha < 3) + return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); + + if (!read_alpha_month(alpha)) + return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + } + else + return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + + + } + + if (month > 12) + std::swap(month, day_of_month); if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { @@ -358,9 +377,9 @@ ReturnType parseDateTimeBestEffortImpl( else { if (day_of_month) - hour = hour_or_day_of_month; + hour = hour_or_day_of_month_or_month; else - day_of_month = hour_or_day_of_month; + day_of_month = hour_or_day_of_month_or_month; } } else if (num_digits != 0) @@ -522,6 +541,22 @@ ReturnType parseDateTimeBestEffortImpl( if (!day_of_month) day_of_month = 1; + auto is_leap_year = (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0); + + auto check_date = [](const auto & is_leap_year_, const auto & month_, const auto & day_) + { + if ((month_ == 1 || month_ == 3 || month_ == 5 || month_ == 7 || month_ == 8 || month_ == 10 || month_ == 12) && day_ >= 1 && day_ <= 31) + return true; + else if (month_ == 2 && ((is_leap_year_ && day_ >= 1 && day_ <= 29) || (!is_leap_year_ && day_ >= 1 && day_ <= 28))) + return true; + else if ((month_ == 4 || month_ == 6 || month_ == 9 || month_ == 11) && day_ >= 1 && day_ <= 30) + return true; + return false; + }; + + if (!check_date(is_leap_year, month, day_of_month)) + return on_error("Cannot read DateTime: logical error, unexpected date: " + std::to_string(year) + "-" + std::to_string(month) + "-" + std::to_string(day_of_month), ErrorCodes::LOGICAL_ERROR); + if (is_pm && hour < 12) hour += 12; @@ -565,12 +600,12 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf if constexpr (std::is_same_v) { - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) return false; } else { - parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); } @@ -598,12 
+633,17 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); + parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); +} + +void parseDateTimeBestEffortUS(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) +{ + parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) diff --git a/src/IO/parseDateTimeBestEffort.h b/src/IO/parseDateTimeBestEffort.h index 54432b5db2f..093bca571d8 100644 --- a/src/IO/parseDateTimeBestEffort.h +++ b/src/IO/parseDateTimeBestEffort.h @@ -56,6 +56,7 @@ class ReadBuffer; */ void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); +void parseDateTimeBestEffortUS(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); diff --git a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference index bf11e1c1d71..b353b66f8eb 100644 --- a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference +++ b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference @@ -58,7 +58,7 @@ 2017-01 03:04:05 MSD Jun 2017-05-31 23:04:05 2017-05-31 23:04:05 2017-01 03:04 MSD Jun 2017-05-31 23:04:00 2017-05-31 23:04:00 2017/01/31 2017-01-31 00:00:00 2017-01-31 00:00:00 - 2017/01/32 0000-00-00 00:00:00 0000-00-00 00:00:00 + 2017/01/32 ᴺᵁᴸᴸ 0000-00-00 00:00:00 2017-01 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 201701 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 2017 25 1:2:3 ᴺᵁᴸᴸ 0000-00-00 00:00:00 diff --git a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference new file mode 100644 index 00000000000..22a778b715a --- /dev/null +++ b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.reference @@ -0,0 +1,14 @@ + s a + + 1970/01/02 010203Z 1970-01-02 01:02:03 + 01-02-2001 UTC 2001-01-02 00:00:00 + 10.23.1990 1990-10-23 00:00:00 + 01-02-2017 03:04:05+1 2017-01-02 02:04:05 + 01/02/2017 03:04:05+300 2017-01-02 00:04:05 + 01.02.2017 03:04:05GMT 2017-01-02 03:04:05 + 01-02-2017 03:04:05 MSD 2017-01-01 23:04:05 + 01.02.17 03:04:05 MSD Feb 2017-02-01 23:04:05 + 01/02/2017 03:04:05 MSK 2017-01-02 00:04:05 + 12/13/2019 2019-12-13 
00:00:00 + 13/12/2019 2019-12-13 00:00:00 + 03/04/2019 2019-03-04 00:00:00 diff --git a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql new file mode 100644 index 00000000000..0c33b2de49f --- /dev/null +++ b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql @@ -0,0 +1,20 @@ +SELECT + s, + parseDateTimeBestEffortUS(s, 'UTC') AS a +FROM +( + SELECT arrayJoin([ +'1970/01/02 010203Z', +'01-02-2001 UTC', +'10.23.1990', +'01-02-2017 03:04:05+1', +'01/02/2017 03:04:05+300', +'01.02.2017 03:04:05GMT', +'01-02-2017 03:04:05 MSD', +'01.02.17 03:04:05 MSD Feb', +'01/02/2017 03:04:05 MSK', +'12/13/2019', +'13/12/2019', +'03/04/2019' +]) AS s) +FORMAT PrettySpaceNoEscapes; From c1e31b61485908289f30919218fbce715fd65506 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 00:58:07 +0300 Subject: [PATCH 260/330] Cleanup changelog (half done). Now it is acceptable #12104 --- CHANGELOG.md | 130 ++++++++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa007d41dbe..1f3fbade384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,17 @@ * Added a check for the case when user-level setting is specified in a wrong place. User-level settings should be specified in `users.xml` inside `` section for specific user profile (or in `` for default settings). The server won't start with exception message in log. This fixes [#9051](https://github.com/ClickHouse/ClickHouse/issues/9051). If you want to skip the check, you can either move settings to the appropriate place or add `1` to config.xml. [#11449](https://github.com/ClickHouse/ClickHouse/pull/11449) ([alexey-milovidov](https://github.com/alexey-milovidov)). * The setting `input_format_with_names_use_header` is enabled by default. It will affect parsing of input formats `-WithNames` and `-WithNamesAndTypes`. [#10937](https://github.com/ClickHouse/ClickHouse/pull/10937) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). #### New Feature +* `TTL DELETE WHERE` and `TTL GROUP BY` for automatic data coarsening and rollup in tables. [#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). +* Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses](https://github.com/MovElb)). +* Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). 
+* Add query performance metrics based on Linux `perf_events` (these metrics are calculated with hardware CPU counters and OS counters). It is optional and requires `CAP_SYS_ADMIN` to be set on clickhouse binary. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Now support `NULL` and `NOT NULL` modifiers for data types in `CREATE` query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Added a new layout `direct` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). +* Added new `complex_key_direct` layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). * Added support for MySQL style global variables syntax (stub). This is needed for compatibility of MySQL protocol. [#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([alexey-milovidov](https://github.com/alexey-milovidov)). * `minMap` and `maxMap` functions were added. [#11603](https://github.com/ClickHouse/ClickHouse/pull/11603) ([Ildus Kurbangaliev](https://github.com/ildus)). * Add the `system.asynchronous_metric_log` table that logs historical metrics from `system.asynchronous_metrics`. [#11588](https://github.com/ClickHouse/ClickHouse/pull/11588) ([Alexander Kuzmenkov](https://github.com/akuzm)). @@ -20,43 +28,27 @@ * Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). * Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). [#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). * Add function `fuzzBits` that randomly flips bits in a string with given probability. [#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/xolm)). -* Use HTTP client for S3 based on Poco. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). -* Add query performance metrics based on Linux `perf_events`. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). -* Default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). -* Add's a hasSubStr function that allows for look for sub sequences in arrays. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). -* Now support NULL and NOT NULL modifiers for data types in create query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Allow to specify default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). 
* Added new functions to import/export DateTime64 as Int64 with various precision: `to-/fromUnixTimestamp64Milli/-Micro/-Nano`. [#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). * Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). -* Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). -* Support `ALTER RENAME COLUMN` for the distributed table engine. Continuation of [#10727](https://github.com/ClickHouse/ClickHouse/issues/10727). Fixes [#10747](https://github.com/ClickHouse/ClickHouse/issues/10747). [#10887](https://github.com/ClickHouse/ClickHouse/pull/10887) ([alesapin](https://github.com/alesapin)). -* Added new complex key direct layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). * OFFSET keyword can now be used without an affiliated LIMIT clause. [#10802](https://github.com/ClickHouse/ClickHouse/pull/10802) ([Guillaume Tassery](https://github.com/YiuRULE)). * Added `system.licenses` table. This table contains licenses of third-party libraries that are located in `contrib` directory. This closes [#2890](https://github.com/ClickHouse/ClickHouse/issues/2890). [#10795](https://github.com/ClickHouse/ClickHouse/pull/10795) ([alexey-milovidov](https://github.com/alexey-milovidov)). * New function function toStartOfSecond(DateTime64) -> DateTime64 that nullifies sub-second part of DateTime64 value. [#10722](https://github.com/ClickHouse/ClickHouse/pull/10722) ([Vasily Nemkov](https://github.com/Enmk)). -* Add data type Point (Tuple(Float64, Float64)) and Polygon (Array(Array(Tuple(Float64, Float64))). [#10678](https://github.com/ClickHouse/ClickHouse/pull/10678) ([Alexey Ilyukhov](https://github.com/livace)). -* Default user and database creation on docker image starting. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). -* Added a new layout ```direct``` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). * Add new input format `JSONAsString` that accepts a sequence of JSON objects separated by newlines, spaces and/or commas. [#10607](https://github.com/ClickHouse/ClickHouse/pull/10607) ([Kruglov Pavel](https://github.com/Avogar)). * Allowed to profile memory with finer granularity steps than 4 MiB. Added sampling memory profiler to capture random allocations/deallocations. [#10598](https://github.com/ClickHouse/ClickHouse/pull/10598) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* (Only Linux) Clickhouse server now tries to fallback to ProcfsMetricsProvider when clickhouse binary is not attributed with CAP_NET_ADMIN capability to collect per-query system metrics (for CPU and I/O). [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). -* Added ability to delete a subset of expired rows, which satisfies the condition in WHERE clause. 
Added ability to replace expired rows with aggregates of them specified in GROUP BY clause. [#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). -* Function that extracts from haystack all matching non-overlapping groups with regular expressions, and put those into `Array(Array(String))` column. [#10534](https://github.com/ClickHouse/ClickHouse/pull/10534) ([Vasily Nemkov](https://github.com/Enmk)). -* Selects with final are executed in parallel. Added setting `max_final_threads` to limit the number of threads used. [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses](https://github.com/MovElb)). -* Added OpenCl support and bitonic sort algorithm, which can be used for sorting integer types of data in single column. Needs to be build with flag `-DENABLE_OPENCL=1`. For using bitonic sort algorithm instead of others you need to set `bitonic_sort` for Setting's option `special_sort` and make sure that OpenCL is available. [#10232](https://github.com/ClickHouse/ClickHouse/pull/10232) ([Ri](https://github.com/margaritiko)). * `SimpleAggregateFunction` now also supports `sumMap`. [#10000](https://github.com/ClickHouse/ClickHouse/pull/10000) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Support `ALTER RENAME COLUMN` for the distributed table engine. Continuation of [#10727](https://github.com/ClickHouse/ClickHouse/issues/10727). Fixes [#10747](https://github.com/ClickHouse/ClickHouse/issues/10747). [#10887](https://github.com/ClickHouse/ClickHouse/pull/10887) ([alesapin](https://github.com/alesapin)). #### Bug Fix +* Fix UBSan report in Decimal parse. This fixes [#7540](https://github.com/ClickHouse/ClickHouse/issues/7540). [#10512](https://github.com/ClickHouse/ClickHouse/pull/10512) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix potential floating point exception when parsing DateTime64. This fixes [#11374](https://github.com/ClickHouse/ClickHouse/issues/11374). [#11875](https://github.com/ClickHouse/ClickHouse/pull/11875) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix rare crash caused by using `Nullable` column in prewhere condition. [#11895](https://github.com/ClickHouse/ClickHouse/pull/11895) [#11608](https://github.com/ClickHouse/ClickHouse/issues/11608) [#11869](https://github.com/ClickHouse/ClickHouse/pull/11869) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Don't allow arrayJoin inside higher order functions. It was leading to broken protocol synchronization. This closes [#3933](https://github.com/ClickHouse/ClickHouse/issues/3933). [#11846](https://github.com/ClickHouse/ClickHouse/pull/11846) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix wrong result for `if()` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). 
-* Preserve column alias with optimize_aggregators_of_group_by_keys (`optimize_aggregators_of_group_by_keys` has been introduced in [#11667](https://github.com/ClickHouse/ClickHouse/issues/11667)). [#11806](https://github.com/ClickHouse/ClickHouse/pull/11806) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong result for `if` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)). * Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fixed `Scalar doesn't exist` exception when using `WITH ...` in `SELECT ... FROM merge_tree_table ...` https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)). -* Fixes crash in specially generated queries when `optimize_arithmetic_operations_in_aggregate_functions = 1`. [#11756](https://github.com/ClickHouse/ClickHouse/pull/11756) ([Ruslan](https://github.com/kamalov-ruslan)). * Fix unexpected behaviour of queries like `SELECT *, xyz.*`, which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)). * Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)). * Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)). @@ -71,9 +63,9 @@ * Add support for regular expressions with case-insensitive flags. This fixes [#11101](https://github.com/ClickHouse/ClickHouse/issues/11101) and fixes [#11506](https://github.com/ClickHouse/ClickHouse/issues/11506). [#11649](https://github.com/ClickHouse/ClickHouse/pull/11649) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove trivial count query optimization if row-level security is set. In previous versions the user got the total count of records in the table instead of the filtered count. This fixes [#11352](https://github.com/ClickHouse/ClickHouse/issues/11352). [#11644](https://github.com/ClickHouse/ClickHouse/pull/11644) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix bloom filters for String (data skipping indices). [#11638](https://github.com/ClickHouse/ClickHouse/pull/11638) ([Azat Khuzhin](https://github.com/azat)). -* without -q option the database does not get created at startup. [#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). +* Without `-q` option the database does not get created at startup. [#11604](https://github.com/ClickHouse/ClickHouse/pull/11604) ([giordyb](https://github.com/giordyb)). * Fix error `Block structure mismatch` for queries with sampling reading from `Buffer` table. [#11602](https://github.com/ClickHouse/ClickHouse/pull/11602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix wrong exit code of the clickhouse-client, when exception.code() % 256 = 0. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)). +* Fix wrong exit code of the clickhouse-client, when `exception.code() % 256 == 0`. [#11601](https://github.com/ClickHouse/ClickHouse/pull/11601) ([filimonov](https://github.com/filimonov)).
* Fix race conditions in CREATE/DROP of different replicas of ReplicatedMergeTree. Continue to work if the table was not removed completely from ZooKeeper or not created successfully. This fixes [#11432](https://github.com/ClickHouse/ClickHouse/issues/11432). [#11592](https://github.com/ClickHouse/ClickHouse/pull/11592) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix trivial error in log message about "Mark cache size was lowered" at server startup. This closes [#11399](https://github.com/ClickHouse/ClickHouse/issues/11399). [#11589](https://github.com/ClickHouse/ClickHouse/pull/11589) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix error `Size of offsets doesn't match size of column` for queries with `PREWHERE column in (subquery)` and `ARRAY JOIN`. [#11580](https://github.com/ClickHouse/ClickHouse/pull/11580) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). @@ -83,42 +75,39 @@ * Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix shard_num/replica_num for `` (breaks use_compact_format_in_distributed_parts_names). [#11528](https://github.com/ClickHouse/ClickHouse/pull/11528) ([Azat Khuzhin](https://github.com/azat)). * Fix async INSERT into Distributed for prefer_localhost_replica=0 and w/o internal_replication. [#11527](https://github.com/ClickHouse/ClickHouse/pull/11527) ([Azat Khuzhin](https://github.com/azat)). -* Fix memory leak when exception is thrown in the middle of aggregation with -State functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix memory leak when exception is thrown in the middle of aggregation with `-State` functions. This fixes [#8995](https://github.com/ClickHouse/ClickHouse/issues/8995). [#11496](https://github.com/ClickHouse/ClickHouse/pull/11496) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix `Pipeline stuck` exception for `INSERT SELECT FINAL` where `SELECT` (`max_threads`>1) has multiple streams but `INSERT` has only one (`max_insert_threads`==0). [#11455](https://github.com/ClickHouse/ClickHouse/pull/11455) ([Azat Khuzhin](https://github.com/azat)). * Fix wrong result in queries like `select count() from t, u`. [#11454](https://github.com/ClickHouse/ClickHouse/pull/11454) ([Artem Zuikov](https://github.com/4ertus2)). * Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)). -* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). * Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. 
[#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix crash in JOIN over LowCardinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in JOIN over `LowCardinality(T)` and `Nullable(T)`. [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)). * Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)). -* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). +* Fixed `geohashesInBox` with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)). * Better errors for `joinGet()` functions. [#11389](https://github.com/ClickHouse/ClickHouse/pull/11389) ([Artem Zuikov](https://github.com/4ertus2)). * Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Remove redundant lock during parts send in ReplicatedMergeTree. [#11354](https://github.com/ClickHouse/ClickHouse/pull/11354) ([alesapin](https://github.com/alesapin)). * Fix support for `\G` (vertical output) in clickhouse-client in multiline mode. This closes [#9933](https://github.com/ClickHouse/ClickHouse/issues/9933). [#11350](https://github.com/ClickHouse/ClickHouse/pull/11350) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix potential segfault when using `Lazy` database. [#11348](https://github.com/ClickHouse/ClickHouse/pull/11348) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix crash in direct selects from StorageJoin (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix crash in direct selects from `Join` table engine (without JOIN) and wrong nullability. [#11340](https://github.com/ClickHouse/ClickHouse/pull/11340) ([Artem Zuikov](https://github.com/4ertus2)). * Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Now merges are stopped before changing metadata in `ALTER` queries. [#11335](https://github.com/ClickHouse/ClickHouse/pull/11335) ([alesapin](https://github.com/alesapin)). * Make writing to `MATERIALIZED VIEW` with setting `parallel_view_processing = 1` parallel again. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#11330](https://github.com/ClickHouse/ClickHouse/pull/11330) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix visitParamExtractRaw when extracted JSON has strings with unbalanced { or [. [#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). +* Fix `visitParamExtractRaw` when extracted JSON has strings with unbalanced { or [.
[#11318](https://github.com/ClickHouse/ClickHouse/pull/11318) ([Ewout](https://github.com/devwout)). * Fix very rare race condition in ThreadPool. [#11314](https://github.com/ClickHouse/ClickHouse/pull/11314) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix insignificant data race in clickhouse-copier. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix insignificant data race in `clickhouse-copier`. Found by integration tests. [#11313](https://github.com/ClickHouse/ClickHouse/pull/11313) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix wrong markup in documentation. [#11263](https://github.com/ClickHouse/ClickHouse/pull/11263) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix crash while reading malformed data in `Protobuf` format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. 
[#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug when `cache` dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Now `primary.idx` will be checked if it's defined in `CREATE` query. [#11199](https://github.com/ClickHouse/ClickHouse/pull/11199) ([alesapin](https://github.com/alesapin)). * Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed S3 globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fixed `S3` globbing which could fail in case of more than 1000 keys and some backends. [#11179](https://github.com/ClickHouse/ClickHouse/pull/11179) ([Vladimir Chebotarev](https://github.com/excitoon)). * If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)). -* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). * Fix for the hang which was happening sometimes during DROP of table engine=Kafka (or during server restarts). [#11145](https://github.com/ClickHouse/ClickHouse/pull/11145) ([filimonov](https://github.com/filimonov)). * Fix excessive reserving of threads for simple queries (optimization for reducing the number of threads, which was partly broken after changes in pipeline). [#11114](https://github.com/ClickHouse/ClickHouse/pull/11114) ([Azat Khuzhin](https://github.com/azat)). * Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)). @@ -172,7 +161,7 @@ * On `SYSTEM DROP DNS CACHE` query also drop caches, which are used to check if user is allowed to connect from some IP addresses. [#10608](https://github.com/ClickHouse/ClickHouse/pull/10608) ([tavplubix](https://github.com/tavplubix)). * Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fixed handling condition variable for synchronous mutations. 
In some cases signals to that condition variable could be lost. [#10588](https://github.com/ClickHouse/ClickHouse/pull/10588) ([Vladimir Chebotarev](https://github.com/excitoon)). -* This PR fixes possible crash when `createDictionary()` is called before `loadStoredObject()` has finished. [#10587](https://github.com/ClickHouse/ClickHouse/pull/10587) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixes possible crash when `createDictionary()` is called before `loadStoredObject()` has finished. [#10587](https://github.com/ClickHouse/ClickHouse/pull/10587) ([Vitaly Baranov](https://github.com/vitlibar)). * Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)). * Fix SELECT of column ALIAS whose default expression type is different from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)). * Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)). @@ -180,19 +169,21 @@ * Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned off by default, due to tricky sharding_key analysis; a simple example is `if` in the sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)). * Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR distributed send via INSERT had been postponed on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fail with `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)). * Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). #### Improvement -* Rewrite code for `optimize_arithmetic_operations_in_aggregate_functions` optimisation. [#11899](https://github.com/ClickHouse/ClickHouse/pull/11899) ([Artem Zuikov](https://github.com/4ertus2)). +* Apply `TTL` for old data, after `ALTER MODIFY TTL` query. This behaviour is controlled by setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)). * When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Slightly improve diagnostic of reading decimal from string.
This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* (Only Linux) OS related performance metrics (for CPU and I/O) will work even without `CAP_NET_ADMIN` capability. [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). * Added `hostname` as an alias to function `hostName`. This feature was suggested by Victor Tarnavskiy from Yandex.Metrica. [#11821](https://github.com/ClickHouse/ClickHouse/pull/11821) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove leader election, step 3: remove yielding of leadership; remove sending queries to leader. [#11795](https://github.com/ClickHouse/ClickHouse/pull/11795) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. [#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). * Added support for distributed `DDL` (update/delete/drop partition) on cross replication clusters. [#11703](https://github.com/ClickHouse/ClickHouse/pull/11703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Emit warning instead of error in server log at startup if we cannot listen one of the listen addresses (e.g. IPv6 is unavailable inside Docker). Note that if server fails to listen all listed addresses, it will refuse to startup as before. This fixes [#4406](https://github.com/ClickHouse/ClickHouse/issues/4406). [#11687](https://github.com/ClickHouse/ClickHouse/pull/11687) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Default user and database creation on docker image starting. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). * When multiline query is printed to server log, the lines are joined. Make it to work correct in case of multiline string literals, identifiers and single-line comments. This fixes [#3853](https://github.com/ClickHouse/ClickHouse/issues/3853). [#11686](https://github.com/ClickHouse/ClickHouse/pull/11686) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). * Clear password from command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with explicit value. This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -234,7 +225,6 @@ * Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool). [#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)). 
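As a quick, hedged illustration of the `cast_keep_nullable` setting described above (the literal value is arbitrary):

```sql
-- With the setting enabled, CAST over a Nullable value keeps the Nullable wrapper,
-- so this is expected to return 'Nullable(UInt8)'.
SELECT toTypeName(CAST(toNullable(1) AS UInt8))
SETTINGS cast_keep_nullable = 1;
```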
* Show authentication type in table system.users and while executing SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)). * Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)). -* Apply `TTL` for old data, after `ALTER MODIFY TTL` query. This behaviour is controlled by setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)). * Add `NCHAR` and `NVARCHAR` synonyms for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove data on explicit `DROP DATABASE` for `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([tavplubix](https://github.com/tavplubix)). * Set thread names for internal threads of rdkafka library. Make logs from rdkafka available in server logs. [#10983](https://github.com/ClickHouse/ClickHouse/pull/10983) ([Azat Khuzhin](https://github.com/azat)). @@ -251,40 +241,50 @@ * Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Change HTTP response code in case of some parse errors to 400 Bad Request. This fixes [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). [#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make pointInPolygon work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)). +* Make pointInPolygon work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). * Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). * Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). -* - Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). +* Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)).
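A hedged sketch of the non-constant `pointInPolygon` usage mentioned above, with the polygon taken from a column rather than from a literal (names and coordinates are illustrative):

```sql
-- The second argument comes from a column, not a constant.
SELECT pointInPolygon((1.0, 1.0), poly)
FROM
(
    SELECT [(0.0, 0.0), (4.0, 0.0), (4.0, 4.0), (0.0, 4.0)] AS poly
);
```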
* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). * Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([alexey-milovidov](https://github.com/alexey-milovidov)). #### Performance Improvement -* This optimization eliminates min/max/any aggregators of GROUP BY keys in SELECT section. [#11667](https://github.com/ClickHouse/ClickHouse/pull/11667) ([xPoSx](https://github.com/xPoSx)). -* Allow multiple replicas to assign merges, mutations, partition drop, move and replace concurrently. This closes [#10367](https://github.com/ClickHouse/ClickHouse/issues/10367). [#11639](https://github.com/ClickHouse/ClickHouse/pull/11639) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* New optimization that takes all operations out of "any" function. [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). +* Allow multiple replicas to assign merges, mutations, partition drop, move and replace concurrently. This closes [#10367](https://github.com/ClickHouse/ClickHouse/issues/10367). [#11639](https://github.com/ClickHouse/ClickHouse/pull/11639) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#11795](https://github.com/ClickHouse/ClickHouse/pull/11795) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimization of GROUP BY with respect to table sorting key, enabled with `optimize_aggregation_in_order` setting. [#9113](https://github.com/ClickHouse/ClickHouse/pull/9113) ([dimarub2000](https://github.com/dimarub2000)). +* Selects with final are executed in parallel. Added setting `max_final_threads` to limit the number of threads used. [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve performance for INSERT queries via `INSERT SELECT` or INSERT with clickhouse-client when small blocks are generated (typical case with parallel parsing). This fixes [#11275](https://github.com/ClickHouse/ClickHouse/issues/11275). Fix the issue that CONSTRAINTs were not working for DEFAULT fields. This fixes [#11273](https://github.com/ClickHouse/ClickHouse/issues/11273). Fix the issue that CONSTRAINTS were ignored for TEMPORARY tables. This fixes [#11274](https://github.com/ClickHouse/ClickHouse/issues/11274). [#11276](https://github.com/ClickHouse/ClickHouse/pull/11276) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Optimization that eliminates min/max/any aggregators of GROUP BY keys in SELECT section, enabled with `optimize_aggregators_of_group_by_keys` setting. [#11667](https://github.com/ClickHouse/ClickHouse/pull/11667) ([xPoSx](https://github.com/xPoSx)). [#11806](https://github.com/ClickHouse/ClickHouse/pull/11806) ([Azat Khuzhin](https://github.com/azat)). +* New optimization that takes all operations out of `any` function, enabled with `optimize_move_functions_out_of_any` [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). * Improve performance of `clickhouse-client` in interactive mode when Pretty formats are used. In previous versions, significant amount of time can be spent calculating visible width of UTF-8 string. 
This closes [#11323](https://github.com/ClickHouse/ClickHouse/issues/11323). [#11323](https://github.com/ClickHouse/ClickHouse/pull/11323) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve performance for INSERT queries via INSERT SELECT or INSERT with clickhouse-client when small blocks are generated (typical case with parallel parsing). This fixes [#11275](https://github.com/ClickHouse/ClickHouse/issues/11275). Fix the issue that CONSTRAINTs were not working for DEFAULT fields. This fixes [#11273](https://github.com/ClickHouse/ClickHouse/issues/11273). Fix the issue that CONSTRAINTS were ignored for TEMPORARY tables. This fixes [#11274](https://github.com/ClickHouse/ClickHouse/issues/11274). [#11276](https://github.com/ClickHouse/ClickHouse/pull/11276) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improved performance for queries with `ORDER BY` and small `LIMIT` (less, then `max_block_size`). [#11171](https://github.com/ClickHouse/ClickHouse/pull/11171) ([Albert Kidrachev](https://github.com/Provet)). -* Enable mlock of clickhouse binary by default. It will prevent clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Enable `mlock` of clickhouse binary by default. It will prevent clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Make queries with `sum` aggregate function and without GROUP BY keys to run multiple times faster. [#10992](https://github.com/ClickHouse/ClickHouse/pull/10992) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improving radix sort by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). -* Get dictionary and check access rights only once per each call of any function reading external dictionaries. [#10928](https://github.com/ClickHouse/ClickHouse/pull/10928) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improving radix sort (used in `ORDER BY` with simple keys) by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). * Sort bigger parts of the left table in MergeJoin. Buffer left blocks in memory. Add `partial_merge_join_left_table_buffer_bytes` setting to manage the left blocks buffers sizes. [#10601](https://github.com/ClickHouse/ClickHouse/pull/10601) ([Artem Zuikov](https://github.com/4ertus2)). -* Remove duplicate ORDER BY and DISTINCT from subqueries. [#10067](https://github.com/ClickHouse/ClickHouse/pull/10067) ([Mikhail Malafeev](https://github.com/demo-99)). -* This feature eliminates functions of other keys in GROUP BY section. [#10051](https://github.com/ClickHouse/ClickHouse/pull/10051) ([xPoSx](https://github.com/xPoSx)). -* New optimization that takes arithmetic operations out of aggregate functions. [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). -* Optimization of GROUP BY with respect to table sorting key. [#9113](https://github.com/ClickHouse/ClickHouse/pull/9113) ([dimarub2000](https://github.com/dimarub2000)). 
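Where the entries around this point mention the optimization that takes arithmetic operations out of aggregate functions, a minimal sketch of a query it applies to (the data source is illustrative; the setting name is the one quoted in the entry):

```sql
-- sum(number * 2) can be rewritten internally as sum(number) * 2.
SELECT sum(number * 2)
FROM numbers(1000)
SETTINGS optimize_arithmetic_operations_in_aggregate_functions = 1;
```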
+* Remove duplicate ORDER BY and DISTINCT from subqueries; this optimization is enabled with the `optimize_duplicate_order_by_and_distinct` setting. [#10067](https://github.com/ClickHouse/ClickHouse/pull/10067) ([Mikhail Malafeev](https://github.com/demo-99)). +* This feature eliminates functions of other keys in GROUP BY section, enabled with the `optimize_group_by_function_keys` setting. [#10051](https://github.com/ClickHouse/ClickHouse/pull/10051) ([xPoSx](https://github.com/xPoSx)). +* New optimization that takes arithmetic operations out of aggregate functions, enabled with the `optimize_arithmetic_operations_in_aggregate_functions` setting. [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). +* Use HTTP client for S3 based on Poco instead of curl. This will improve performance and lower memory usage of S3 storage and table functions. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). + +#### Experimental Feature + +* Add data type `Point` (Tuple(Float64, Float64)) and `Polygon` (Array(Array(Tuple(Float64, Float64)))). [#10678](https://github.com/ClickHouse/ClickHouse/pull/10678) ([Alexey Ilyukhov](https://github.com/livace)). +* Adds a `hasSubstr` function that allows looking for subsequences in arrays. Note: this function is likely to be renamed without further notice. [#11071](https://github.com/ClickHouse/ClickHouse/pull/11071) ([Ryad Zenine](https://github.com/r-zenine)). +* Added OpenCL support and bitonic sort algorithm, which can be used for sorting integer types of data in a single column. Needs to be built with the flag `-DENABLE_OPENCL=1`. For using the bitonic sort algorithm instead of others you need to set `bitonic_sort` for the Settings option `special_sort` and make sure that OpenCL is available. This feature does not improve performance or anything else, it is only provided as an example and for demonstration purposes. It is likely to be removed in the near future if there is no further development in this direction. [#10232](https://github.com/ClickHouse/ClickHouse/pull/10232) ([Ri](https://github.com/margaritiko)). #### Build/Testing/Packaging Improvement +* Enable clang-tidy for programs and utils. [#10991](https://github.com/ClickHouse/ClickHouse/pull/10991) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove dependency on `tzdata`: do not fail if `/usr/share/zoneinfo` directory does not exist. Note that all timezones work in ClickHouse even without tzdata installed in the system. [#11827](https://github.com/ClickHouse/ClickHouse/pull/11827) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added MSan and UBSan stress tests. Note that we already have MSan and UBSan for functional tests, and the "stress" test is another kind of test. [#10871](https://github.com/ClickHouse/ClickHouse/pull/10871) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Print compiler build id in crash messages. It will make us slightly more certain about what binary has crashed. Added new function `buildId`. [#11824](https://github.com/ClickHouse/ClickHouse/pull/11824) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Added a test to ensure that mutations continue to work after FREEZE query.
[#11820](https://github.com/ClickHouse/ClickHouse/pull/11820) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Don't allow tests with "fail" substring in their names because it makes looking at the tests results in browser less convenient when you type Ctrl+F and search for "fail". [#11817](https://github.com/ClickHouse/ClickHouse/pull/11817) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Removes unused imports from HTTPHandlerFactory. [#11660](https://github.com/ClickHouse/ClickHouse/pull/11660) ([Bharat Nallan](https://github.com/bharatnc)). * Added a random sampling of instances where copier is executed. It is needed to avoid `Too many simultaneous queries` error. Also increased timeout and decreased fault probability. [#11573](https://github.com/ClickHouse/ClickHouse/pull/11573) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix missed include for `std::move` used at line 17. [#11525](https://github.com/ClickHouse/ClickHouse/pull/11525) ([Matwey V. Kornilov](https://github.com/matwey)). +* Fix missed include. [#11525](https://github.com/ClickHouse/ClickHouse/pull/11525) ([Matwey V. Kornilov](https://github.com/matwey)). * Speed up build by removing old example programs. Also found some orphan functional test. [#11486](https://github.com/ClickHouse/ClickHouse/pull/11486) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Increase ccache size for builds in CI. [#11450](https://github.com/ClickHouse/ClickHouse/pull/11450) ([alesapin](https://github.com/alesapin)). * Leave only unit_tests_dbms in deb build. [#11429](https://github.com/ClickHouse/ClickHouse/pull/11429) ([Ilya Yatsishin](https://github.com/qoega)). @@ -305,50 +305,44 @@ * Enable performance test that was not working. [#11158](https://github.com/ClickHouse/ClickHouse/pull/11158) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Create root S3 bucket for tests before any CH instance is started. [#11142](https://github.com/ClickHouse/ClickHouse/pull/11142) ([Pavel Kovalenko](https://github.com/Jokser)). * Add performance test for non-constant polygons. [#11141](https://github.com/ClickHouse/ClickHouse/pull/11141) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make `system_tables_lazy_load` false by default. [#11029](https://github.com/ClickHouse/ClickHouse/pull/11029) ([Azat Khuzhin](https://github.com/azat)). -* Fixing 00979_live_view_watch_continuous_aggregates test. [#11024](https://github.com/ClickHouse/ClickHouse/pull/11024) ([vzakaznikov](https://github.com/vzakaznikov)). +* Fixing `00979_live_view_watch_continuous_aggregates` test. [#11024](https://github.com/ClickHouse/ClickHouse/pull/11024) ([vzakaznikov](https://github.com/vzakaznikov)). * Add ability to run zookeeper in integration tests over tmpfs. [#11002](https://github.com/ClickHouse/ClickHouse/pull/11002) ([alesapin](https://github.com/alesapin)). -* Enable clang-tidy for programs and utils. [#10991](https://github.com/ClickHouse/ClickHouse/pull/10991) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Wait for odbc-bridge with exponential backoff. Previous wait time of 200 ms was not enough in our CI environment. [#10990](https://github.com/ClickHouse/ClickHouse/pull/10990) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix non-deterministic test. [#10989](https://github.com/ClickHouse/ClickHouse/pull/10989) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Return tzdata to build images and as dependency to .deb package. 
[#10929](https://github.com/ClickHouse/ClickHouse/pull/10929) ([alesapin](https://github.com/alesapin)). * Added a test for empty external data. [#10926](https://github.com/ClickHouse/ClickHouse/pull/10926) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Database is recreated for every test. This improves separation of tests. [#10902](https://github.com/ClickHouse/ClickHouse/pull/10902) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Prepare to add MSan and UBSan stress tests. [#10871](https://github.com/ClickHouse/ClickHouse/pull/10871) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Added more asserts in columns code. [#10833](https://github.com/ClickHouse/ClickHouse/pull/10833) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Better cooperation with sanitizers. Print information about query_id in the message of sanitizer failure. [#10832](https://github.com/ClickHouse/ClickHouse/pull/10832) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix obvious race condition in "Split build smoke test" check. [#10820](https://github.com/ClickHouse/ClickHouse/pull/10820) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix (false) MSan report in MergeTreeIndexFullText. The issue first appeared in [#9968](https://github.com/ClickHouse/ClickHouse/issues/9968). [#10801](https://github.com/ClickHouse/ClickHouse/pull/10801) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add MSan suppression for MariaDB Client library. [#10800](https://github.com/ClickHouse/ClickHouse/pull/10800) ([alexey-milovidov](https://github.com/alexey-milovidov)). * GRPC make couldn't find protobuf files, changed make file by adding the right link. [#10794](https://github.com/ClickHouse/ClickHouse/pull/10794) ([mnkonkova](https://github.com/mnkonkova)). -* Enable extra warnings for base, utils, programs. [#10779](https://github.com/ClickHouse/ClickHouse/pull/10779) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Enable extra warnings (`-Weverything`) for base, utils, programs. Note that we already have it for the most of the code. [#10779](https://github.com/ClickHouse/ClickHouse/pull/10779) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Suppressions of warnings from libraries was mistakenly declared as public in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10776](https://github.com/ClickHouse/ClickHouse/pull/10776) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Restore a patch that was accidentially deleted in [#10396](https://github.com/ClickHouse/ClickHouse/issues/10396). [#10774](https://github.com/ClickHouse/ClickHouse/pull/10774) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix performance tests errors, part 2. [#10773](https://github.com/ClickHouse/ClickHouse/pull/10773) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix performance test errors. [#10766](https://github.com/ClickHouse/ClickHouse/pull/10766) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Update cross-builds to use clang-10 compiler. [#10724](https://github.com/ClickHouse/ClickHouse/pull/10724) ([Ivan](https://github.com/abyss7)). * Update instruction to install RPM packages. This was suggested by Denis (TG login @ldviolet) and implemented by Arkady Shejn. [#10707](https://github.com/ClickHouse/ClickHouse/pull/10707) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Trying to fix tests/queries/0_stateless/01246_insert_into_watch_live_view.py test. 
[#10670](https://github.com/ClickHouse/ClickHouse/pull/10670) ([vzakaznikov](https://github.com/vzakaznikov)). -* Volumes and storages refactoring. [#10666](https://github.com/ClickHouse/ClickHouse/pull/10666) ([Gleb Novikov](https://github.com/NanoBjorn)). -* Update zstd to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Trying to fix `tests/queries/0_stateless/01246_insert_into_watch_live_view.py` test. [#10670](https://github.com/ClickHouse/ClickHouse/pull/10670) ([vzakaznikov](https://github.com/vzakaznikov)). * Fixing and re-enabling 00979_live_view_watch_continuous_aggregates.py test. [#10658](https://github.com/ClickHouse/ClickHouse/pull/10658) ([vzakaznikov](https://github.com/vzakaznikov)). * Fix OOM in ASan stress test. [#10646](https://github.com/ClickHouse/ClickHouse/pull/10646) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove external call to `ld` (bfd) linker during tzdata processing in compile time. [#10634](https://github.com/ClickHouse/ClickHouse/pull/10634) ([alesapin](https://github.com/alesapin)). -* Allow to use lld to link blobs (resources). [#10632](https://github.com/ClickHouse/ClickHouse/pull/10632) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Update LZ4 to the latest dev branch. It may fix the error under UBSan. [#10630](https://github.com/ClickHouse/ClickHouse/pull/10630) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Added auto-generated machine-readable file with list of stable versions. [#10628](https://github.com/ClickHouse/ClickHouse/pull/10628) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow to use `lld` to link blobs (resources). [#10632](https://github.com/ClickHouse/ClickHouse/pull/10632) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in `LZ4` library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)). See also [https://github.com/lz4/lz4/issues/857](https://github.com/lz4/lz4/issues/857) +* Update LZ4 to the latest dev branch. [#10630](https://github.com/ClickHouse/ClickHouse/pull/10630) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added auto-generated machine-readable file with the list of stable versions. [#10628](https://github.com/ClickHouse/ClickHouse/pull/10628) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix `capnproto` version check for `capnp::UnalignedFlatArrayMessageReader`. [#10618](https://github.com/ClickHouse/ClickHouse/pull/10618) ([Matwey V. Kornilov](https://github.com/matwey)). -* Lower memory usage in tests. It may fix the issue that "address sanitizer is out of memory" in stress test. 
[#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Lower memory usage in tests. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)). * Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)). * Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)). * Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)). -* Fix UBSan report in Decimal parse. This fixes [#7540](https://github.com/ClickHouse/ClickHouse/issues/7540). [#10512](https://github.com/ClickHouse/ClickHouse/pull/10512) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)). * Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)). + ## ClickHouse release v20.4 ### ClickHouse release v20.4.6.53-stable 2020-06-25 From 70e56180a66616bace122609ed0aa56b2ffef01a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:02:40 +0300 Subject: [PATCH 261/330] Cleanup changelog (half done). Now it is more acceptable #12104 --- CHANGELOG.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f3fbade384..fd9f715197b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ * `TTL DELETE WHERE` and `TTL GROUP BY` for automatic data coarsening and rollup in tables. [#10537](https://github.com/ClickHouse/ClickHouse/pull/10537) ([expl0si0nn](https://github.com/expl0si0nn)). * Implementation of PostgreSQL wire protocol. [#10242](https://github.com/ClickHouse/ClickHouse/pull/10242) ([Movses](https://github.com/MovElb)). +* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). * Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). * Add query performance metrics based on Linux `perf_events` (these metrics are calculated with hardware CPU counters and OS counters). It is optional and requires `CAP_SYS_ADMIN` to be set on clickhouse binary. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Now support `NULL` and `NOT NULL` modifiers for data types in `CREATE` query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). 
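A short, hedged sketch of the `NULL` / `NOT NULL` column modifiers mentioned in the last entry above (table and column names are hypothetical); `NULL` is shorthand for a `Nullable` column:

```sql
CREATE TABLE t_example
(
    id UInt64 NOT NULL,
    comment String NULL   -- equivalent to Nullable(String)
)
ENGINE = MergeTree
ORDER BY id;
```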
@@ -174,7 +175,11 @@ #### Improvement * Apply `TTL` for old data, after `ALTER MODIFY TTL` query. This behaviour is controlled by setting `materialize_ttl_after_modify`, which is enabled by default. [#11042](https://github.com/ClickHouse/ClickHouse/pull/11042) ([Anton Popov](https://github.com/CurtizJ)). +* When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([alexey-milovidov](https://github.com/alexey-milovidov)). * When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add `round_robin` load_balancing. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). * Slightly improve diagnostic of reading decimal from string. This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([alexey-milovidov](https://github.com/alexey-milovidov)). * (Only Linux) OS related performance metrics (for CPU and I/O) will work even without `CAP_NET_ADMIN` capability. [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). @@ -190,12 +195,9 @@ * Don't use debug info from ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). 
[#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Skip empty parameters in requested URL. They may appear when you write `http://localhost:8123/?&a=b` or `http://localhost:8123/?a=b&&c=d`. This closes [#10749](https://github.com/ClickHouse/ClickHouse/issues/10749). [#11651](https://github.com/ClickHouse/ClickHouse/pull/11651) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Allow using `groupArrayArray` and `groupUniqArrayArray` as `SimpleAggregateFunction`. [#11650](https://github.com/ClickHouse/ClickHouse/pull/11650) ([Volodymyr Kuznetsov](https://github.com/ksvladimir)). * Allow comparison with constant strings by implicit conversions when analysing index conditions on other types. This may close [#11630](https://github.com/ClickHouse/ClickHouse/issues/11630). [#11648](https://github.com/ClickHouse/ClickHouse/pull/11648) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add round_robin load_balancing. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). * https://github.com/ClickHouse/ClickHouse/pull/7572#issuecomment-642815377 Support config default HTTPHandlers. [#11628](https://github.com/ClickHouse/ClickHouse/pull/11628) ([Winter Zhang](https://github.com/zhang2014)). * Make more input format work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). * Improve `multiple_joins_rewriter_version=2` logic. Fix unknown columns error for lambda aliases. [#11587](https://github.com/ClickHouse/ClickHouse/pull/11587) ([Artem Zuikov](https://github.com/4ertus2)). @@ -218,7 +220,6 @@ * Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). * Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
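To illustrate the stricter parsing behaviour of `parseDateTimeBestEffortOrNull` mentioned above, a minimal sketch (the input string is arbitrary):

```sql
-- The value is not parsed completely because of the trailing text,
-- so the function is expected to return NULL after this change.
SELECT parseDateTimeBestEffortOrNull('2020-06-01 12:00:00 trailing garbage');
```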
* Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). -* When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([alexey-milovidov](https://github.com/alexey-milovidov)). * The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). * Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). * Add port() function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). @@ -241,11 +242,10 @@ * Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Change HTTP response code in case of some parse errors to 400 Bad Request. This fix [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). [#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make pointInPolygon work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Make `pointInPolygon` work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). * Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). * Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). * Adding support for `INSERT INTO [db.]table WATCH` query. 
[#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). -* Added system tables for users, roles, grants, settings profiles, quotas, row policies; added commands SHOW USER, SHOW [CURRENT|ENABLED] ROLES, SHOW SETTINGS PROFILES. [#10387](https://github.com/ClickHouse/ClickHouse/pull/10387) ([Vitaly Baranov](https://github.com/vitlibar)). * Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([alexey-milovidov](https://github.com/alexey-milovidov)). #### Performance Improvement From 7997da377c0c0c68a406a05ce866edc4dd283678 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:09:51 +0300 Subject: [PATCH 262/330] Cleanup changelog (half done). Now it is more acceptable #12104 --- CHANGELOG.md | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd9f715197b..187a29d3940 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * The setting `input_format_with_names_use_header` is enabled by default. It will affect parsing of input formats `-WithNames` and `-WithNamesAndTypes`. [#10937](https://github.com/ClickHouse/ClickHouse/pull/10937) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). #### New Feature @@ -29,6 +30,23 @@ * Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). * Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). [#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). * Add function `fuzzBits` that randomly flips bits in a string with given probability. [#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/xolm)). +* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add `round_robin` load_balancing mode. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). +* Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. 
[#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). +* Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). [#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* ON CLUSTER support for SYSTEM {FLUSH DISTRIBUTED,STOP/START DISTRIBUTED SEND}. [#11415](https://github.com/ClickHouse/ClickHouse/pull/11415) ([Azat Khuzhin](https://github.com/azat)). +* Add system.distribution_queue table. [#11394](https://github.com/ClickHouse/ClickHouse/pull/11394) ([Azat Khuzhin](https://github.com/azat)). +* Support for all format settings in Kafka, expose some setting on table level, adjust the defaults for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). +* Add `port` function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). +* Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)). +* The `clickhouse-format` tool is now able to format multiple queries when the `-n` argument is used. [#10852](https://github.com/ClickHouse/ClickHouse/pull/10852) ([Darío](https://github.com/dgrr)). +* Possibility to configure proxy-resolver for DiskS3. [#10744](https://github.com/ClickHouse/ClickHouse/pull/10744) ([Pavel Kovalenko](https://github.com/Jokser)). +* Make `pointInPolygon` work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). +* Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). +* Add `NCHAR` and `NVARCHAR` synonims for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). +* Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Allow to specify default S3 credentials and custom auth headers. [#11134](https://github.com/ClickHouse/ClickHouse/pull/11134) ([Grigory Pervakov](https://github.com/GrigoryPervakov)). * Added new functions to import/export DateTime64 as Int64 with various precision: `to-/fromUnixTimestamp64Milli/-Micro/-Nano`. 
[#10923](https://github.com/ClickHouse/ClickHouse/pull/10923) ([Vasily Nemkov](https://github.com/Enmk)). * Allow specifying `mongodb://` URI for MongoDB dictionaries. [#10915](https://github.com/ClickHouse/ClickHouse/pull/10915) ([Alexander Kuzmenkov](https://github.com/akuzm)). @@ -178,14 +196,10 @@ * When parsing C-style backslash escapes in string literals, VALUES and various text formats (this is an extension to SQL standard that is endemic for ClickHouse and MySQL), keep backslash if unknown escape sequence is found (e.g. `\%` or `\w`) that will make usage of `LIKE` and `match` regular expressions more convenient (it's enough to write `name LIKE 'used\_cars'` instead of `name LIKE 'used\\_cars'`) and more compatible at the same time. This fixes [#10922](https://github.com/ClickHouse/ClickHouse/issues/10922). [#11208](https://github.com/ClickHouse/ClickHouse/pull/11208) ([alexey-milovidov](https://github.com/alexey-milovidov)). * When reading Decimal value, cut extra digits after point. This behaviour is more compatible with MySQL and PostgreSQL. This fixes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11831](https://github.com/ClickHouse/ClickHouse/pull/11831) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Allow to DROP replicated table if the metadata in ZooKeeper was already removed and does not exist (this is also the case when using TestKeeper for testing and the server was restarted). Allow to RENAME replicated table even if there is an error communicating with ZooKeeper. This fixes [#10720](https://github.com/ClickHouse/ClickHouse/issues/10720). [#11652](https://github.com/ClickHouse/ClickHouse/pull/11652) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add `round_robin` load_balancing. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). * Slightly improve diagnostic of reading decimal from string. This closes [#10202](https://github.com/ClickHouse/ClickHouse/issues/10202). [#11829](https://github.com/ClickHouse/ClickHouse/pull/11829) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix sleep invocation in signal handler. It was sleeping for less amount of time than expected. [#11825](https://github.com/ClickHouse/ClickHouse/pull/11825) ([alexey-milovidov](https://github.com/alexey-milovidov)). * (Only Linux) OS related performance metrics (for CPU and I/O) will work even without `CAP_NET_ADMIN` capability. [#10544](https://github.com/ClickHouse/ClickHouse/pull/10544) ([Alexander Kazakov](https://github.com/Akazz)). * Added `hostname` as an alias to function `hostName`. This feature was suggested by Victor Tarnavskiy from Yandex.Metrica. [#11821](https://github.com/ClickHouse/ClickHouse/pull/11821) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add more `jemalloc` statistics to `system.asynchronous_metrics`, and ensure that we see up-to-date values for them. [#11748](https://github.com/ClickHouse/ClickHouse/pull/11748) ([Alexander Kuzmenkov](https://github.com/akuzm)). -* Add `cast_keep_nullable` setting. If set `CAST(something_nullable AS Type)` return `Nullable(Type)`. [#11733](https://github.com/ClickHouse/ClickHouse/pull/11733) ([Artem Zuikov](https://github.com/4ertus2)). 
* Added support for distributed `DDL` (update/delete/drop partition) on cross replication clusters. [#11703](https://github.com/ClickHouse/ClickHouse/pull/11703) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Emit warning instead of error in server log at startup if we cannot listen one of the listen addresses (e.g. IPv6 is unavailable inside Docker). Note that if server fails to listen all listed addresses, it will refuse to startup as before. This fixes [#4406](https://github.com/ClickHouse/ClickHouse/issues/4406). [#11687](https://github.com/ClickHouse/ClickHouse/pull/11687) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Default user and database creation on docker image starting. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). @@ -193,22 +207,17 @@ * Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). * Clear password from command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with explicit value. This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Don't use debug info from ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Added column `position` to `system.columns` table and `column_position` to `system.parts_columns` table. It contains ordinal position of a column in a table starting with 1. This closes [#7744](https://github.com/ClickHouse/ClickHouse/issues/7744). [#11655](https://github.com/ClickHouse/ClickHouse/pull/11655) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Skip empty parameters in requested URL. They may appear when you write `http://localhost:8123/?&a=b` or `http://localhost:8123/?a=b&&c=d`. This closes [#10749](https://github.com/ClickHouse/ClickHouse/issues/10749). [#11651](https://github.com/ClickHouse/ClickHouse/pull/11651) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Allow using `groupArrayArray` and `groupUniqArrayArray` as `SimpleAggregateFunction`. [#11650](https://github.com/ClickHouse/ClickHouse/pull/11650) ([Volodymyr Kuznetsov](https://github.com/ksvladimir)). * Allow comparison with constant strings by implicit conversions when analysing index conditions on other types. This may close [#11630](https://github.com/ClickHouse/ClickHouse/issues/11630). [#11648](https://github.com/ClickHouse/ClickHouse/pull/11648) ([alexey-milovidov](https://github.com/alexey-milovidov)). * https://github.com/ClickHouse/ClickHouse/pull/7572#issuecomment-642815377 Support config default HTTPHandlers. [#11628](https://github.com/ClickHouse/ClickHouse/pull/11628) ([Winter Zhang](https://github.com/zhang2014)). 
-* Make more input format work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). +* Make more input formats to work with Kafka engine. Fix the issue with premature flushes. Fix the performance issue when `kafka_num_consumers` is greater than number of partitions in topic. [#11599](https://github.com/ClickHouse/ClickHouse/pull/11599) ([filimonov](https://github.com/filimonov)). * Improve `multiple_joins_rewriter_version=2` logic. Fix unknown columns error for lambda aliases. [#11587](https://github.com/ClickHouse/ClickHouse/pull/11587) ([Artem Zuikov](https://github.com/4ertus2)). -* Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). * Better exception message when cannot parse columns declaration list. This closes [#10403](https://github.com/ClickHouse/ClickHouse/issues/10403). [#11537](https://github.com/ClickHouse/ClickHouse/pull/11537) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improve `enable_optimize_predicate_expression=1` logic for VIEW. [#11513](https://github.com/ClickHouse/ClickHouse/pull/11513) ([Artem Zuikov](https://github.com/4ertus2)). * Adding support for PREWHERE in live view tables. [#11495](https://github.com/ClickHouse/ClickHouse/pull/11495) ([vzakaznikov](https://github.com/vzakaznikov)). * Automatically update DNS cache, which is used to check if user is allowed to connect from an address. [#11487](https://github.com/ClickHouse/ClickHouse/pull/11487) ([tavplubix](https://github.com/tavplubix)). -* ON CLUSTER support for SYSTEM {FLUSH DISTRIBUTED,STOP/START DISTRIBUTED SEND}. [#11415](https://github.com/ClickHouse/ClickHouse/pull/11415) ([Azat Khuzhin](https://github.com/azat)). -* Add system.distribution_queue table. [#11394](https://github.com/ClickHouse/ClickHouse/pull/11394) ([Azat Khuzhin](https://github.com/azat)). -* Support for all format settings in Kafka, expose some setting on table level, adjust the defaults for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). * OPTIMIZE FINAL will force merge even if concurrent merges are performed. This closes [#11309](https://github.com/ClickHouse/ClickHouse/issues/11309) and closes [#11322](https://github.com/ClickHouse/ClickHouse/issues/11322). [#11346](https://github.com/ClickHouse/ClickHouse/pull/11346) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Suppress output of cancelled queries in clickhouse-client. In previous versions result may continue to print in terminal even after you press Ctrl+C to cancel query. This closes [#9473](https://github.com/ClickHouse/ClickHouse/issues/9473). [#11342](https://github.com/ClickHouse/ClickHouse/pull/11342) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Better log messages in while reloading configuration. [#11341](https://github.com/ClickHouse/ClickHouse/pull/11341) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -216,35 +225,22 @@ * Add setting "output_format_pretty_max_value_width". If value is longer, it will be cut to avoid output of too large values in terminal. This closes [#11140](https://github.com/ClickHouse/ClickHouse/issues/11140). 
[#11324](https://github.com/ClickHouse/ClickHouse/pull/11324) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Better exception message in case when there is shortage of memory mappings. This closes [#11027](https://github.com/ClickHouse/ClickHouse/issues/11027). [#11316](https://github.com/ClickHouse/ClickHouse/pull/11316) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Support (U)Int8, (U)Int16, Date in ASOF JOIN. [#11301](https://github.com/ClickHouse/ClickHouse/pull/11301) ([Artem Zuikov](https://github.com/4ertus2)). -* Update librdkafka to version [1.4.2](https://github.com/edenhill/librdkafka/releases/tag/v1.4.2). [#11256](https://github.com/ClickHouse/ClickHouse/pull/11256) ([filimonov](https://github.com/filimonov)). * Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). * Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). * The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). -* Resolved [#7224](https://github.com/ClickHouse/ClickHouse/issues/7224): added `FailedQuery`, `FailedSelectQuery` and `FailedInsertQuery` metrics to `system.events` table. [#11151](https://github.com/ClickHouse/ClickHouse/pull/11151) ([Nikita Orlov](https://github.com/naorlov)). -* Add port() function (to extract port from URL). [#11120](https://github.com/ClickHouse/ClickHouse/pull/11120) ([Azat Khuzhin](https://github.com/azat)). -* Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool). [#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)). * Show authentication type in table system.users and while executing SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)). -* Now `dictGet*` functions accept table names. [#11050](https://github.com/ClickHouse/ClickHouse/pull/11050) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add `NCHAR` and `NVARCHAR` synonims for data types. [#11025](https://github.com/ClickHouse/ClickHouse/pull/11025) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove data on explicit `DROP DATABASE` for `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([tavplubix](https://github.com/tavplubix)). * Set thread names for internal threads of rdkafka library. Make logs from rdkafka available in server logs. [#10983](https://github.com/ClickHouse/ClickHouse/pull/10983) ([Azat Khuzhin](https://github.com/azat)). 
-* Some additions and cleanup for [#10232](https://github.com/ClickHouse/ClickHouse/issues/10232). [#10934](https://github.com/ClickHouse/ClickHouse/pull/10934) ([Artem Zuikov](https://github.com/4ertus2)). * Support for unicode whitespaces in queries. This helps when queries are copy-pasted from Word or from web page. This fixes [#10896](https://github.com/ClickHouse/ClickHouse/issues/10896). [#10903](https://github.com/ClickHouse/ClickHouse/pull/10903) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Allow large UInt types as the index in function `tupleElement`. [#10874](https://github.com/ClickHouse/ClickHouse/pull/10874) ([hcz](https://github.com/hczhcz)). * Respect prefer_localhost_replica/load_balancing on INSERT into Distributed. [#10867](https://github.com/ClickHouse/ClickHouse/pull/10867) ([Azat Khuzhin](https://github.com/azat)). -* Introduce `min_insert_block_size_rows_for_materialized_views ` , `min_insert_block_size_bytes_for_materialized_views` settings. This settings are similar to `min_insert_block_size_rows` and `min_insert_block_size_bytes`, but applied only for blocks inserted into `MATERIALIZED VIEW`. It helps to control blocks squashing while pushing to MVs and avoid excessive memory usage. [#10858](https://github.com/ClickHouse/ClickHouse/pull/10858) ([Azat Khuzhin](https://github.com/azat)). -* The `clickhouse-format` tool is now able to format multiple queries when the `-n` argument is used. [#10852](https://github.com/ClickHouse/ClickHouse/pull/10852) ([Darío](https://github.com/dgrr)). +* Introduce `min_insert_block_size_rows_for_materialized_views`, `min_insert_block_size_bytes_for_materialized_views` settings. This settings are similar to `min_insert_block_size_rows` and `min_insert_block_size_bytes`, but applied only for blocks inserted into `MATERIALIZED VIEW`. It helps to control blocks squashing while pushing to MVs and avoid excessive memory usage. [#10858](https://github.com/ClickHouse/ClickHouse/pull/10858) ([Azat Khuzhin](https://github.com/azat)). * Get rid of exception from replicated queue during server shutdown. Fixes [#10819](https://github.com/ClickHouse/ClickHouse/issues/10819). [#10841](https://github.com/ClickHouse/ClickHouse/pull/10841) ([alesapin](https://github.com/alesapin)). * Ensure that `varSamp`, `varPop` cannot return negative results due to numerical errors and that `stddevSamp`, `stddevPop` cannot be calculated from negative variance. This fixes [#10532](https://github.com/ClickHouse/ClickHouse/issues/10532). [#10829](https://github.com/ClickHouse/ClickHouse/pull/10829) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Better DNS exception message. This fixes [#10813](https://github.com/ClickHouse/ClickHouse/issues/10813). [#10828](https://github.com/ClickHouse/ClickHouse/pull/10828) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Possibility to configure proxy-resolver for DiskS3. [#10744](https://github.com/ClickHouse/ClickHouse/pull/10744) ([Pavel Kovalenko](https://github.com/Jokser)). -* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Change HTTP response code in case of some parse errors to 400 Bad Request. This fix [#10636](https://github.com/ClickHouse/ClickHouse/issues/10636). 
[#10640](https://github.com/ClickHouse/ClickHouse/pull/10640) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Print a message if clickhouse-client is newer than clickhouse-server. [#10627](https://github.com/ClickHouse/ClickHouse/pull/10627) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make `pointInPolygon` work with non-constant polygon. PointInPolygon now can take Array(Array(Tuple(..., ...))) as second argument, array of polygon and holes. [#10623](https://github.com/ClickHouse/ClickHouse/pull/10623) ([Alexey Ilyukhov](https://github.com/livace)) [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)). -* Added `move_ttl_info` to `system.parts` in order to provide introspection of move TTL functionality. [#10591](https://github.com/ClickHouse/ClickHouse/pull/10591) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Possibility to work with S3 through proxies. [#10576](https://github.com/ClickHouse/ClickHouse/pull/10576) ([Pavel Kovalenko](https://github.com/Jokser)). * Adding support for `INSERT INTO [db.]table WATCH` query. [#10498](https://github.com/ClickHouse/ClickHouse/pull/10498) ([vzakaznikov](https://github.com/vzakaznikov)). * Allow to pass quota_key in clickhouse-client. This closes [#10227](https://github.com/ClickHouse/ClickHouse/issues/10227). [#10270](https://github.com/ClickHouse/ClickHouse/pull/10270) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -267,6 +263,8 @@ * New optimization that takes arithmetic operations out of aggregate functions, enabled with `optimize_arithmetic_operations_in_aggregate_functions` [#10047](https://github.com/ClickHouse/ClickHouse/pull/10047) ([Ruslan](https://github.com/kamalov-ruslan)). * Use HTTP client for S3 based on Poco instead of curl. This will improve performance and lower memory usage of s3 storage and table functions. [#11230](https://github.com/ClickHouse/ClickHouse/pull/11230) ([Pavel Kovalenko](https://github.com/Jokser)). * Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). +* Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool). [#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)). +* Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). #### Experimental Feature @@ -288,6 +286,7 @@ * Speed up build by removing old example programs. Also found some orphan functional test. [#11486](https://github.com/ClickHouse/ClickHouse/pull/11486) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Increase ccache size for builds in CI. [#11450](https://github.com/ClickHouse/ClickHouse/pull/11450) ([alesapin](https://github.com/alesapin)). * Leave only unit_tests_dbms in deb build. [#11429](https://github.com/ClickHouse/ClickHouse/pull/11429) ([Ilya Yatsishin](https://github.com/qoega)). +* Update librdkafka to version [1.4.2](https://github.com/edenhill/librdkafka/releases/tag/v1.4.2). [#11256](https://github.com/ClickHouse/ClickHouse/pull/11256) ([filimonov](https://github.com/filimonov)). * Refactor CMake build files. [#11390](https://github.com/ClickHouse/ClickHouse/pull/11390) ([Ivan](https://github.com/abyss7)). * Fix several flaky integration tests. 
[#11355](https://github.com/ClickHouse/ClickHouse/pull/11355) ([alesapin](https://github.com/alesapin)). * Add support for unit tests run with UBSan. [#11345](https://github.com/ClickHouse/ClickHouse/pull/11345) ([alexey-milovidov](https://github.com/alexey-milovidov)). From 89497a08fe4e1dffd9069771ece161e078d244fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:12:23 +0300 Subject: [PATCH 263/330] Cleanup changelog (half done). Additions requested by @filimonov #12104 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 187a29d3940..5211da4203b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * Remove `experimental_use_processors` setting. It is enabled by default. [#10924](https://github.com/ClickHouse/ClickHouse/pull/10924) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Several Kafka setting changes their defaults. See [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388). #### New Feature @@ -265,6 +266,7 @@ * Fix Kafka performance issue related to reschedules based on limits, which were always applied. [#11149](https://github.com/ClickHouse/ClickHouse/pull/11149) ([filimonov](https://github.com/filimonov)). * Enable percpu_arena:percpu for jemalloc (This will reduce memory fragmentation due to thread pool). [#11084](https://github.com/ClickHouse/ClickHouse/pull/11084) ([Azat Khuzhin](https://github.com/azat)). * Optimize memory usage when reading a response from an S3 HTTP client. [#11561](https://github.com/ClickHouse/ClickHouse/pull/11561) ([Pavel Kovalenko](https://github.com/Jokser)). +* Adjust the default Kafka settings for better performance. [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388) ([filimonov](https://github.com/filimonov)). #### Experimental Feature From 5eb60d5bd381124e36f2a03eb95980a70beaf671 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:16:32 +0300 Subject: [PATCH 264/330] Remove underscore as word-break character. 
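With underscore removed from the word-break set, tab completion in clickhouse-client treats identifiers that contain underscores as a single word. A hypothetical session for illustration (the exact completion depends on the suggestions loaded from the server):

    :) SET max_distributed<Tab>
    :) SET max_distributed_connections
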
This partially reverts #11975 --- base/common/LineReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/common/LineReader.h b/base/common/LineReader.h index 6f6e8176c9e..77dc70d8808 100644 --- a/base/common/LineReader.h +++ b/base/common/LineReader.h @@ -48,7 +48,7 @@ protected: }; const String history_file_path; - static constexpr char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?_"; + static constexpr char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?"; String input; From 05ee63b7f5cadf6d5aed5404d769d3fc6424a077 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:18:20 +0300 Subject: [PATCH 265/330] Fix bad test number --- ...ear_of_ISO8601_week_modificators_for_formatDateTime.reference} | 0 ...1362_year_of_ISO8601_week_modificators_for_formatDateTime.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference => 01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference} (100%) rename tests/queries/0_stateless/{04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql => 01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql} (100%) diff --git a/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference similarity index 100% rename from tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.reference rename to tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.reference diff --git a/tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql b/tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql similarity index 100% rename from tests/queries/0_stateless/04659_year_of_ISO8601_week_modificators_for_formatDateTime.sql rename to tests/queries/0_stateless/01362_year_of_ISO8601_week_modificators_for_formatDateTime.sql From f2672233d993813475d3ea79966145aaa47dca1d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 01:45:28 +0300 Subject: [PATCH 266/330] Added a test --- ...tocomplete_word_break_characters.reference | 0 ...ient_autocomplete_word_break_characters.sh | 29 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.reference create mode 100755 tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.reference b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh new file mode 100755 index 00000000000..0e895e55cd0 --- /dev/null +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.sh @@ -0,0 +1,29 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 1 +match_max 100000 + +spawn clickhouse-client +expect ":) " + +# Make a query +send -- "SET max_distributed" +expect "SET max_distributed" + +# Wait for suggestions to load, they are loaded in background +set is_done 0 +while {$is_done == 0} { + send -- "\t" + expect { 
+ "_connections" { + set is_done 1 + } + default { + sleep 1 + } + } +} + +send -- "\3\4" +expect eof From 3ae9d3c9f3e3920492db28751b2c8cd418887d36 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 02:13:29 +0300 Subject: [PATCH 267/330] Change exception code from LOGICAL_ERROR to BAD_ARGUMENTS when the name of remote table is empty --- src/TableFunctions/TableFunctionRemote.cpp | 3 +++ .../01372_remote_table_function_empty_table.reference | 0 .../0_stateless/01372_remote_table_function_empty_table.sql | 1 + 3 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/01372_remote_table_function_empty_table.reference create mode 100644 tests/queries/0_stateless/01372_remote_table_function_empty_table.sql diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index cfeb3907136..b79b22517c5 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -186,6 +186,9 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C secure); } + if (remote_table.empty()) + throw Exception("The name of remote table cannot be empty", ErrorCodes::BAD_ARGUMENTS); + auto remote_table_id = StorageID::createEmpty(); remote_table_id.database_name = remote_database; remote_table_id.table_name = remote_table; diff --git a/tests/queries/0_stateless/01372_remote_table_function_empty_table.reference b/tests/queries/0_stateless/01372_remote_table_function_empty_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql new file mode 100644 index 00000000000..698c323d73f --- /dev/null +++ b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql @@ -0,0 +1 @@ +SELECT * FROM remote('127..2', 'a.'); -- { serverError 36 } From e2e92afd981b0c3af79fd7d9f70e653dd86f2065 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 6 Jul 2020 02:17:46 +0300 Subject: [PATCH 268/330] fix segfault with -StateResample combinators --- src/Interpreters/Aggregator.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 1b8439fc704..39fcb382e57 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1181,15 +1181,17 @@ Block Aggregator::prepareBlockAndFill( if (aggregate_functions[i]->isState()) { /// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states. - ColumnAggregateFunction * column_aggregate_func = nullptr; - /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. - if (auto * column_array = typeid_cast(final_aggregate_columns[i].get())) - column_aggregate_func = &assert_cast(column_array->getData()); - else - column_aggregate_func = &assert_cast(*final_aggregate_columns[i]); + if (auto * column_aggregate_func = typeid_cast(final_aggregate_columns[i].get())) + for (auto & pool : data_variants.aggregates_pools) + column_aggregate_func->addArena(pool); - for (auto & pool : data_variants.aggregates_pools) - column_aggregate_func->addArena(pool); + /// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator. 
+ final_aggregate_columns[i]->forEachSubcolumn([&data_variants](auto & subcolumn) + { + if (auto * column_aggregate_func = typeid_cast(subcolumn.get())) + for (auto & pool : data_variants.aggregates_pools) + column_aggregate_func->addArena(pool); + }); } } } From 7f3a8f3eb94b4cc36583b412bcf9b5a96390ce0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 02:50:20 +0300 Subject: [PATCH 269/330] Fix bad code in redundant ORDER BY optimization #10067 --- .../AggregateFunctionCount.cpp | 3 +- .../AggregateFunctionFactory.cpp | 46 +++++++++++++++++++ .../AggregateFunctionFactory.h | 6 +++ .../AggregateFunctionGroupArray.cpp | 6 ++- .../AggregateFunctionGroupArrayMoving.cpp | 6 ++- .../AggregateFunctionGroupArrayMoving.h | 4 +- .../AggregateFunctionGroupUniqArray.cpp | 4 +- .../AggregateFunctionMinMaxAny.cpp | 15 ++++-- .../AggregateFunctionTopK.cpp | 6 ++- .../AggregateFunctionUniq.cpp | 8 ++-- src/AggregateFunctions/IAggregateFunction.h | 5 ++ src/Interpreters/DuplicateOrderByVisitor.h | 6 ++- 12 files changed, 95 insertions(+), 20 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index b00adaa0f1a..6ea63bedaf0 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -28,7 +28,8 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { - factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive); + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false }; + factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 83221df784a..f7c6fe9da14 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -162,6 +162,52 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet( } +std::optional AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param, int recursion_level) const +{ + String name = getAliasToOrName(name_param); + Value found; + + /// Find by exact match. + if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) + { + found = it->second; + } + /// Find by case-insensitive name. + /// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names. + else if (recursion_level == 0) + { + if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + found = jt->second; + } + + if (found.creator) + return found.properties; + + /// Combinators of aggregate functions. + /// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb', + /// that can have different number and/or types of arguments, different result type and different behaviour. 
+ + if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) + { + if (combinator->isForInternalUsageOnly()) + return {}; + + String nested_name = name.substr(0, name.size() - combinator->getName().size()); + + /// NOTE: It's reasonable to also allow to transform properties by combinator. + return tryGetPropertiesImpl(nested_name, recursion_level + 1); + } + + return {}; +} + + +std::optional AggregateFunctionFactory::tryGetProperties(const String & name) const +{ + return tryGetPropertiesImpl(name, 0); +} + + bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int recursion_level) const { if (aggregate_functions.count(name) || isAlias(name)) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index 90e44145f4b..143e6562a30 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -72,6 +73,9 @@ public: const Array & parameters, AggregateFunctionProperties & out_properties) const; + /// Get properties if the aggregate function exists. + std::optional tryGetProperties(const String & name) const; + bool isAggregateFunctionName(const String & name, int recursion_level = 0) const; private: @@ -83,6 +87,8 @@ private: bool has_null_arguments, int recursion_level) const; + std::optional tryGetPropertiesImpl(const String & name, int recursion_level) const; + private: using AggregateFunctions = std::unordered_map; diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 0c1cb1d0d36..61dbdeef16e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -120,8 +120,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(const std::string & void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) { - factory.registerFunction("groupArray", createAggregateFunctionGroupArray); - factory.registerFunction("groupArraySample", createAggregateFunctionGroupArraySample); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); + factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index 31a921c3b2c..f8084e3716f 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -95,8 +95,10 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con void registerAggregateFunctionMoving(AggregateFunctionFactory & factory) { - factory.registerFunction("groupArrayMovingSum", createAggregateFunctionMoving); - factory.registerFunction("groupArrayMovingAvg", createAggregateFunctionMoving); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupArrayMovingSum", { createAggregateFunctionMoving, properties }); + factory.registerFunction("groupArrayMovingAvg", { createAggregateFunctionMoving, properties }); } } diff --git 
a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 19562b37a12..13895dea8d4 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -71,7 +71,6 @@ struct MovingAvgData void add(T val, Arena * arena) { sum += val; - value.push_back(sum, arena); } @@ -96,7 +95,8 @@ class MovingImpl final public: using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; - using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; // probably for overflow function in the future + // probably for overflow function in the future + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; explicit MovingImpl(const DataTypePtr & data_type_, UInt64 win_size_ = std::numeric_limits::max()) : IAggregateFunctionDataHelper>({data_type_}, {}) diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 552089bb58d..dd29a64819a 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -110,7 +110,9 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory) { - factory.registerFunction("groupUniqArray", createAggregateFunctionGroupUniqArray); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupUniqArray", { createAggregateFunctionGroupUniqArray, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp b/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp index 9358d361616..a98eaccdabd 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp @@ -49,13 +49,18 @@ AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, con void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory) { - factory.registerFunction("any", createAggregateFunctionAny); - factory.registerFunction("anyLast", createAggregateFunctionAnyLast); - factory.registerFunction("anyHeavy", createAggregateFunctionAnyHeavy); factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive); factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("argMin", createAggregateFunctionArgMin); - factory.registerFunction("argMax", createAggregateFunctionArgMax); + + /// The functions below depend on the order of data. 
+ + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("any", { createAggregateFunctionAny, properties }); + factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties }); + factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties }); + factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties }); + factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 344ab340d62..a8cea5eb59b 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -117,8 +117,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const void registerAggregateFunctionTopK(AggregateFunctionFactory & factory) { - factory.registerFunction("topK", createAggregateFunctionTopK); - factory.registerFunction("topKWeighted", createAggregateFunctionTopK); + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("topK", { createAggregateFunctionTopK, properties }); + factory.registerFunction("topKWeighted", { createAggregateFunctionTopK, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 40742ae336e..32fdb188529 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -122,14 +122,16 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) { + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false }; + factory.registerFunction("uniq", - {createAggregateFunctionUniq, {true}}); + {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqHLL12", - {createAggregateFunctionUniq, {true}}); + {createAggregateFunctionUniq, properties}); factory.registerFunction("uniqExact", - {createAggregateFunctionUniq>, {true}}); + {createAggregateFunctionUniq>, properties}); } } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 40d589f773d..eb9c560af98 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -289,6 +289,11 @@ struct AggregateFunctionProperties * or we should return non-Nullable type with default value (example: count, countDistinct). */ bool returns_default_when_only_null = false; + + /** Result varies depending on the data order (example: groupArray). + * Some may also name this property as "non-commutative". 
+ */ + bool is_order_dependent = false; }; diff --git a/src/Interpreters/DuplicateOrderByVisitor.h b/src/Interpreters/DuplicateOrderByVisitor.h index 85f34377e54..72e0419f114 100644 --- a/src/Interpreters/DuplicateOrderByVisitor.h +++ b/src/Interpreters/DuplicateOrderByVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -24,7 +25,9 @@ public: bool & is_stateful; void visit(ASTFunction & ast_function, ASTPtr &) { - if (ast_function.name == "any" || ast_function.name == "groupArray") + auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name); + + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) { is_stateful = true; return; @@ -85,7 +88,6 @@ public: if (done) return; - /// Disable optimization for distributed tables for (const auto & elem : select_query.children) { if (elem->as() && !elem->as()->is_standalone) From 752b0218dbec84de5d842012fa771bbbc89319e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 03:00:48 +0300 Subject: [PATCH 270/330] Add a test --- .../01372_wrong_order_by_removal.reference | 1 + .../0_stateless/01372_wrong_order_by_removal.sql | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/01372_wrong_order_by_removal.reference create mode 100644 tests/queries/0_stateless/01372_wrong_order_by_removal.sql diff --git a/tests/queries/0_stateless/01372_wrong_order_by_removal.reference b/tests/queries/0_stateless/01372_wrong_order_by_removal.reference new file mode 100644 index 00000000000..c1b36bd53ca --- /dev/null +++ b/tests/queries/0_stateless/01372_wrong_order_by_removal.reference @@ -0,0 +1 @@ +SELECT \n k,\n groupArrayMovingSum(v)\nFROM \n(\n SELECT \n k,\n dt,\n v\n FROM moving_sum_num\n ORDER BY \n k ASC,\n dt ASC\n)\nGROUP BY k\nORDER BY k ASC diff --git a/tests/queries/0_stateless/01372_wrong_order_by_removal.sql b/tests/queries/0_stateless/01372_wrong_order_by_removal.sql new file mode 100644 index 00000000000..93f3388676b --- /dev/null +++ b/tests/queries/0_stateless/01372_wrong_order_by_removal.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE moving_sum_num +( + `k` String, + `dt` DateTime, + `v` UInt64 +); + +SET enable_debug_queries = 1; + +-- ORDER BY from subquery shall not be removed. 
+ANALYZE SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k; From 37ac45643947e2ecf085e0c840341e526a3b44f1 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Mon, 6 Jul 2020 09:02:02 +0800 Subject: [PATCH 271/330] Support KILL QUERY [connection_id] for MySQL --- src/Server/MySQLHandler.cpp | 30 ++++++++++++++++++++++++++++-- src/Server/MySQLHandler.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index a97182f15fc..6892ebbd31a 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -103,7 +103,8 @@ void MySQLHandler::run() { if (!handshake_response.database.empty()) connection_context.setCurrentDatabase(handshake_response.database); - connection_context.setCurrentQueryId(""); + connection_context.setCurrentQueryId(Poco::format("mysql:%lu", connection_id)); + } catch (const Exception & exc) { @@ -295,6 +296,12 @@ void MySQLHandler::comQuery(ReadBuffer & payload) replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", show_table_status_replacement_query); } + if (0 == strncasecmp("KILL QUERY", query.c_str(), 10)) + { + should_replace = true; + replacement_query = kill_connection_id_replacement_query(query); + } + if (0 == strncasecmp("SHOW VARIABLES", query.c_str(), 13)) { should_replace = true; @@ -379,7 +386,7 @@ const String MySQLHandler::show_table_status_replacement_query("SELECT" " 'Dynamic' AS Row_format," " 0 AS Rows," " 0 AS Avg_row_length," - " 0 AS Data_length," + " 0 AS Data_length," " 0 AS Max_data_length," " 0 AS Index_length," " 0 AS Data_free," @@ -394,4 +401,23 @@ const String MySQLHandler::show_table_status_replacement_query("SELECT" " FROM system.tables" " WHERE name LIKE "); +String MySQLHandler::kill_connection_id_replacement_query(const String & query) +{ + const String s = "KILL QUERY "; + + if (query.size() > s.size()) + { + String process_id = query.data() + s.length(); + + static const std::regex expr{"^[0-9]"}; + if (std::regex_match(process_id, expr)) + { + String replacement = Poco::format("KILL QUERY WHERE query_id = 'mysql:%s'", process_id); + return replacement; + } + } + return query; } + +} + diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 5f506089493..41d4cc9a483 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -73,6 +73,7 @@ protected: private: static const String show_table_status_replacement_query; + String kill_connection_id_replacement_query(const String & query); }; #if USE_SSL From de4459c25a7654427635d7fe411e48f06d8bb717 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 04:25:45 +0300 Subject: [PATCH 272/330] Fix error --- src/TableFunctions/TableFunctionRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index b79b22517c5..7fa3868d142 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -186,7 +186,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C secure); } - if (remote_table.empty()) + if (!remote_table_function_ptr && remote_table.empty()) throw Exception("The name of remote table cannot be empty", ErrorCodes::BAD_ARGUMENTS); auto remote_table_id = StorageID::createEmpty(); From bd523a0aff1e1355300febcddadced05f393a15f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 04:30:03 +0300 
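The new 01370_client_autocomplete_word_break_characters test drives interactive tab completion via clickhouse-client and relies on suggestions being loaded in the background; autocomplete is not guaranteed to work in Unbundled and Arcadia builds, so the test is added to the skip lists updated below.
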
Subject: [PATCH 273/330] Autocomplete does not have to work in "Unbundled" build --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + tests/queries/skip_list.json | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 3995f905332..61c10cf8b14 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -126,3 +126,4 @@ 01053_ssd_dictionary 01280_ssd_complex_key_dictionary 01354_order_by_tuple_collate_const +01370_client_autocomplete_word_break_characters diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 9ad291928c0..2ef448e5d99 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -81,7 +81,8 @@ "_arrow", "01099_parallel_distributed_insert_select", "01300_client_save_history_when_terminated", - "orc_output" + "orc_output", + "01370_client_autocomplete_word_break_characters" ], "release-build": [ "avx2" From 95a7a09c37844e9c9537933d795d6fdfbcb46626 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 04:50:45 +0300 Subject: [PATCH 274/330] Fix transform query for external databases in presense of aliases #12032 --- .../JoinToSubqueryTransformVisitor.cpp | 2 +- src/Interpreters/PredicateRewriteVisitor.cpp | 2 +- ..._transform_query_for_external_database.cpp | 12 ++++++++ .../transformQueryForExternalDatabase.cpp | 28 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index a34276b5519..5f38f410e04 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -257,7 +257,7 @@ struct ColumnAliasesMatcher if (!last_table) { IdentifierSemantic::coverName(node, alias); - node.setAlias(""); + node.setAlias({}); } } else if (node.compound()) diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index 7fc45044a88..2a4bd4c1fd2 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -76,7 +76,7 @@ static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vectortryGetAlias(); !alias.empty()) - predicate->setAlias(""); + predicate->setAlias({}); if (ASTIdentifier * identifier = predicate->as()) identifiers.emplace_back(identifier); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 318d667d9b0..bd7d7d5d1b8 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,8 @@ struct State {"apply_type", std::make_shared()}, {"apply_status", std::make_shared()}, {"create_time", std::make_shared()}, + {"field", std::make_shared()}, + {"value", std::make_shared()}, }; static const State & instance() @@ -117,3 +120,12 @@ TEST(TransformQueryForExternalDatabase, Issue7245) R"(SELECT "apply_id", "apply_type", "apply_status", "create_time" FROM "test"."table" WHERE ("apply_type" = 2) AND ("create_time" > '2018-12-25 01:02:03') AND ("apply_status" IN (3, 4)))", state.context, state.columns); } + +TEST(TransformQueryForExternalDatabase, Aliases) +{ + const State & state = 
State::instance(); + + check("SELECT field AS value, field AS display WHERE field NOT IN ('') AND display LIKE '%test%'", + R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))", + state.context, state.columns); +} diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 11b98b782e0..0d34f0b3068 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -71,6 +71,24 @@ public: } }; +class DropAliasesMatcher +{ +public: + struct Data {}; + Data data; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) + { + return true; + } + + static void visit(ASTPtr & node, Data) + { + if (!node->tryGetAlias().empty()) + node->setAlias({}); + } +}; + void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & all_columns) { auto syntax_result = SyntaxAnalyzer(context).analyze(node, all_columns); @@ -80,6 +98,13 @@ void replaceConstantExpressions(ASTPtr & node, const Context & context, const Na visitor.visit(node); } +void dropAliases(ASTPtr & node) +{ + DropAliasesMatcher::Data data; + InDepthNodeVisitor visitor(data); + visitor.visit(node); +} + bool isCompatible(const IAST & node) { @@ -192,6 +217,9 @@ String transformQueryForExternalDatabase( } } + ASTPtr select_ptr = select; + dropAliases(select_ptr); + std::stringstream out; IAST::FormatSettings settings(out, true); settings.identifier_quoting_style = identifier_quoting_style; From f08ff9010eb8bd8628cf009c5d6543502432ed41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 04:57:10 +0300 Subject: [PATCH 275/330] Whitespace --- src/Interpreters/MutationsInterpreter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 51b0cf92484..1fd422efcab 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -506,7 +506,9 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{select_query, context, storage, metadata_snapshot, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; + InterpreterSelectQuery interpreter{ + select_query, context, storage, metadata_snapshot, + SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; auto first_stage_header = interpreter.getSampleBlock(); auto in = std::make_shared(first_stage_header); @@ -530,7 +532,6 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & { NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAllPhysical(); - /// Next, for each stage calculate columns changed by this and previous stages. 
for (size_t i = 0; i < prepared_stages.size(); ++i) { From cb04c503d7a9acbe88154034ab20cb445f1d6938 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Mon, 6 Jul 2020 10:07:38 +0800 Subject: [PATCH 276/330] Add MySQL to ClickHouse query replacement mapping table --- src/Server/MySQLHandler.cpp | 106 +++++++++++++++++++++--------------- src/Server/MySQLHandler.h | 5 +- 2 files changed, 65 insertions(+), 46 deletions(-) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 6892ebbd31a..9e42f5ebc05 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -45,6 +45,10 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } +static String select_empty_replacement_query(const String & query); +static String show_table_status_replacement_query(const String & query); +static String kill_connection_id_replacement_query(const String & query); + MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_) : Poco::Net::TCPServerConnection(socket_) @@ -57,6 +61,10 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) server_capability_flags |= CLIENT_SSL; + + replacements.emplace("KILL QUERY", kill_connection_id_replacement_query); + replacements.emplace("SHOW TABLE STATUS LIKE", show_table_status_replacement_query); + replacements.emplace("SHOW VARIABLES", select_empty_replacement_query); } void MySQLHandler::run() @@ -285,26 +293,18 @@ void MySQLHandler::comQuery(ReadBuffer & payload) } else { - String replacement_query = "SELECT ''"; + String replacement_query; bool should_replace = false; bool with_output = false; - // This is a workaround in order to support adding ClickHouse to MySQL using federated server. 
- if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) + for (auto const & x : replacements) { - should_replace = true; - replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", show_table_status_replacement_query); - } - - if (0 == strncasecmp("KILL QUERY", query.c_str(), 10)) - { - should_replace = true; - replacement_query = kill_connection_id_replacement_query(query); - } - - if (0 == strncasecmp("SHOW VARIABLES", query.c_str(), 13)) - { - should_replace = true; + if (0 == strncasecmp(x.first.c_str(), query.c_str(), x.first.size())) + { + should_replace = true; + replacement_query = x.second(query); + break; + } } ReadBufferFromString replacement(replacement_query); @@ -379,40 +379,58 @@ static bool isFederatedServerSetupSetCommand(const String & query) return 1 == std::regex_match(query, expr); } -const String MySQLHandler::show_table_status_replacement_query("SELECT" - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name LIKE "); - -String MySQLHandler::kill_connection_id_replacement_query(const String & query) +/// Replace "[query(such as SHOW VARIABLES...)]" into "". +static String select_empty_replacement_query(const String & query) { - const String s = "KILL QUERY "; + std::ignore = query; + return "select ''"; +} - if (query.size() > s.size()) +/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". +static String show_table_status_replacement_query(const String & query) +{ + const String prefix = "SHOW TABLE STATUS LIKE "; + if (query.size() > prefix.size()) { - String process_id = query.data() + s.length(); + String suffix = query.data() + prefix.length(); + return ( + "SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name LIKE " + + suffix); + } + return query; +} +/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id = 'mysql:[connection_id]'". 
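+/// For example, a MySQL client issuing "KILL QUERY 4" (4 being just an illustrative
+/// connection id) gets the query rewritten to "KILL QUERY WHERE query_id = 'mysql:4'",
+/// which matches the "mysql:%lu" query id that MySQLHandler::run() assigns per connection.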
+static String kill_connection_id_replacement_query(const String & query) +{ + const String prefix = "KILL QUERY "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); static const std::regex expr{"^[0-9]"}; - if (std::regex_match(process_id, expr)) + if (std::regex_match(suffix, expr)) { - String replacement = Poco::format("KILL QUERY WHERE query_id = 'mysql:%s'", process_id); + String replacement = Poco::format("KILL QUERY WHERE query_id = 'mysql:%s'", suffix); return replacement; } } diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 41d4cc9a483..f7596850a8b 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -72,8 +72,9 @@ protected: bool secure_connection = false; private: - static const String show_table_status_replacement_query; - String kill_connection_id_replacement_query(const String & query); + using ReplacementFn = std::function; + using Replacements = std::unordered_map; + Replacements replacements; }; #if USE_SSL From 6483ba7d529c5c0e6e1d02dfaf5fe4d97b14b39d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:07:55 +0300 Subject: [PATCH 277/330] Style --- src/Interpreters/MutationsInterpreter.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 1fd422efcab..dc5880bf1ca 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -36,8 +36,9 @@ namespace ErrorCodes namespace { + /// Helps to detect situations, where non-deterministic functions may be used in mutations of Replicated*MergeTree. -class FirstNonDeterministicFuncMatcher +class FirstNonDeterministicFunctionMatcher { public: struct Data @@ -70,18 +71,18 @@ public: } }; -using FirstNonDeterministicFuncFinder = InDepthNodeVisitor; +using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; -std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) +std::optional findFirstNonDeterministicFunctionName(const MutationCommand & command, const Context & context) { - FirstNonDeterministicFuncMatcher::Data finder_data{context, std::nullopt}; + FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt}; switch (command.type) { case MutationCommand::UPDATE: { auto update_assignments_ast = command.ast->as().update_assignments->clone(); - FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast); + FirstNonDeterministicFunctionFinder(finder_data).visit(update_assignments_ast); if (finder_data.nondeterministic_function_name) return finder_data.nondeterministic_function_name; @@ -92,7 +93,7 @@ std::optional findFirstNonDeterministicFuncName(const MutationCommand & case MutationCommand::DELETE: { auto predicate_ast = command.predicate->clone(); - FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast); + FirstNonDeterministicFunctionFinder(finder_data).visit(predicate_ast); return finder_data.nondeterministic_function_name; } @@ -682,7 +683,7 @@ void MutationsInterpreter::validate() { for (const auto & command : commands) { - const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context); + const auto nondeterministic_func_name = findFirstNonDeterministicFunctionName(command, context); if (nondeterministic_func_name) throw Exception( "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! 
" From 569a89280cdc5818aae9a6bb5fd9fe753769814d Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Mon, 6 Jul 2020 10:06:45 +0700 Subject: [PATCH 278/330] update ya.make --- src/Functions/ya.make | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 0c1a181471d..2f44de4dccd 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -229,6 +229,7 @@ SRCS( IFunction.cpp ignore.cpp in.cpp + initializeAggregation.cpp intDiv.cpp intDivOrZero.cpp intExp10.cpp From 0124eeb2d87b6826ed3fba4f8ce00e360dcf8bfc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 06:21:29 +0300 Subject: [PATCH 279/330] Added function "hasThreadFuzzer" --- src/Functions/hasThreadFuzzer.cpp | 50 +++++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + src/Functions/ya.make | 1 + 3 files changed, 53 insertions(+) create mode 100644 src/Functions/hasThreadFuzzer.cpp diff --git a/src/Functions/hasThreadFuzzer.cpp b/src/Functions/hasThreadFuzzer.cpp new file mode 100644 index 00000000000..1420efb5dde --- /dev/null +++ b/src/Functions/hasThreadFuzzer.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + + +namespace DB +{ + +/** Returns whether Thread Fuzzer is effective. + * It can be used in tests to prevent too long runs. + */ +class FunctionHasThreadFuzzer : public IFunction +{ +public: + static constexpr auto name = "hasThreadFuzzer"; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, ThreadFuzzer::instance().isEffective()); + } +}; + + +void registerFunctionHasThreadFuzzer(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 5eb1e3e47c0..f23066bc6e0 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -58,6 +58,7 @@ void registerFunctionGetMacro(FunctionFactory &); void registerFunctionGetScalar(FunctionFactory &); void registerFunctionIsConstant(FunctionFactory &); void registerFunctionGlobalVariable(FunctionFactory &); +void registerFunctionHasThreadFuzzer(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -116,6 +117,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionGetScalar(factory); registerFunctionIsConstant(factory); registerFunctionGlobalVariable(factory); + registerFunctionHasThreadFuzzer(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 86d2425eac4..0cafe71caf9 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -219,6 +219,7 @@ SRCS( h3ToParent.cpp h3ToString.cpp hasColumnInTable.cpp + hasThreadFuzzer.cpp hasTokenCaseInsensitive.cpp hasToken.cpp hostName.cpp From 8179b34857a1f3b6790629150b0712d0a6e0f3fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 06:22:23 +0300 Subject: [PATCH 280/330] Fix test under ThreadFuzzer --- 
tests/queries/0_stateless/01193_metadata_loading.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index de74b3ec1af..234d1df1e86 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -19,7 +19,7 @@ threads=10 count_multiplier=1 max_time_ms=1000 -debug_or_sanitizer_build=`$CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%'"` +debug_or_sanitizer_build=`$CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%' OR hasThreadFuzzer()"` if [[ debug_or_sanitizer_build -eq 1 ]]; then tables=100; count_multiplier=10; max_time_ms=1500; fi From 935b943703da250fe7031541b74a1fcefe851172 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 3 Jul 2020 12:03:30 +0200 Subject: [PATCH 281/330] Test for issue #9088 ALTER DELETE unexpectedly deletes NULL rows --- .../01358_mutation_delete_null_rows.reference | 20 ++++++++++++++ .../01358_mutation_delete_null_rows.sql | 26 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/01358_mutation_delete_null_rows.reference create mode 100644 tests/queries/0_stateless/01358_mutation_delete_null_rows.sql diff --git a/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference b/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference new file mode 100644 index 00000000000..126874237b7 --- /dev/null +++ b/tests/queries/0_stateless/01358_mutation_delete_null_rows.reference @@ -0,0 +1,20 @@ +-------- +0 1 x=0 +1 0 x<>0 +3 0 x<>0 +\N \N x<>0 +-------- +2020-01-01 2 0 leave +2020-01-02 aaa 0 1 delete +2020-01-03 2 0 leave +2020-01-04 2 0 leave +2020-01-05 \N 2 0 leave +2020-01-06 aaa 0 1 delete +2020-01-07 aaa 0 1 delete +2020-01-08 aaa \N \N leave +-------- +2020-01-01 2 +2020-01-03 2 +2020-01-04 2 +2020-01-05 \N 2 +2020-01-08 aaa \N diff --git a/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql new file mode 100644 index 00000000000..cf240c2062b --- /dev/null +++ b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql @@ -0,0 +1,26 @@ +select '--------'; +SELECT arrayJoin([0, 1, 3, NULL]) AS x, x = 0, if(x = 0, 'x=0', 'x<>0') ORDER BY x; + +select '--------'; +drop table if exists mutation_delete_null_rows; + +CREATE TABLE mutation_delete_null_rows +( + `EventDate` Date, + `CounterID` Nullable(String), + `UserID` Nullable(UInt32) +) +ENGINE = MergeTree() +ORDER BY EventDate; + +INSERT INTO mutation_delete_null_rows VALUES ('2020-01-01', '', 2)('2020-01-02', 'aaa', 0); +INSERT INTO mutation_delete_null_rows VALUES ('2020-01-03', '', 2)('2020-01-04', '', 2)('2020-01-05', NULL, 2)('2020-01-06', 'aaa', 0)('2020-01-07', 'aaa', 0)('2020-01-08', 'aaa', NULL); + +SELECT *,UserID = 0 as UserIDEquals0, if(UserID = 0, 'delete', 'leave') as verdict FROM mutation_delete_null_rows ORDER BY EventDate; + +ALTER TABLE mutation_delete_null_rows DELETE WHERE UserID = 0 SETTINGS mutations_sync=1; + +select '--------'; +SELECT * FROM mutation_delete_null_rows ORDER BY EventDate; + +drop table 
mutation_delete_null_rows; \ No newline at end of file From 71b410eb1095a5c5f83b281aaec733b336c40f17 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:11:33 +0300 Subject: [PATCH 282/330] Remove unused potentially dangerous function --- src/Parsers/makeASTForLogicalFunction.cpp | 16 +--------------- src/Parsers/makeASTForLogicalFunction.h | 3 --- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/src/Parsers/makeASTForLogicalFunction.cpp b/src/Parsers/makeASTForLogicalFunction.cpp index eaae38740aa..02c9da926c9 100644 --- a/src/Parsers/makeASTForLogicalFunction.cpp +++ b/src/Parsers/makeASTForLogicalFunction.cpp @@ -7,21 +7,6 @@ namespace DB { -ASTPtr makeASTForLogicalNot(ASTPtr argument) -{ - bool b; - if (tryGetLiteralBool(argument.get(), b)) - return std::make_shared(Field{UInt8(!b)}); - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "not"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children.push_back(argument); - return function; -} - ASTPtr makeASTForLogicalAnd(ASTs && arguments) { @@ -100,4 +85,5 @@ bool tryGetLiteralBool(const IAST * ast, bool & value) return false; } } + } diff --git a/src/Parsers/makeASTForLogicalFunction.h b/src/Parsers/makeASTForLogicalFunction.h index 5c1096cab6e..8c3718bfcde 100644 --- a/src/Parsers/makeASTForLogicalFunction.h +++ b/src/Parsers/makeASTForLogicalFunction.h @@ -5,9 +5,6 @@ namespace DB { -/// Makes an AST calculating NOT argument. -ASTPtr makeASTForLogicalNot(ASTPtr argument); - /// Makes an AST calculating argument1 AND argument2 AND ... AND argumentN. ASTPtr makeASTForLogicalAnd(ASTs && arguments); From 1ca45c1b7eb8a8ffe68c142b5c725985331d4986 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:41:28 +0300 Subject: [PATCH 283/330] Fix mutations interpreter #9088 --- src/Functions/isZeroOrNull.cpp | 118 ++++++++++++++++++++++ src/Functions/registerFunctionsNull.cpp | 2 + src/Functions/ya.make | 1 + src/Interpreters/MutationsInterpreter.cpp | 2 +- 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 src/Functions/isZeroOrNull.cpp diff --git a/src/Functions/isZeroOrNull.cpp b/src/Functions/isZeroOrNull.cpp new file mode 100644 index 00000000000..ee2b87e9bab --- /dev/null +++ b/src/Functions/isZeroOrNull.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +/// Returns 1 if argument is zero or NULL. +/// It can be used to negate filter in WHERE condition. +/// "WHERE isZeroOrNull(expr)" will return exactly the same rows that "WHERE expr" will filter out. 
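+/// For example, with a Nullable column x, "WHERE NOT (x = 0)" skips rows where x IS NULL
+/// (NOT NULL evaluates to NULL, which the filter treats as false), while
+/// "WHERE isZeroOrNull(x = 0)" keeps them; MutationsInterpreter relies on this when it
+/// negates an ALTER DELETE predicate, so rows with NULLs are no longer deleted (#9088).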
+class FunctionIsZeroOrNull : public IFunction +{ +public: + static constexpr auto name = "isZeroOrNull"; + + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const DataTypes & types) const override + { + if (!isNumber(removeNullable(types.at(0)))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The argument of function {} must have simple numeric type, possibly Nullable", name); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const ColumnPtr & input_column = block.getByPosition(arguments[0]).column; + + if (const ColumnNullable * input_column_nullable = checkAndGetColumn(input_column.get())) + { + const NullMap & null_map = input_column_nullable->getNullMapData(); + const IColumn * nested_column = &input_column_nullable->getNestedColumn(); + + if (!castTypeToEither< + ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, + ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, + ColumnFloat32, ColumnFloat64>( + nested_column, [&](const auto & column) + { + auto res = ColumnUInt8::create(input_rows_count); + processNullable(column.getData(), null_map, res->getData(), input_rows_count); + block.getByPosition(result).column = std::move(res); + return true; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + } + } + else + { + if (!castTypeToEither< + ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, + ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, + ColumnFloat32, ColumnFloat64>( + input_column.get(), [&](const auto & column) + { + auto res = ColumnUInt8::create(input_rows_count); + processNotNullable(column.getData(), res->getData(), input_rows_count); + block.getByPosition(result).column = std::move(res); + return true; + })) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must have simple numeric type, possibly Nullable", name); + } + } + } + +private: + template + void processNotNullable(const InputData & input_data, ColumnUInt8::Container & result_data, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + result_data[i] = !input_data[i]; + } + + template + void processNullable(const InputData & input_data, const NullMap & input_null_map, + ColumnUInt8::Container & result_data, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + result_data[i] = input_null_map[i] || !input_data[i]; + } +}; + + +void registerFunctionIsZeroOrNull(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/registerFunctionsNull.cpp b/src/Functions/registerFunctionsNull.cpp index e8894e19907..238133fbb67 100644 --- a/src/Functions/registerFunctionsNull.cpp +++ b/src/Functions/registerFunctionsNull.cpp @@ -10,6 +10,7 @@ void registerFunctionIfNull(FunctionFactory & factory); void registerFunctionNullIf(FunctionFactory & factory); void registerFunctionAssumeNotNull(FunctionFactory & factory); void 
registerFunctionToNullable(FunctionFactory & factory); +void registerFunctionIsZeroOrNull(FunctionFactory & factory); void registerFunctionsNull(FunctionFactory & factory) @@ -21,6 +22,7 @@ void registerFunctionsNull(FunctionFactory & factory) registerFunctionNullIf(factory); registerFunctionAssumeNotNull(factory); registerFunctionToNullable(factory); + registerFunctionIsZeroOrNull(factory); } } diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 86d2425eac4..c820916e222 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -241,6 +241,7 @@ SRCS( isNotNull.cpp isNull.cpp isValidUTF8.cpp + isZeroOrNull.cpp jumpConsistentHash.cpp lcm.cpp least.cpp diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index dc5880bf1ca..6ea656f0056 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -344,7 +344,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) if (stages.empty() || !stages.back().column_to_updated.empty()) stages.emplace_back(context); - auto negated_predicate = makeASTFunction("not", command.predicate->clone()); + auto negated_predicate = makeASTFunction("isZeroOrNull", command.predicate->clone()); stages.back().filters.push_back(negated_predicate); } else if (command.type == MutationCommand::UPDATE) From eb2cffab37f19837b9eff2239bf2e7f7dae2fe50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:48:35 +0300 Subject: [PATCH 284/330] Added a test --- .../01373_is_zero_or_null.reference | 27 +++++++++++++++++++ .../0_stateless/01373_is_zero_or_null.sql | 23 ++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/queries/0_stateless/01373_is_zero_or_null.reference create mode 100644 tests/queries/0_stateless/01373_is_zero_or_null.sql diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.reference b/tests/queries/0_stateless/01373_is_zero_or_null.reference new file mode 100644 index 00000000000..2525f532197 --- /dev/null +++ b/tests/queries/0_stateless/01373_is_zero_or_null.reference @@ -0,0 +1,27 @@ +1 1 +1 1 +0 0 +\N 1 +--- +1 1 +1 1 +0 0 +--- +hello +world +--- +hello +world +--- +hello +world +\N +--- +hello +world +xyz +--- +hello +world +xyz +\N diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.sql b/tests/queries/0_stateless/01373_is_zero_or_null.sql new file mode 100644 index 00000000000..0b37045ba07 --- /dev/null +++ b/tests/queries/0_stateless/01373_is_zero_or_null.sql @@ -0,0 +1,23 @@ +SELECT NOT x, isZeroOrNull(x) FROM (SELECT arrayJoin([1, 2, 3, NULL]) = 3 AS x); +SELECT '---'; +SELECT NOT x, isZeroOrNull(x) FROM (SELECT arrayJoin([1, 2, 3]) = 3 AS x); +SELECT '---'; +CREATE TEMPORARY TABLE test (x String NULL); +INSERT INTO test VALUES ('hello'), ('world'), ('xyz'), (NULL); + +SELECT * FROM test WHERE x != 'xyz'; +SELECT '---'; +SELECT * FROM test WHERE NOT x = 'xyz'; +SELECT '---'; +SELECT * FROM test WHERE isZeroOrNull(x = 'xyz'); +SELECT '---'; + +SELECT * FROM test WHERE x != 'xyz' +UNION ALL +SELECT * FROM test WHERE NOT x != 'xyz'; + +SELECT '---'; + +SELECT * FROM test WHERE x != 'xyz' +UNION ALL +SELECT * FROM test WHERE isZeroOrNull(x != 'xyz'); From 45088760082beb774a5848efb09427a319f8d737 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:57:58 +0300 Subject: [PATCH 285/330] Added another test just in case --- .../0_stateless/01374_if_nullable_filimonov.reference | 3 +++ tests/queries/0_stateless/01374_if_nullable_filimonov.sql | 5 +++++ 2 files changed, 8 
insertions(+) create mode 100644 tests/queries/0_stateless/01374_if_nullable_filimonov.reference create mode 100644 tests/queries/0_stateless/01374_if_nullable_filimonov.sql diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.reference b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference new file mode 100644 index 00000000000..c17da4640ff --- /dev/null +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference @@ -0,0 +1,3 @@ +2 0 leave +0 1 delete +\N \N leave diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.sql b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql new file mode 100644 index 00000000000..159cf7b52ee --- /dev/null +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql @@ -0,0 +1,5 @@ +SELECT + UserID, + UserID = 0, + if(UserID = 0, 'delete', 'leave') +FROM VALUES('UserID Nullable(UInt8)', (2), (0), (NULL)); From d8ae9c571c4f270cadfc320055434aadf581d7a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 05:59:44 +0300 Subject: [PATCH 286/330] Added yet another test just in case --- .../0_stateless/01374_if_nullable_filimonov.reference | 5 +++++ tests/queries/0_stateless/01374_if_nullable_filimonov.sql | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.reference b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference index c17da4640ff..ebe52278bb3 100644 --- a/tests/queries/0_stateless/01374_if_nullable_filimonov.reference +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.reference @@ -1,3 +1,8 @@ 2 0 leave 0 1 delete \N \N leave +--- +0 1 Definitely x = 0 +1 0 We cannot say that x = 0 +3 0 We cannot say that x = 0 +\N \N We cannot say that x = 0 diff --git a/tests/queries/0_stateless/01374_if_nullable_filimonov.sql b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql index 159cf7b52ee..0fadfb85fe4 100644 --- a/tests/queries/0_stateless/01374_if_nullable_filimonov.sql +++ b/tests/queries/0_stateless/01374_if_nullable_filimonov.sql @@ -3,3 +3,7 @@ SELECT UserID = 0, if(UserID = 0, 'delete', 'leave') FROM VALUES('UserID Nullable(UInt8)', (2), (0), (NULL)); + +SELECT '---'; + +SELECT arrayJoin([0, 1, 3, NULL]) AS x, x = 0, if(x = 0, 'Definitely x = 0', 'We cannot say that x = 0'); From ced1272368b695604d63d9fb5222bc0dd037cad3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 6 Jul 2020 06:01:03 +0300 Subject: [PATCH 287/330] Update 01358_mutation_delete_null_rows.sql --- tests/queries/0_stateless/01358_mutation_delete_null_rows.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql index cf240c2062b..e8aabf1aa37 100644 --- a/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql +++ b/tests/queries/0_stateless/01358_mutation_delete_null_rows.sql @@ -23,4 +23,4 @@ ALTER TABLE mutation_delete_null_rows DELETE WHERE UserID = 0 SETTINGS mutations select '--------'; SELECT * FROM mutation_delete_null_rows ORDER BY EventDate; -drop table mutation_delete_null_rows; \ No newline at end of file +drop table mutation_delete_null_rows; From 1650bf90b95bde9a1b2d78234957e352cbad50e5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 09:04:23 +0300 Subject: [PATCH 288/330] Fix test --- .../01373_is_zero_or_null.reference | 9 ++------- .../0_stateless/01373_is_zero_or_null.sql | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 13 
deletions(-) diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.reference b/tests/queries/0_stateless/01373_is_zero_or_null.reference index 2525f532197..d9caaa2089a 100644 --- a/tests/queries/0_stateless/01373_is_zero_or_null.reference +++ b/tests/queries/0_stateless/01373_is_zero_or_null.reference @@ -17,11 +17,6 @@ hello world \N --- -hello -world -xyz +3 --- -hello -world -xyz -\N +4 diff --git a/tests/queries/0_stateless/01373_is_zero_or_null.sql b/tests/queries/0_stateless/01373_is_zero_or_null.sql index 0b37045ba07..32458dc9f62 100644 --- a/tests/queries/0_stateless/01373_is_zero_or_null.sql +++ b/tests/queries/0_stateless/01373_is_zero_or_null.sql @@ -12,12 +12,18 @@ SELECT '---'; SELECT * FROM test WHERE isZeroOrNull(x = 'xyz'); SELECT '---'; -SELECT * FROM test WHERE x != 'xyz' -UNION ALL -SELECT * FROM test WHERE NOT x != 'xyz'; +SELECT count() FROM +( + SELECT * FROM test WHERE x != 'xyz' + UNION ALL + SELECT * FROM test WHERE NOT x != 'xyz' +); SELECT '---'; -SELECT * FROM test WHERE x != 'xyz' -UNION ALL -SELECT * FROM test WHERE isZeroOrNull(x != 'xyz'); +SELECT count() FROM +( + SELECT * FROM test WHERE x != 'xyz' + UNION ALL + SELECT * FROM test WHERE isZeroOrNull(x != 'xyz') +); From 7df372cb1d70a25865e13e903081c656b9dcd591 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 09:09:23 +0300 Subject: [PATCH 289/330] Fix flaky test --- tests/queries/0_stateless/01361_fover_remote_num_tries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index eb30a9b10ee..97e1a5eda51 100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -3,4 +3,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o 'connect timed out' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'connect timed out|Network is unreachable' | wc -l From 63e1e2303bb44b440a340fe211477a9b2d4ab7e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 09:13:26 +0300 Subject: [PATCH 290/330] Attempt to fix flaky test 00721_force_by_identical_result_after_merge_zookeeper --- .../00721_force_by_identical_result_after_merge_zookeeper.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql b/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql index d9034c9d6cd..956b1eb0249 100644 --- a/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql +++ b/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql @@ -10,6 +10,8 @@ SYSTEM SYNC REPLICA byte_identical_r2; -- Add a column with a default expression that will yield different values on different replicas. -- Call optimize to materialize it. Replicas should compare checksums and restore consistency. 
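+-- The two SYSTEM SYNC REPLICA statements added below presumably ensure both replicas
+-- have processed the ALTER before OPTIMIZE ... FINAL runs, which appears to be the
+-- source of the flakiness this patch addresses.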
ALTER TABLE byte_identical_r1 ADD COLUMN y UInt64 DEFAULT rand(); +SYSTEM SYNC REPLICA byte_identical_r1; +SYSTEM SYNC REPLICA byte_identical_r2; OPTIMIZE TABLE byte_identical_r1 PARTITION tuple() FINAL; SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 SEMI LEFT JOIN byte_identical_r2 t2 USING x ORDER BY x; From 0e8fad6844f8578f30c03bd4242dbca8250f92b9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 11:19:48 +0300 Subject: [PATCH 291/330] [anchore] more detailed Dockerfile scan reports (#12159) --- .github/workflows/anchore-analysis.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml index f0b54bef3e1..01cd0e271c8 100644 --- a/.github/workflows/anchore-analysis.yml +++ b/.github/workflows/anchore-analysis.yml @@ -10,7 +10,9 @@ name: Docker Container Scan (clickhouse-server) on: pull_request: - paths: docker/server/Dockerfile + paths: + - docker/server/Dockerfile + - .github/workflows/anchore-analysis.yml schedule: - cron: '0 21 * * *' @@ -31,6 +33,12 @@ jobs: image-reference: "localbuild/testimage:latest" dockerfile-path: "docker/server/Dockerfile" acs-report-enable: true + fail-build: true + - name: Upload artifact + uses: actions/upload-artifact@v1.0.0 + with: + name: AnchoreReports + path: ./anchore-reports/ - name: Upload Anchore Scan Report uses: github/codeql-action/upload-sarif@v1 with: From 96eb38121ea2974b32ed6a6aec6861b69f6c1a25 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 11:20:56 +0300 Subject: [PATCH 292/330] Update Dockerfile --- docker/server/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 4d6b37aac16..0cb84bd1f6e 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && apt-get update \ && env DEBIAN_FRONTEND=noninteractive \ - apt-get -y -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ + apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --allow-unauthenticated --yes --no-install-recommends \ clickhouse-common-static=$version \ From 30f05d03f1613b07a211549a261e8403a1acb2db Mon Sep 17 00:00:00 2001 From: yhgcn Date: Mon, 6 Jul 2020 16:25:28 +0800 Subject: [PATCH 293/330] Update array-functions.md (#12130) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修改编码错误 --- .../sql-reference/functions/array-functions.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 546b916b942..858a97940c6 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -1,6 +1,6 @@ # 数组函数 {#shu-zu-han-shu} -## 空 {#empty} +## empty {#empty} 对于空数组返回1,对于非空数组返回0。 结果类型是UInt8。 @@ -12,7 +12,7 @@ 结果类型是UInt8。 该函数也适用于字符串。 -## 长度 {#array_functions-length} +## length {#array_functions-length} 返回数组中的元素个数。 结果类型是UInt64。 @@ -24,9 +24,9 @@ ## emptyArrayFloat32,emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} -## 空空漫步,空空漫步时间 {#emptyarraydate-emptyarraydatetime} +## emptyArrayDate,emptyArrayDateTime {#emptyarraydate-emptyarraydatetime} -## 空字符串 {#emptyarraystring} +## emptyArrayString 
{#emptyarraystring} 不接受任何参数并返回适当类型的空数组。 @@ -34,7 +34,7 @@ 接受一个空数组并返回一个仅包含一个默认值元素的数组。 -## 范围(N) {#rangen} +## range(N) {#rangen} 返回从0到N-1的数字数组。 以防万一,如果在数据块中创建总长度超过100,000,000个元素的数组,则抛出异常。 @@ -74,7 +74,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res 如果索引超出数组的边界,则返回默认值(数字为0,字符串为空字符串等)。 -## 有(arr,elem) {#hasarr-elem} +## has(arr,elem) {#hasarr-elem} 检查’arr’数组是否具有’elem’元素。 如果元素不在数组中,则返回0;如果在,则返回1。 @@ -186,7 +186,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res │ 2 │ └──────────────────────────────────────┘ -## ツ暗ェツ氾环催ツ団ツ法ツ人) {#array_functions-arrayenumerate} +## arrayEnumerate(arr) {#array_functions-arrayenumerate} 返回 Array \[1, 2, 3, …, length (arr) \] @@ -658,7 +658,7 @@ SELECT arrayReduce(‘agg\_func’,arr1,…) - 将聚合函数`agg_func`应用于数组`arr1 ...`。如果传递了多个数组,则相应位置上的元素将作为多个参数传递给聚合函数。例如:SELECT arrayReduce(‘max’,\[1,2,3\])= 3 -## ツ暗ェツ氾环催ツ団ツ法ツ人) {#arrayreversearr} +## arrayReverse(arr) {#arrayreversearr} 返回与源数组大小相同的数组,包含反转源数组的所有元素的结果。 From 18c48ce12c9d86b1626c168a113993b32b3f131e Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Mon, 6 Jul 2020 16:26:06 +0800 Subject: [PATCH 294/330] Add integration test for mysql replacement query --- tests/integration/test_mysql_protocol/test.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 4ab225aee20..507445537b8 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -138,6 +138,34 @@ def test_mysql_client(mysql_client, server_address): assert stdout == '\n'.join(['column', '0', '0', '1', '1', '5', '5', 'tmp_column', '0', '1', '']) + # Show table status. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "show table status like 'xx';" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + # show variables. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "show variables;" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + # Kill query. + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "kill query 0;" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default + --password=123 -e "kill query where query_id='mysql:0';" + '''.format(host=server_address, port=server_port), demux=True) + assert code == 0 + + def test_mysql_federated(mysql_server, server_address): # For some reason it occasionally fails without retries. 
retries = 100 From b444b7def5d08a7372f86be5063347aa720be18f Mon Sep 17 00:00:00 2001 From: yhgcn Date: Mon, 6 Jul 2020 16:26:16 +0800 Subject: [PATCH 295/330] Update array-functions.md (#12129) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修改编码错误 From dac7a39256dc61609d9ee4a188a023b051d35d29 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Jul 2020 12:29:22 +0300 Subject: [PATCH 296/330] fix ubsan final --- contrib/FastMemcpy/FastMemcpy.h | 5 ++++- src/Processors/Formats/Impl/ORCBlockInputFormat.cpp | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/contrib/FastMemcpy/FastMemcpy.h b/contrib/FastMemcpy/FastMemcpy.h index dd89a55dbe9..5dcbfcf1656 100644 --- a/contrib/FastMemcpy/FastMemcpy.h +++ b/contrib/FastMemcpy/FastMemcpy.h @@ -86,7 +86,10 @@ static INLINE void memcpy_sse2_128(void *dst, const void *src) { //--------------------------------------------------------------------- // tiny memory copy with jump table optimized //--------------------------------------------------------------------- -static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) { +/// Attribute is used to avoid an error with undefined behaviour sanitizer +/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer +/// Found by 01307_orc_output_format.sh, cause - ORCBlockInputFormat and external ORC library. +__attribute__((__no_sanitize__("undefined"))) static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) { unsigned char *dd = ((unsigned char*)dst) + size; const unsigned char *ss = ((const unsigned char*)src) + size; diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 12f1a27cf36..a12ca09eec0 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -23,9 +23,7 @@ ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_) : IInp { } -/// Attribute is used to avoid an error with undefined behaviour sanitizer -/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer -__attribute__((__no_sanitize__("undefined"))) Chunk ORCBlockInputFormat::generate() +Chunk ORCBlockInputFormat::generate() { Chunk res; const Block & header = getPort().getHeader(); From 9e776e6907bfe94f73db2f34f29dfe37c2d253c8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 12:56:30 +0300 Subject: [PATCH 297/330] [docs] improve redirects destination --- docs/tools/build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 1c8165fb36f..ac675897fca 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -169,7 +169,8 @@ def build_docs(args): if lang: tasks.append((lang, args,)) util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_docs_redirects(args) + if not args.version_prefix: + redirects.build_docs_redirects(args) def build(args): From 5a2cb7869af35db6ae0fad968934aca15349102e Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 6 Jul 2020 13:41:32 +0300 Subject: [PATCH 298/330] Update SECURITY.md (#12161) --- SECURITY.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 7210db23183..b95b8b30a3d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,7 +11,10 @@ currently being supported with security 
updates: | 18.x | :x: | | 19.x | :x: | | 19.14 | :white_check_mark: | -| 20.x | :white_check_mark: | +| 20.1 | :x: | +| 20.3 | :white_check_mark: | +| 20.4 | :white_check_mark: | +| 20.5 | :white_check_mark: | ## Reporting a Vulnerability From 9b26afc885f9318cd7ace15441c872b035757a73 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 13:44:47 +0300 Subject: [PATCH 299/330] Update Dockerfile --- docker/server/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 0cb84bd1f6e..1d23cda5fb3 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.6.1.* From 0c7a261bc6e49c5c52f2c64bfdddd2652d4eecb8 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 6 Jul 2020 16:45:54 +0300 Subject: [PATCH 300/330] Place common docker compose files to integration docker container --- docker/test/integration/README.md | 4 +- docker/test/integration/runner/Dockerfile | 1 + .../compose/docker_compose_cassandra.yml | 0 .../compose/docker_compose_hdfs.yml | 0 .../compose/docker_compose_kafka.yml | 0 .../compose/docker_compose_minio.yml | 0 .../compose/docker_compose_mongo.yml | 0 .../compose/docker_compose_mysql.yml | 0 .../compose/docker_compose_net.yml | 0 .../compose/docker_compose_postgres.yml | 0 .../compose/docker_compose_rabbitmq.yml | 0 .../compose/docker_compose_redis.yml | 0 .../compose/docker_compose_zookeeper.yml | 0 tests/integration/README.md | 3 +- tests/integration/helpers/cluster.py | 67 +++++++++++-------- 15 files changed, 45 insertions(+), 30 deletions(-) rename docker/test/integration/{ => runner}/compose/docker_compose_cassandra.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_hdfs.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_kafka.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_minio.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_mongo.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_mysql.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_net.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_postgres.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_rabbitmq.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_redis.yml (100%) rename docker/test/integration/{ => runner}/compose/docker_compose_zookeeper.yml (100%) diff --git a/docker/test/integration/README.md b/docker/test/integration/README.md index 4aa10d6db80..a11cf059655 100644 --- a/docker/test/integration/README.md +++ b/docker/test/integration/README.md @@ -1,6 +1,6 @@ ## Docker containers for integration tests - `base` container with required packages - `runner` container with that runs integration tests in docker -- `compose` contains docker_compose YaML files that are used in tests +- `runnner/compose` contains docker\_compose YaML files that are used in tests -How to run integration tests is described in tests/integration/README.md \ No newline at end of file +How to run integration tests is described in tests/integration/README.md diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 423ecb06122..7608666ecc2 100644 --- 
a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -63,6 +63,7 @@ RUN set -eux; \ COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ +COPY compose/ /compose/ RUN set -x \ && addgroup --system dockremap \ diff --git a/docker/test/integration/compose/docker_compose_cassandra.yml b/docker/test/integration/runner/compose/docker_compose_cassandra.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_cassandra.yml rename to docker/test/integration/runner/compose/docker_compose_cassandra.yml diff --git a/docker/test/integration/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_hdfs.yml rename to docker/test/integration/runner/compose/docker_compose_hdfs.yml diff --git a/docker/test/integration/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_kafka.yml rename to docker/test/integration/runner/compose/docker_compose_kafka.yml diff --git a/docker/test/integration/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_minio.yml rename to docker/test/integration/runner/compose/docker_compose_minio.yml diff --git a/docker/test/integration/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_mongo.yml rename to docker/test/integration/runner/compose/docker_compose_mongo.yml diff --git a/docker/test/integration/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_mysql.yml rename to docker/test/integration/runner/compose/docker_compose_mysql.yml diff --git a/docker/test/integration/compose/docker_compose_net.yml b/docker/test/integration/runner/compose/docker_compose_net.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_net.yml rename to docker/test/integration/runner/compose/docker_compose_net.yml diff --git a/docker/test/integration/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_postgres.yml rename to docker/test/integration/runner/compose/docker_compose_postgres.yml diff --git a/docker/test/integration/compose/docker_compose_rabbitmq.yml b/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_rabbitmq.yml rename to docker/test/integration/runner/compose/docker_compose_rabbitmq.yml diff --git a/docker/test/integration/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml similarity index 100% rename from docker/test/integration/compose/docker_compose_redis.yml rename to docker/test/integration/runner/compose/docker_compose_redis.yml diff --git a/docker/test/integration/compose/docker_compose_zookeeper.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper.yml similarity index 100% rename from 
docker/test/integration/compose/docker_compose_zookeeper.yml rename to docker/test/integration/runner/compose/docker_compose_zookeeper.yml diff --git a/tests/integration/README.md b/tests/integration/README.md index c72c009a0d6..a3eb577d609 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -25,12 +25,13 @@ To check, that you have access to Docker, run `docker ps`. Run the tests with the `pytest` command. To select which tests to run, use: `pytest -k ` By default tests are run with system-wide client binary, server binary and base configs. To change that, -set the following environment variables: +set the following environment variables:` * `CLICKHOUSE_TESTS_SERVER_BIN_PATH` to choose the server binary. * `CLICKHOUSE_TESTS_CLIENT_BIN_PATH` to choose the client binary. * `CLICKHOUSE_TESTS_BASE_CONFIG_DIR` to choose the directory from which base configs (`config.xml` and `users.xml`) are taken. +For tests that use common docker compose files you may need to set up their path with environment variable: `DOCKER_COMPOSE_DIR=$HOME/ClickHouse/docker/test/integration/runner/compose` ### Running with runner script diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 861f178b433..f3e5dc1fab5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -31,7 +31,7 @@ from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) CLICKHOUSE_ROOT_DIR = p.join(p.dirname(__file__), "../../..") -DOCKER_COMPOSE_DIR = p.join(CLICKHOUSE_ROOT_DIR, "docker/test/integration/compose/") +LOCAL_DOCKER_COMPOSE_DIR = p.join(CLICKHOUSE_ROOT_DIR, "docker/test/integration/runner/compose/") DEFAULT_ENV_NAME = 'env_file' SANITIZER_SIGN = "==================" @@ -52,7 +52,7 @@ def subprocess_check_call(args): def subprocess_call(args): - # Uncomment for debugging + # Uncomment for debugging..; # print('run:', ' ' . join(args)) subprocess.call(args) @@ -67,6 +67,17 @@ def get_odbc_bridge_path(): return '/usr/bin/clickhouse-odbc-bridge' return path +def get_docker_compose_path(): + compose_path = os.environ.get('DOCKER_COMPOSE_DIR') + if compose_path is not None: + return os.path.dirname(compose_path) + else: + if os.path.exists(os.path.dirname('/compose/')): + return os.path.dirname('/compose/') #default in docker runner container + else: + print("Fallback docker_compose_path to LOCAL_DOCKER_COMPOSE_DIR: {}".format(LOCAL_DOCKER_COMPOSE_DIR)) + return LOCAL_DOCKER_COMPOSE_DIR + class ClickHouseCluster: """ClickHouse cluster with several instances and (possibly) ZooKeeper. 
@@ -181,17 +192,19 @@ class ClickHouseCluster: ipv6_address=ipv6_address, with_installed_binary=with_installed_binary, tmpfs=tmpfs or []) + docker_compose_yml_dir = get_docker_compose_path() + self.instances[name] = instance if ipv4_address is not None or ipv6_address is not None: self.with_net_trics = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_net.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_net.yml')]) self.base_cmd.extend(['--file', instance.docker_compose_path]) cmds = [] if with_zookeeper and not self.with_zookeeper: if not zookeeper_docker_compose_path: - zookeeper_docker_compose_path = p.join(DOCKER_COMPOSE_DIR, 'docker_compose_zookeeper.yml') + zookeeper_docker_compose_path = p.join(docker_compose_yml_dir, 'docker_compose_zookeeper.yml') self.with_zookeeper = True self.zookeeper_use_tmpfs = zookeeper_use_tmpfs @@ -202,86 +215,86 @@ class ClickHouseCluster: if with_mysql and not self.with_mysql: self.with_mysql = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]) self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')] cmds.append(self.base_mysql_cmd) if with_postgres and not self.with_postgres: self.with_postgres = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]) self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')] cmds.append(self.base_postgres_cmd) if with_odbc_drivers and not self.with_odbc_drivers: self.with_odbc_drivers = True if not self.with_mysql: self.with_mysql = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')]) self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mysql.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mysql.yml')] cmds.append(self.base_mysql_cmd) if not self.with_postgres: self.with_postgres = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')]) self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', - p.join(DOCKER_COMPOSE_DIR, 'docker_compose_postgres.yml')] + p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')] cmds.append(self.base_postgres_cmd) if with_kafka and not self.with_kafka: self.with_kafka = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')]) self.base_kafka_cmd = ['docker-compose', 
'--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_kafka.yml')] cmds.append(self.base_kafka_cmd) if with_rabbitmq and not self.with_rabbitmq: self.with_rabbitmq = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')]) self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_rabbitmq.yml')] cmds.append(self.base_rabbitmq_cmd) if with_hdfs and not self.with_hdfs: self.with_hdfs = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')]) self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_hdfs.yml')] cmds.append(self.base_hdfs_cmd) if with_mongo and not self.with_mongo: self.with_mongo = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mongo.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]) self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_mongo.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')] cmds.append(self.base_mongo_cmd) if self.with_net_trics: for cmd in cmds: - cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_net.yml')]) + cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_net.yml')]) if with_redis and not self.with_redis: self.with_redis = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_redis.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')]) self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_redis.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_redis.yml')] if with_minio and not self.with_minio: self.with_minio = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_minio.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')]) self.base_minio_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_minio.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_minio.yml')] cmds.append(self.base_minio_cmd) if with_cassandra and not self.with_cassandra: self.with_cassandra = True - self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')]) + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')]) self.base_cassandra_cmd = ['docker-compose', 
'--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')] + self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_cassandra.yml')] return instance From a0661953a6b19db58d25d0a8f1885856affe143e Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Mon, 6 Jul 2020 19:46:53 +0500 Subject: [PATCH 301/330] add cluster() adn clusterAllReplicas() table functions description, add signatures to remoteSecure() table function (#12156) * add cluster() adn clusterAllReplicas() table functions description, add signatures to remoteSecure() table function Signed-off-by: Slach * small russian fixes Signed-off-by: Slach * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov * Update docs/en/sql-reference/table-functions/cluster.md Co-authored-by: Ivan Blinkov Co-authored-by: Ivan Blinkov --- .../sql-reference/table-functions/cluster.md | 39 +++++++++++++++++++ .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/cluster.md | 36 +++++++++++++++++ .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + .../sql-reference/table-functions/remote.md | 2 + 10 files changed, 91 insertions(+) create mode 100644 docs/en/sql-reference/table-functions/cluster.md create mode 100644 docs/ru/sql-reference/table-functions/cluster.md diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md new file mode 100644 index 00000000000..2cd5b14caa4 --- /dev/null +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -0,0 +1,39 @@ +--- +toc_priority: 50 +toc_title: cluster +--- + +# cluster, clusterAllReplicas {#cluster-clusterallreplicas} + +Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried. +`clusterAllReplicas` - same as `cluster` but all replicas are queried. Each replica in a cluster is used as separate shard/connection. + +!!! note "Note" + All available clusters are listed in the `system.clusters` table. + + +Signatures: + +``` sql +cluster('cluster_name', db.table) +cluster('cluster_name', db, table) +clusterAllReplicas('cluster_name', db.table) +clusterAllReplicas('cluster_name', db, table) +``` + +`cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. + +Using the `cluster` and `clusterAllReplicas` table functions are less efficient than creating a `Distributed` table because in this case, the server connection is re-established for every request. When processing a large number of queries, please always create the `Distributed` table ahead of time, and don’t use the `cluster` and `clusterAllReplicas` table functions. 
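
A minimal usage sketch of the two functions, assuming a cluster named `test_cluster_two_shards` is configured in `remote_servers` (any cluster listed in `system.clusters` works; the name here is only an assumption):

``` sql
-- One replica per shard is queried:
SELECT hostName(), count()
FROM cluster('test_cluster_two_shards', system.one)
GROUP BY hostName();

-- Every replica of every shard is queried as a separate shard/connection:
SELECT hostName(), count()
FROM clusterAllReplicas('test_cluster_two_shards', system.one)
GROUP BY hostName();
```
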
+ +The `cluster` and `clusterAllReplicas` table functions can be useful in the following cases: + +- Accessing a specific cluster for data comparison, debugging, and testing. +- Queries to various ClickHouse clusters and replicas for research purposes. +- Infrequent distributed requests that are made manually. + +Connection settings like `host`, `port`, `user`, `password`, `compression`, `secure` are taken from `` config section. See details in [Distributed engine](../../engines/table-engines/special/distributed.md). + +**See Also** + +- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [load\_balancing](../../operations/settings/settings.md#settings-load_balancing) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index a2ba0046cfb..a7bdca6218e 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -12,6 +12,8 @@ Signatures: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the server’s config file (by default, 9000). diff --git a/docs/es/sql-reference/table-functions/remote.md b/docs/es/sql-reference/table-functions/remote.md index 8d49348c76e..d614c7b3c0e 100644 --- a/docs/es/sql-reference/table-functions/remote.md +++ b/docs/es/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Firma: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` o simplemente `host`. El host se puede especificar como nombre de servidor o como dirección IPv4 o IPv6. Una dirección IPv6 se especifica entre corchetes. El puerto es el puerto TCP del servidor remoto. Si se omite el puerto, utiliza `tcp_port` del archivo de configuración del servidor (por defecto, 9000). diff --git a/docs/fa/sql-reference/table-functions/remote.md b/docs/fa/sql-reference/table-functions/remote.md index 23a6753fd26..dd6e6725be4 100644 --- a/docs/fa/sql-reference/table-functions/remote.md +++ b/docs/fa/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u062F\u0648\u0631" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` یا فقط `host`. میزبان را می توان به عنوان نام سرور مشخص, و یا به عنوان ایپو4 یا ایپو6 نشانی. 
نشانی اینترنتی6 در براکت مربع مشخص شده است. پورت پورت تی سی پی بر روی سرور از راه دور است. اگر پورت حذف شده است, با استفاده از `tcp_port` از فایل پیکربندی سرور (به طور پیش فرض, 9000). diff --git a/docs/fr/sql-reference/table-functions/remote.md b/docs/fr/sql-reference/table-functions/remote.md index 2a2fa7d829d..3e911b61d75 100644 --- a/docs/fr/sql-reference/table-functions/remote.md +++ b/docs/fr/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Signature: ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` ou juste `host`. L'hôte peut être spécifié comme nom de serveur ou l'adresse IPv4 ou IPv6. Une adresse IPv6 est indiquée entre crochets. Le port est le port TCP sur le serveur distant. Si le port est omis, il utilise `tcp_port` à partir du fichier de configuration du serveur (par défaut, 9000). diff --git a/docs/ja/sql-reference/table-functions/remote.md b/docs/ja/sql-reference/table-functions/remote.md index 5a68776d93f..368584551fc 100644 --- a/docs/ja/sql-reference/table-functions/remote.md +++ b/docs/ja/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u30EA\u30E2\u30FC\u30C8" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`、または単に `host`. ホストは、サーバー名またはIPv4またはIPv6アドレスとして指定できます。 IPv6アドレスは角かっこで指定します。 ポートは、リモートサーバー上のTCPポートです。 ポートが省略されると、次のようになります `tcp_port` サーバーの設定ファイルから(デフォルトでは9000)。 diff --git a/docs/ru/sql-reference/table-functions/cluster.md b/docs/ru/sql-reference/table-functions/cluster.md new file mode 100644 index 00000000000..7663e89b64d --- /dev/null +++ b/docs/ru/sql-reference/table-functions/cluster.md @@ -0,0 +1,36 @@ +--- +toc_priority: 50 +toc_title: cluster +--- + +# cluster, clusterAllReplicas {#cluster-clusterallreplicas} + +Позволяет обратиться ко всем серверам существующего кластера, который присутствует в таблице `system.clusters` и сконфигурирован в секцци `remote_servers` без создания таблицы типа `Distributed`. +`clusterAllReplicas` - работает также как `cluster` но каждая реплика в кластере будет использована как отдельный шард/отдельное соединение. + + +Сигнатуры: + +``` sql +cluster('cluster_name', db.table) +cluster('cluster_name', db, table) +clusterAllReplicas('cluster_name', db.table) +clusterAllReplicas('cluster_name', db, table) +``` + +`cluster_name` – имя кластера, который обязан присутствовать в таблице `system.clusters` и обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер. + +Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимальное чем создание таблицы типа `Distributed`, поскольку в этом случае соединение с сервером переустанавливается на каждый запрос. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`. 
+ +Табличные функции `cluster` and `clusterAllReplicas` могут быть полезны в следующих случаях: + +- Чтение данных из конкретного кластера для сравнения данных, отладки и тестирования. +- Запросы к разным ClickHouse кластерам и репликам в целях исследования. +- Нечастых распределенных запросов которые делаются вручную. + +Настройки соединения `user`, `password`, `host`, `post`, `compression`, `secure` берутся из секции `` файлов конфигурации. См. подробности в разделе [Distributed](../../engines/table-engines/special/distributed.md) + +**See Also** + +- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [load\_balancing](../../operations/settings/settings.md#settings-load_balancing) \ No newline at end of file diff --git a/docs/ru/sql-reference/table-functions/remote.md b/docs/ru/sql-reference/table-functions/remote.md index 55c602d4bbf..ab216d5de36 100644 --- a/docs/ru/sql-reference/table-functions/remote.md +++ b/docs/ru/sql-reference/table-functions/remote.md @@ -7,6 +7,8 @@ ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` - выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера - это `хост:порт`, или только `хост`. Хост может быть указан в виде имени сервера, или в виде IPv4 или IPv6 адреса. IPv6 адрес указывается в квадратных скобках. Порт - TCP-порт удалённого сервера. Если порт не указан, используется `tcp_port` из конфигурационного файла сервера (по умолчанию - 9000). diff --git a/docs/tr/sql-reference/table-functions/remote.md b/docs/tr/sql-reference/table-functions/remote.md index e324564b97b..059e8aba72b 100644 --- a/docs/tr/sql-reference/table-functions/remote.md +++ b/docs/tr/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ Oluşturmadan uzak sunuculara erişmenizi sağlar. `Distributed` Tablo. ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port` ya da sadece `host`. Ana bilgisayar sunucu adı veya IPv4 veya IPv6 adresi olarak belirtilebilir. Köşeli parantez içinde bir IPv6 adresi belirtilir. Bağlantı noktası, uzak sunucudaki TCP bağlantı noktasıdır. Bağlantı noktası atlanırsa, kullanır `tcp_port` sunucunun yapılandırma dosyasından (varsayılan olarak, 9000). diff --git a/docs/zh/sql-reference/table-functions/remote.md b/docs/zh/sql-reference/table-functions/remote.md index 1f3bc58111b..1125353e2fa 100644 --- a/docs/zh/sql-reference/table-functions/remote.md +++ b/docs/zh/sql-reference/table-functions/remote.md @@ -14,6 +14,8 @@ toc_title: "\u8FDC\u7A0B" ``` sql remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db.table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db, table[, 'user'[, 'password']]) +remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) ``` `addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`,或者只是 `host`. 
主机可以指定为服务器名称,也可以指定为IPv4或IPv6地址。 IPv6地址在方括号中指定。 端口是远程服务器上的TCP端口。 如果省略端口,它使用 `tcp_port` 从服务器的配置文件(默认情况下,9000)。 From 7ce3464810c632ddf56ec79096d0cc9bdcb7eff4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Jul 2020 18:13:08 +0300 Subject: [PATCH 302/330] Remove -v from ninja --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 070e1f8c2db..30a576a5d76 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -18,7 +18,7 @@ ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS .. -ninja -v clickhouse-bundle +ninja clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output find . -name '*.so' -print -exec mv '{}' /output \; From a2047de471f599512d39fafca7c1249a6aa1821e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 20:18:37 +0300 Subject: [PATCH 303/330] [docs] introduction for integration table engines (#12167) * [docs] introduction for integration table engines * Update jdbc.md * Update odbc.md * Update mysql.md * Update kafka.md * Update hdfs.md --- docs/en/engines/table-engines/integrations/hdfs.md | 2 +- docs/en/engines/table-engines/integrations/index.md | 10 ++++++++++ docs/en/engines/table-engines/integrations/jdbc.md | 2 +- docs/en/engines/table-engines/integrations/kafka.md | 2 +- docs/en/engines/table-engines/integrations/mysql.md | 4 ++-- docs/en/engines/table-engines/integrations/odbc.md | 2 +- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 847b6fa90ee..565d67ce010 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -1,5 +1,5 @@ --- -toc_priority: 36 +toc_priority: 4 toc_title: HDFS --- diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index dd005919ad1..cfe94c8d305 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -3,4 +3,14 @@ toc_folder_title: Integrations toc_priority: 30 --- +# Table Engines for Integrations +ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require to use custom query methods on each use. 
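
A quick sketch of that transparent querying, assuming a reachable MySQL server; the host, database, table, and credentials below are placeholders, not values from this patch:

``` sql
-- The engine stores no data locally; SELECT queries are proxied to the remote MySQL server.
CREATE TABLE mysql_customers
(
    id UInt32,
    name String
)
ENGINE = MySQL('mysql-host:3306', 'shop', 'customers', 'reader', 'secret');

-- Looks like a normal table, but the query is executed on the MySQL side.
SELECT name FROM mysql_customers WHERE id = 42;
```
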
+ +List of supported integrations: + +- [ODBC](odbc.md) +- [JDBC](jdbc.md) +- [MySQL](mysql.md) +- [HDFS](hdfs.md) +- [Kafka](kafka.md) diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 08ddc19520a..2144be9f1e3 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -1,5 +1,5 @@ --- -toc_priority: 34 +toc_priority: 2 toc_title: JDBC --- diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index cff9ab3a0c4..3324386e1c5 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -1,5 +1,5 @@ --- -toc_priority: 32 +toc_priority: 5 toc_title: Kafka --- diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index c98d492322f..805cb4817a5 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -1,9 +1,9 @@ --- -toc_priority: 33 +toc_priority: 3 toc_title: MySQL --- -# Mysql {#mysql} +# MySQL {#mysql} The MySQL engine allows you to perform `SELECT` queries on data that is stored on a remote MySQL server. diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 04387760fb4..b2924df4831 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -1,5 +1,5 @@ --- -toc_priority: 35 +toc_priority: 1 toc_title: ODBC --- From 8038383f06c03a1cc6a81d0f8f0715ead1f636ba Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 6 Jul 2020 19:24:33 +0200 Subject: [PATCH 304/330] Fix #10437, CR fixes --- src/Interpreters/SystemLog.h | 7 ++-- src/Storages/StorageBuffer.cpp | 5 ++- ...terialized_view_with_join_on_query_log.sql | 6 ++- ...ble_flush_with_materialized_view.reference | 3 ++ ...fer_table_flush_with_materialized_view.sql | 37 +++++++++++++++++++ 5 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference create mode 100644 tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 1b5bcbacc6d..7d533a3bab7 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -161,7 +161,6 @@ protected: private: /* Saving thread data */ Context & context; - Context insert_context; const StorageID table_id; const String storage_def; StoragePtr table; @@ -208,13 +207,11 @@ SystemLog::SystemLog(Context & context_, const String & storage_def_, size_t flush_interval_milliseconds_) : context(context_) - , insert_context(Context(context_)) , table_id(database_name_, table_name_) , storage_def(storage_def_) , flush_interval_milliseconds(flush_interval_milliseconds_) { assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); - insert_context.makeQueryContext(); // we need query context to do inserts to target table with MV containing subqueries or joins log = &Poco::Logger::get("SystemLog (" + database_name_ + "." 
+ table_name_ + ")"); } @@ -428,6 +425,10 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert->table_id = table_id; ASTPtr query_ptr(insert.release()); + // we need query context to do inserts to target table with MV containing subqueries or joins + auto insert_context = Context(context); + insert_context.makeQueryContext(); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 85b61dd34f9..3bc88f5a289 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -713,7 +713,10 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl for (const auto & column : block_to_write) list_of_columns->children.push_back(std::make_shared(column.name)); - InterpreterInsertQuery interpreter{insert, global_context, allow_materialized}; + auto insert_context = Context(global_context); + insert_context.makeQueryContext(); + + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); block_io.out->writePrefix(); diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql index 3d1b464f164..4f216ae647f 100644 --- a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql @@ -4,6 +4,10 @@ DROP TABLE IF EXISTS expected_times; CREATE TABLE expected_times (QUERY_GROUP_ID String, max_query_duration_ms UInt64) Engine=Memory; INSERT INTO expected_times VALUES('main_dashboard_top_query', 100), ('main_dashboard_bottom_query', 100); +SET log_queries=1; +SELECT 1; +SYSTEM FLUSH LOGS; + CREATE MATERIALIZED VIEW slow_log Engine=Memory AS ( SELECT * FROM @@ -18,8 +22,6 @@ CREATE MATERIALIZED VIEW slow_log Engine=Memory AS WHERE query_duration_ms > max_query_duration_ms ); -SET log_queries=1; - SELECT 1 /* QUERY_GROUP_ID:main_dashboard_top_query */; SELECT 1 /* QUERY_GROUP_ID:main_dashboard_bottom_query */; diff --git a/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference new file mode 100644 index 00000000000..083edaac248 --- /dev/null +++ b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.reference @@ -0,0 +1,3 @@ +2 +2 +2 diff --git a/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql new file mode 100644 index 00000000000..424c38d5590 --- /dev/null +++ b/tests/queries/0_stateless/01361_buffer_table_flush_with_materialized_view.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS t1_01361; +DROP TABLE IF EXISTS t2_01361; +DROP TABLE IF EXISTS mv1_01361; +DROP TABLE IF EXISTS b1_01361; + +CREATE TABLE t1_01361 ( + i UInt32, + time DateTime +) ENGINE = MergeTree() +PARTITION BY time +ORDER BY time; + +CREATE TABLE t2_01361 ( + i UInt32, + time DateTime +) ENGINE = MergeTree() +PARTITION BY time +ORDER BY time; + +CREATE MATERIALIZED VIEW mv1_01361 +TO t2_01361 +AS SELECT * FROM (SELECT * FROM t1_01361); + +CREATE TABLE b1_01361 AS t1_01361 +ENGINE = Buffer(currentDatabase(), t1_01361, 1, 0, 0, 1, 1, 1, 1); + +INSERT INTO b1_01361 VALUES (1, now()); +INSERT INTO b1_01361 VALUES (2, now()); + +SELECT count() FROM b1_01361; +SELECT 
count() FROM t1_01361; +SELECT count() FROM t2_01361; + +DROP TABLE IF EXISTS t1_01361; +DROP TABLE IF EXISTS t2_01361; +DROP TABLE IF EXISTS mv1_01361; +DROP TABLE IF EXISTS b1_01361; From 93517b4e827ca6b059e0940eb1a84421916b3b78 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 6 Jul 2020 20:48:33 +0200 Subject: [PATCH 305/330] Same change for Kafka - just in case, and to make it conform. --- src/Storages/Kafka/StorageKafka.cpp | 8 ++++---- src/Storages/Kafka/StorageKafka.h | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 6499941a68d..f1d7650c785 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -125,7 +125,6 @@ StorageKafka::StorageKafka( std::unique_ptr kafka_settings_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) - , kafka_context(std::make_shared(global_context)) , kafka_settings(std::move(kafka_settings_)) , topics(parseTopics(global_context.getMacros()->expand(kafka_settings->kafka_topic_list.value))) , brokers(global_context.getMacros()->expand(kafka_settings->kafka_broker_list.value)) @@ -145,9 +144,6 @@ StorageKafka::StorageKafka( setInMemoryMetadata(storage_metadata); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); - - kafka_context->makeQueryContext(); - kafka_context->applySettingsChanges(settings_adjustments); } SettingsChanges StorageKafka::createSettingsAdjustments() @@ -530,6 +526,10 @@ bool StorageKafka::streamToViews() size_t block_size = getMaxBlockSize(); + auto kafka_context = std::make_shared(global_context); + kafka_context->makeQueryContext(); + kafka_context->applySettingsChanges(settings_adjustments); + // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns InterpreterInsertQuery interpreter(insert, *kafka_context, false, true, true); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 6f479ba2089..b7e6ea2a7e0 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -70,7 +70,6 @@ protected: private: // Configuration and state Context & global_context; - std::shared_ptr kafka_context; std::unique_ptr kafka_settings; const Names topics; const String brokers; From 7372c4634255b18dc1348501077987f1ba9072d5 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 23:16:41 +0300 Subject: [PATCH 306/330] [docs] add redirect from an introduction index page (#12176) --- docs/redirects.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/redirects.txt b/docs/redirects.txt index 13392e5dce5..233492f09ac 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -161,6 +161,7 @@ interfaces/third-party/client_libraries.md interfaces/third-party/client-librari interfaces/third-party_client_libraries.md interfaces/third-party/client-libraries.md interfaces/third-party_gui.md interfaces/third-party/gui.md interfaces/third_party/index.md interfaces/third-party/index.md +introduction/index.md introduction/distinctive_features.md introduction/distinctive-features.md introduction/features_considered_disadvantages.md introduction/distinctive-features.md introduction/possible_silly_questions.md faq/general.md From 685fb4669e553d8056e3cd5cdeee135991b54a2e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 6 Jul 2020 23:25:21 +0300 Subject: [PATCH 307/330] 
[website] add apple-touch-icon (#12164) --- website/images/logo-180x180.png | Bin 0 -> 702 bytes website/templates/common_meta.html | 1 + 2 files changed, 1 insertion(+) create mode 100644 website/images/logo-180x180.png diff --git a/website/images/logo-180x180.png b/website/images/logo-180x180.png new file mode 100644 index 0000000000000000000000000000000000000000..ee9bae1f61ef701514b88f892655ce6b6862a5c4 GIT binary patch literal 702 zcmeAS@N?(olHy`uVBq!ia0vp^TR@nD8Ax&oe*=;XEa{HEjtmSN`?>!lvI6-E$sR$z z3=CCj3=9n|3=F@3LJcn%7)lKo7+xhXFj&oCU=S~uvn$XBD3K806XN>+43Nd}{}jXj za}574GW<_t_z&cs0VALwgaIUhvT!ys1E>X}3!)LC7jDk~|3CSH-vKR@Jdae!?zkJ^vl!5F{I+w+nbhrha3c44<2OWiM#Rr?Z4wHu6gV% z3cX9KKJsh5^ZOpL)njtp(M^53m+|i26n*sWD(&C9ub35v>vx|FSIr<1y!g22a>2~S z*JtPIlz+*qU*WWKu{iVY>TfT&q_BWr%(fd&%h{e<{7d7$cyf&hFFP-O@K^bZa+!^< zvAccp^V2%TU$o2io?VCv{ErERK(!v>gTe~DWM4f{UP|E literal 0 HcmV?d00001 diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 350bcf18f05..89a650fba6a 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -6,6 +6,7 @@ {% if title %}{{ title }}{% else %}{{ _('ClickHouse - fast open-source OLAP DBMS') }}{% endif %} + From 2e8ba3427a5f57599f25208c0acc330275f26aa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 6 Jul 2020 23:40:32 +0300 Subject: [PATCH 308/330] Better assert --- src/Functions/GeoHash.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/GeoHash.cpp b/src/Functions/GeoHash.cpp index 679c0b87975..3ebc6f3d0fc 100644 --- a/src/Functions/GeoHash.cpp +++ b/src/Functions/GeoHash.cpp @@ -115,7 +115,7 @@ inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, uin result.fill(0); const auto bits = (precision * BITS_PER_SYMBOL) / 2; - assert(bits <= 255); + assert(bits < 255); uint8_t i = 0; for (; i < bits; ++i) { From 4733504b517a89f5157ddc5dc756546349b8069e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 6 Jul 2020 15:23:36 +0300 Subject: [PATCH 309/330] Don't split dictionary source's table name into schema and table name itself if ODBC driver doesn't support schema. 
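
A sketch of the user-facing effect, with an invented DSN and table names: after this change, a dotted table name in an ODBC dictionary source is split into schema and table only when the driver reports schema support; otherwise it is passed through unchanged, and an explicitly configured schema for such a driver is rejected.

``` sql
-- Hypothetical dictionary; 'public.customers' is split into schema 'public' and
-- table 'customers' only if the ODBC driver supports schemas.
CREATE DICTIONARY odbc_customers
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(ODBC(connection_string 'DSN=postgres_dsn' table 'public.customers'))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 600);
```
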
--- programs/odbc-bridge/CMakeLists.txt | 1 + programs/odbc-bridge/HandlerFactory.cpp | 6 ++ programs/odbc-bridge/HandlerFactory.h | 3 +- programs/odbc-bridge/SchemaAllowedHandler.cpp | 76 +++++++++++++++++++ programs/odbc-bridge/SchemaAllowedHandler.h | 31 ++++++++ src/Common/XDBCBridgeHelper.h | 24 ++++++ .../CassandraDictionarySource.cpp | 2 +- .../ClickHouseDictionarySource.cpp | 4 +- src/Dictionaries/ExternalQueryBuilder.cpp | 16 +--- src/Dictionaries/ExternalQueryBuilder.h | 3 +- src/Dictionaries/MySQLDictionarySource.cpp | 4 +- src/Dictionaries/XDBCDictionarySource.cpp | 39 +++++++++- src/Dictionaries/XDBCDictionarySource.h | 1 + .../test_dictionaries_dependency/test.py | 1 - 14 files changed, 188 insertions(+), 23 deletions(-) create mode 100644 programs/odbc-bridge/SchemaAllowedHandler.cpp create mode 100644 programs/odbc-bridge/SchemaAllowedHandler.h diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 4b63ed2596d..8cfa110adad 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -8,6 +8,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES ODBCBlockOutputStream.cpp ODBCBridge.cpp PingHandler.cpp + SchemaAllowedHandler.cpp validateODBCConnectionString.cpp ) set (CLICKHOUSE_ODBC_BRIDGE_LINK diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index a02fcadea52..0cc40480b87 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -29,6 +29,12 @@ Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco: return new IdentifierQuoteHandler(keep_alive_timeout, context); #else return nullptr; +#endif + else if (uri.getPath() == "/schema_allowed") +#if USE_ODBC + return new SchemaAllowedHandler(keep_alive_timeout, context); +#else + return nullptr; #endif else if (uri.getPath() == "/write") return new ODBCHandler(pool_map, keep_alive_timeout, context, "write"); diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 35835de5dad..1d4edfc9dd1 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -6,6 +6,7 @@ #include "MainHandler.h" #include "ColumnInfoHandler.h" #include "IdentifierQuoteHandler.h" +#include "SchemaAllowedHandler.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -15,7 +16,7 @@ namespace DB { -/** Factory for '/ping', '/', '/columns_info', '/identifier_quote' handlers. +/** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers. 
* Also stores Session pools for ODBC connections */ class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp new file mode 100644 index 00000000000..5aaba57399e --- /dev/null +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -0,0 +1,76 @@ +#include "SchemaAllowedHandler.h" + +#if USE_ODBC + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include "validateODBCConnectionString.h" + +# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC + +namespace DB +{ +namespace +{ + bool isSchemaAllowed(SQLHDBC hdbc) + { + std::string identifier; + + SQLSMALLINT t; + SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_SCHEMA_USAGE, nullptr, 0, &t); + + if (POCO_SQL_ODBC_CLASS::Utility::isError(r)) + throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc); + + return t != 0; + } +} + + +void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +{ + Poco::Net::HTMLForm params(request, request.stream()); + LOG_TRACE(log, "Request URI: {}", request.getURI()); + + auto process_error = [&response, this](const std::string & message) + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + if (!response.sent()) + response.send() << message << std::endl; + LOG_WARNING(log, message); + }; + + if (!params.has("connection_string")) + { + process_error("No 'connection_string' in request URL"); + return; + } + + try + { + std::string connection_string = params.get("connection_string"); + POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); + SQLHDBC hdbc = session.dbc().handle(); + + bool result = isSchemaAllowed(hdbc); + + WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + writeBoolText(result, out); + } + catch (...) + { + process_error("Error getting schema usage from ODBC '" + getCurrentExceptionMessage(false) + "'"); + tryLogCurrentException(log); + } +} + +} + +#endif diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h new file mode 100644 index 00000000000..76aa23b903c --- /dev/null +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +#if USE_ODBC + +namespace DB +{ +class Context; + + +/// This handler establishes connection to database, and retrieve whether schema is allowed. 
+class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler +{ +public: + SchemaAllowedHandler(size_t keep_alive_timeout_, Context &) + : log(&Poco::Logger::get("SchemaAllowedHandler")), keep_alive_timeout(keep_alive_timeout_) + { + } + + void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + +private: + Poco::Logger * log; + size_t keep_alive_timeout; +}; + +} + +#endif diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index 9f20c75182d..a425cd36b11 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -41,6 +41,7 @@ public: virtual Poco::URI getMainURI() const = 0; virtual Poco::URI getColumnsInfoURI() const = 0; virtual IdentifierQuotingStyle getIdentifierQuotingStyle() = 0; + virtual bool isSchemaAllowed() = 0; virtual String getName() const = 0; virtual ~IXDBCBridgeHelper() = default; @@ -61,6 +62,7 @@ private: Poco::Logger * log = &Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"); std::optional quote_style; + std::optional is_schema_allowed; protected: auto getConnectionString() const @@ -80,6 +82,7 @@ public: static constexpr inline auto MAIN_HANDLER = "/"; static constexpr inline auto COL_INFO_HANDLER = "/columns_info"; static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; + static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; static constexpr inline auto PING_OK_ANSWER = "Ok."; XDBCBridgeHelper(const Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) @@ -128,6 +131,27 @@ public: return *quote_style; } + bool isSchemaAllowed() override + { + if (!is_schema_allowed.has_value()) + { + startBridgeSync(); + + auto uri = createBaseURI(); + uri.setPath(SCHEMA_ALLOWED_HANDLER); + uri.addQueryParameter("connection_string", getConnectionString()); + + ReadWriteBufferFromHTTP buf( + uri, Poco::Net::HTTPRequest::HTTP_POST, {}, ConnectionTimeouts::getHTTPTimeouts(context)); + + bool res; + readBoolText(res, buf); + is_schema_allowed = res; + } + + return *is_schema_allowed; + } + /** * @todo leaky abstraction - used by external API's */ diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index c41f528db91..5c7fd4f50fd 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -102,7 +102,7 @@ CassandraDictionarySource::CassandraDictionarySource( , dict_struct(dict_struct_) , settings(settings_) , sample_block(sample_block_) - , query_builder(dict_struct, settings.db, settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) + , query_builder(dict_struct, settings.db, "", settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) { cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str())); if (settings.port) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 9d3f6063a21..180750d143a 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -66,7 +66,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , where{config.getString(config_prefix + ".where", "")} , update_field{config.getString(config_prefix + ".update_field", "")} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} - , query_builder{dict_struct, db, table, where, 
IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block_} , context(context_) , is_local{isLocalAddress({host, port}, secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort())} @@ -97,7 +97,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar , update_field{other.update_field} , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{other.sample_block} , context(other.context) , is_local{other.is_local} diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index e64f04d28f2..0cf7e28eb29 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -19,22 +19,12 @@ namespace ErrorCodes ExternalQueryBuilder::ExternalQueryBuilder( const DictionaryStructure & dict_struct_, const std::string & db_, + const std::string & schema_, const std::string & table_, const std::string & where_, IdentifierQuotingStyle quoting_style_) - : dict_struct(dict_struct_), db(db_), where(where_), quoting_style(quoting_style_) -{ - if (auto pos = table_.find('.'); pos != std::string::npos) - { - schema = table_.substr(0, pos); - table = table_.substr(pos + 1); - } - else - { - schema = ""; - table = table_; - } -} + : dict_struct(dict_struct_), db(db_), schema(schema_), table(table_), where(where_), quoting_style(quoting_style_) +{} void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out) const diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 3011efbc895..4c0e876b5db 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -18,8 +18,8 @@ struct ExternalQueryBuilder { const DictionaryStructure & dict_struct; std::string db; - std::string table; std::string schema; + std::string table; const std::string & where; IdentifierQuotingStyle quoting_style; @@ -28,6 +28,7 @@ struct ExternalQueryBuilder ExternalQueryBuilder( const DictionaryStructure & dict_struct_, const std::string & db_, + const std::string & schema_, const std::string & table_, const std::string & where_, IdentifierQuotingStyle quoting_style_); diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 505ce7b0c12..f016f2bf537 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -68,7 +68,7 @@ MySQLDictionarySource::MySQLDictionarySource( , dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)} , sample_block{sample_block_} , pool{mysqlxx::PoolFactory::instance().get(config, config_prefix)} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} @@ -87,7 +87,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other , 
dont_check_update_time{other.dont_check_update_time} , sample_block{other.sample_block} , pool{other.pool} - , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , load_all_query{other.load_all_query} , last_modification{other.last_modification} , invalidate_query{other.invalidate_query} diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 92af20e646b..b3393d55e5d 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -27,6 +27,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -60,6 +61,39 @@ namespace std::unique_ptr read_buf; BlockInputStreamPtr reader; }; + + + ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct_, + const std::string & db_, + const std::string & schema_, + const std::string & table_, + const std::string & where_, + IXDBCBridgeHelper & bridge_) + { + std::string schema = schema_; + std::string table = table_; + + if (bridge_.isSchemaAllowed()) + { + if (schema.empty()) + { + if (auto pos = table.find('.'); pos != std::string::npos) + { + schema = table.substr(0, pos); + table = table.substr(pos + 1); + } + } + } + else + { + if (!schema.empty()) + throw Exception{"Dictionary source of type " + bridge_.getName() + " specifies a schema but schema is not supported by " + + bridge_.getName() + "-driver", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + + return {dict_struct_, db_, schema, table, where_, bridge_.getIdentifierQuotingStyle()}; + } } static const UInt64 max_block_size = 8192; @@ -76,11 +110,12 @@ XDBCDictionarySource::XDBCDictionarySource( , update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , db{config_.getString(config_prefix_ + ".db", "")} + , schema{config_.getString(config_prefix_ + ".schema", "")} , table{config_.getString(config_prefix_ + ".table")} , where{config_.getString(config_prefix_ + ".where", "")} , update_field{config_.getString(config_prefix_ + ".update_field", "")} , sample_block{sample_block_} - , query_builder{dict_struct, db, table, where, bridge_->getIdentifierQuotingStyle()} + , query_builder{makeExternalQueryBuilder(dict_struct, db, schema, table, where, *bridge_)} , load_all_query{query_builder.composeLoadAllQuery()} , invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")} , bridge_helper{bridge_} @@ -104,7 +139,7 @@ XDBCDictionarySource::XDBCDictionarySource(const XDBCDictionarySource & other) , where{other.where} , update_field{other.update_field} , sample_block{other.sample_block} - , query_builder{dict_struct, db, table, where, other.bridge_helper->getIdentifierQuotingStyle()} + , query_builder{other.query_builder} , load_all_query{other.load_all_query} , invalidate_query{other.invalidate_query} , invalidate_query_response{other.invalidate_query_response} diff --git a/src/Dictionaries/XDBCDictionarySource.h b/src/Dictionaries/XDBCDictionarySource.h index 253f802d8fd..87bc42c76ab 100644 --- a/src/Dictionaries/XDBCDictionarySource.h +++ b/src/Dictionaries/XDBCDictionarySource.h @@ -69,6 +69,7 @@ private: std::chrono::time_point update_time; const DictionaryStructure dict_struct; const std::string db; + const std::string schema; const std::string table; const std::string where; const std::string update_field; diff --git 
a/tests/integration/test_dictionaries_dependency/test.py b/tests/integration/test_dictionaries_dependency/test.py index 31c5a6c549a..4505bf73a7c 100644 --- a/tests/integration/test_dictionaries_dependency/test.py +++ b/tests/integration/test_dictionaries_dependency/test.py @@ -87,7 +87,6 @@ def test_dependency_via_explicit_table(node): check() -@pytest.mark.skip(reason="TODO: should be fixed") @pytest.mark.parametrize("node", nodes) def test_dependency_via_dictionary_database(node): node.query("CREATE DATABASE dict_db ENGINE=Dictionary") From c78d44cb555f62f6212f944a77db8a32a7c59184 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 6 Jul 2020 23:24:11 +0200 Subject: [PATCH 310/330] Sync reference file with changes in sql file --- .../01360_materialized_view_with_join_on_query_log.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference index dfc14d94653..f6349a0b9b4 100644 --- a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.reference @@ -2,6 +2,7 @@ 1 1 1 +1 === system.query_log === main_dashboard_bottom_query 2 main_dashboard_top_query 2 From 30cc78a7d2587a9ffac5343c4a896c500bbbd6d6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 01:47:53 +0300 Subject: [PATCH 311/330] [docs] introduction for third-party interfaces (#12175) * [docs] introduction for third-party interfaces * Update index.md * Update index.md --- docs/en/interfaces/third-party/index.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index 84291a199eb..9d7d9106e63 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -3,4 +3,15 @@ toc_folder_title: Third-Party toc_priority: 24 --- +# Third-Party Interfaces +This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API: + +- [Client libraries](client-libraries.md) +- [Integrations](integrations.md) +- [GUI](gui.md) +- [Proxies](proxy.md) + + +!!! note "Note" + Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. 
From 8d65deaf85bc9730e4728869769d1708094da3d3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 01:48:11 +0300 Subject: [PATCH 312/330] [docs] introduction for special table engines (#12170) * [docs] introduction for integration table engines * Update jdbc.md * Update odbc.md * Update mysql.md * Update kafka.md * Update hdfs.md * [docs] introduction for special table engines * Update index.md * Update index.md --- docs/en/engines/table-engines/special/index.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md index b1789f34347..2c796a109da 100644 --- a/docs/en/engines/table-engines/special/index.md +++ b/docs/en/engines/table-engines/special/index.md @@ -3,4 +3,12 @@ toc_folder_title: Special toc_priority: 31 --- +# Special Table Engines +There are three main categories of table engines: + +- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use. +- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data. +- [Table engines for integrations](../../../engines/table-engines/integrations/index.md). + +The remaining engines are unique in their purpose and are not grouped into families yet, thus they are placed in this “special” category. From 7dba89eaa8ed460b237d6663fa32a687f613dbbd Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 7 Jul 2020 12:49:14 +0300 Subject: [PATCH 313/330] changelog fixes --- CHANGELOG.md | 2 ++ utils/simple-backport/backport.sh | 14 ++++++++------ utils/simple-backport/changelog.sh | 6 +++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5211da4203b..816aa556e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,11 @@ * Support writes in ODBC Table function [#10554](https://github.com/ClickHouse/ClickHouse/pull/10554) ([ageraab](https://github.com/ageraab)). [#10901](https://github.com/ClickHouse/ClickHouse/pull/10901) ([tavplubix](https://github.com/tavplubix)). * Add query performance metrics based on Linux `perf_events` (these metrics are calculated with hardware CPU counters and OS counters). It is optional and requires `CAP_SYS_ADMIN` to be set on clickhouse binary. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Now support `NULL` and `NOT NULL` modifiers for data types in `CREATE` query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). +* Add `ArrowStream` input and output format. [#11088](https://github.com/ClickHouse/ClickHouse/pull/11088) ([hcz](https://github.com/hczhcz)). * Added a new layout `direct` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). * Added new `complex_key_direct` layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). * Added support for MySQL style global variables syntax (stub). This is needed for compatibility of MySQL protocol. 
[#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Added syntax highligting to `clickhouse-client` using `replxx`. [#11422](https://github.com/ClickHouse/ClickHouse/pull/11422) ([Tagir Kuskarov](https://github.com/kuskarov)). * `minMap` and `maxMap` functions were added. [#11603](https://github.com/ClickHouse/ClickHouse/pull/11603) ([Ildus Kurbangaliev](https://github.com/ildus)). * Add the `system.asynchronous_metric_log` table that logs historical metrics from `system.asynchronous_metrics`. [#11588](https://github.com/ClickHouse/ClickHouse/pull/11588) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add functions `extractAllGroupsHorizontal(haystack, re)` and `extractAllGroupsVertical(haystack, re)`. [#11554](https://github.com/ClickHouse/ClickHouse/pull/11554) ([Vasily Nemkov](https://github.com/Enmk)). diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index 80a5d82d6f0..dcab4106136 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -26,16 +26,18 @@ then echo Some commits will be missed, review these manually. fi -# NOTE keep in sync with ./changelog.sh. +# NOTE keep in sync with ./backport.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; +find_prs=(sed -n "s/^.*merge[d]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") -"${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt -"${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt" +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" master-log.txt | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > master-prs.txt +"${find_prs[@]}" "$branch-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "$branch-prs.txt" # Find all master PRs that are not in branch by calculating differences of two PR lists. grep -f "$branch-prs.txt" -F -x -v master-prs.txt > "$branch-diff-prs.txt" diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index b9cbbf0d5a2..4d768226e03 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -21,10 +21,10 @@ fi # NOTE keep in sync with ./backport.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; +find_prs=(sed -n "s/^.*merge[d]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") # awk is to filter out small task numbers from different task tracker, which are # referenced by documentation commits like '* DOCSUP-824: query log (#115)'. 
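The relaxed `find_prs` patterns above are easier to follow on concrete commit subjects. Below is a minimal sketch, not part of the patch itself: it reuses the sed script and the `awk` threshold exactly as `backport.sh` has them after this change, assumes GNU sed (the `/I` case-insensitivity flag and `\+` are GNU extensions), and the sample commit subjects are invented for illustration.

```bash
# Run a few typical commit subjects through the same PR-number extraction
# pipeline that backport.sh uses after this patch.
find_prs=(sed -n "s/^.*merge[d]*.*#\([[:digit:]]\+\).*$/\1/Ip;
    s/^.*(#\([[:digit:]]\+\))$/\1/p;
    s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip;
    s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip")

printf '%s\n' \
    "Merge pull request #12345 from user/feature" \
    "Squashed feature description (#11111)" \
    "Backport #10987 to 20.3" \
    "* DOCSUP-824: query log (#115)" |
    "${find_prs[@]}" | sort -rn | uniq |
    awk '$0 > 1000 { print $0 }'
# Prints 12345, 11111 and 10987; the small "#115" documentation task number
# is dropped by the awk threshold, which is exactly what it was added for.
```
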
From c8f51bdc8a805f476c29dd27da69fe2499cdd229 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 7 Jul 2020 12:00:39 +0200 Subject: [PATCH 314/330] Tests for fixed issues #10846 and #7347 --- .../0_stateless/01323_if_with_nulls.reference | 4 ++++ .../queries/0_stateless/01323_if_with_nulls.sql | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/queries/0_stateless/01323_if_with_nulls.reference b/tests/queries/0_stateless/01323_if_with_nulls.reference index 6bf2d206e0b..80697f97f5c 100644 --- a/tests/queries/0_stateless/01323_if_with_nulls.reference +++ b/tests/queries/0_stateless/01323_if_with_nulls.reference @@ -18,3 +18,7 @@ Nullable(UInt8) \N 1 ok ok ok Nullable(UInt8) \N 1 ok ok ok Nullable(UInt8) \N 1 ok ok ok \N 1 Nullable(Int8) \N ok +\N Nullable(Float64) 0 +\N Nullable(Float64) 0 +1 +1 diff --git a/tests/queries/0_stateless/01323_if_with_nulls.sql b/tests/queries/0_stateless/01323_if_with_nulls.sql index f2cd943988f..6a4df79d765 100644 --- a/tests/queries/0_stateless/01323_if_with_nulls.sql +++ b/tests/queries/0_stateless/01323_if_with_nulls.sql @@ -36,3 +36,20 @@ SELECT b_num, isNull(b_num), toTypeName(b_num), b_num = 0, if(b_num = 0, 'fail', FROM (SELECT 1 k, toInt8(1) a_num) AS x LEFT JOIN (SELECT 2 k, toInt8(1) b_num) AS y USING (k); + +-- test case from https://github.com/ClickHouse/ClickHouse/issues/7347 +DROP TABLE IF EXISTS test_nullable_float_issue7347; +CREATE TABLE test_nullable_float_issue7347 (ne UInt64,test Nullable(Float64)) ENGINE = MergeTree() PRIMARY KEY (ne) ORDER BY (ne); +INSERT INTO test_nullable_float_issue7347 VALUES (1,NULL); + +SELECT test, toTypeName(test), IF(test = 0, 1, 0) FROM test_nullable_float_issue7347; + +WITH materialize(CAST(NULL, 'Nullable(Float64)')) AS test SELECT test, toTypeName(test), IF(test = 0, 1, 0); + +DROP TABLE test_nullable_float_issue7347; + +-- test case from https://github.com/ClickHouse/ClickHouse/issues/10846 + +SELECT if(isFinite(toUInt64OrZero(toNullable('123'))), 1, 0); + +SELECT if(materialize(isFinite(toUInt64OrZero(toNullable('123')))), 1, 0); From 688f185f9ed990b676430057f61773646f2f0f96 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 7 Jul 2020 13:28:45 +0300 Subject: [PATCH 315/330] style fix for #12152 --- src/Server/MySQLHandler.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 9e42f5ebc05..7d91b18983d 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -45,9 +45,9 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } -static String select_empty_replacement_query(const String & query); -static String show_table_status_replacement_query(const String & query); -static String kill_connection_id_replacement_query(const String & query); +static String selectEmptyReplacementQuery(const String & query); +static String showTableStatusReplacementQuery(const String & query); +static String killConnectionIdReplacementQuery(const String & query); MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_) @@ -62,9 +62,9 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so if (ssl_enabled) server_capability_flags |= CLIENT_SSL; - replacements.emplace("KILL QUERY", kill_connection_id_replacement_query); - replacements.emplace("SHOW TABLE STATUS LIKE", show_table_status_replacement_query); - replacements.emplace("SHOW VARIABLES", 
select_empty_replacement_query); + replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); + replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); + replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); } void MySQLHandler::run() @@ -380,14 +380,14 @@ static bool isFederatedServerSetupSetCommand(const String & query) } /// Replace "[query(such as SHOW VARIABLES...)]" into "". -static String select_empty_replacement_query(const String & query) +static String selectEmptyReplacementQuery(const String & query) { std::ignore = query; return "select ''"; } /// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". -static String show_table_status_replacement_query(const String & query) +static String showTableStatusReplacementQuery(const String & query) { const String prefix = "SHOW TABLE STATUS LIKE "; if (query.size() > prefix.size()) @@ -421,7 +421,7 @@ static String show_table_status_replacement_query(const String & query) } /// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id = 'mysql:[connection_id]'". -static String kill_connection_id_replacement_query(const String & query) +static String killConnectionIdReplacementQuery(const String & query) { const String prefix = "KILL QUERY "; if (query.size() > prefix.size()) From 6170bfd668c856ee2037961d1c07de3dca7e730f Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 7 Jul 2020 12:33:43 +0200 Subject: [PATCH 316/330] Added test for #3767 --- .../queries/0_stateless/01375_null_issue_3767.reference | 0 tests/queries/0_stateless/01375_null_issue_3767.sql | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/01375_null_issue_3767.reference create mode 100644 tests/queries/0_stateless/01375_null_issue_3767.sql diff --git a/tests/queries/0_stateless/01375_null_issue_3767.reference b/tests/queries/0_stateless/01375_null_issue_3767.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01375_null_issue_3767.sql b/tests/queries/0_stateless/01375_null_issue_3767.sql new file mode 100644 index 00000000000..88b18e001f9 --- /dev/null +++ b/tests/queries/0_stateless/01375_null_issue_3767.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS null_issue_3767; + +CREATE TABLE null_issue_3767 (value Nullable(String)) ENGINE=Memory; + +INSERT INTO null_issue_3767 (value) VALUES ('A String'), (NULL); + +SELECT value FROM null_issue_3767 WHERE value NOT IN ('A String'); + +DROP TABLE null_issue_3767; From dc85f590b7485a801a15c70b68c1b990894bae0a Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 7 Jul 2020 18:35:16 +0800 Subject: [PATCH 317/330] Update zh kafka.md title (#12192) --- docs/zh/engines/table-engines/integrations/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index c96b3bc22ed..557ab6a7845 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -1,4 +1,4 @@ -# 卡夫卡 {#kafka} +# Kafka {#kafka} 此引擎与 [Apache Kafka](http://kafka.apache.org/) 结合使用。 From 241b897e4bd40aa845b509c411d418ac8e338267 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Tue, 7 Jul 2020 18:35:39 +0800 Subject: [PATCH 318/330] Update index.md (#12191) Fix merge link broken --- docs/zh/engines/table-engines/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/zh/engines/table-engines/index.md b/docs/zh/engines/table-engines/index.md index b025eb1f6c9..f31fa257135 100644 --- a/docs/zh/engines/table-engines/index.md +++ b/docs/zh/engines/table-engines/index.md @@ -52,7 +52,7 @@ - [Distributed](special/distributed.md#distributed) - [MaterializedView](special/materializedview.md#materializedview) - [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge +- [Merge](special/merge.md#merge) - [File](special/file.md#file) - [Null](special/null.md#null) - [Set](special/set.md#set) From f8d584d5090a53d2306628d6a56afaca770d7773 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 7 Jul 2020 13:49:18 +0300 Subject: [PATCH 319/330] changelog fixes --- CHANGELOG.md | 7 +++++++ utils/simple-backport/backport.sh | 2 +- utils/simple-backport/changelog.sh | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 816aa556e81..54f574cc347 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ * Add query performance metrics based on Linux `perf_events` (these metrics are calculated with hardware CPU counters and OS counters). It is optional and requires `CAP_SYS_ADMIN` to be set on clickhouse binary. [#9545](https://github.com/ClickHouse/ClickHouse/pull/9545) [Andrey Skobtsov](https://github.com/And42). [#11226](https://github.com/ClickHouse/ClickHouse/pull/11226) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Now support `NULL` and `NOT NULL` modifiers for data types in `CREATE` query. [#11057](https://github.com/ClickHouse/ClickHouse/pull/11057) ([Павел Потемкин](https://github.com/Potya)). * Add `ArrowStream` input and output format. [#11088](https://github.com/ClickHouse/ClickHouse/pull/11088) ([hcz](https://github.com/hczhcz)). +* Support Cassandra as external dictionary source. [#4978](https://github.com/ClickHouse/ClickHouse/pull/4978) ([favstovol](https://github.com/favstovol)). * Added a new layout `direct` which loads all the data directly from the source for each query, without storing or caching data. [#10622](https://github.com/ClickHouse/ClickHouse/pull/10622) ([Artem Streltsov](https://github.com/kekekekule)). * Added new `complex_key_direct` layout to dictionaries, that does not store anything locally during query execution. [#10850](https://github.com/ClickHouse/ClickHouse/pull/10850) ([Artem Streltsov](https://github.com/kekekekule)). * Added support for MySQL style global variables syntax (stub). This is needed for compatibility of MySQL protocol. [#11832](https://github.com/ClickHouse/ClickHouse/pull/11832) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -32,6 +33,7 @@ * Add `netloc` function for extracting network location, similar to `urlparse(url)`, `netloc` in python. [#11356](https://github.com/ClickHouse/ClickHouse/pull/11356) ([Guillaume Tassery](https://github.com/YiuRULE)). * Add 2 more virtual columns for engine=Kafka to access message headers. [#11283](https://github.com/ClickHouse/ClickHouse/pull/11283) ([filimonov](https://github.com/filimonov)). * Add `_timestamp_ms` virtual column for Kafka engine (type is `Nullable(DateTime64(3))`). [#11260](https://github.com/ClickHouse/ClickHouse/pull/11260) ([filimonov](https://github.com/filimonov)). +* Add function `randomFixedString`. [#10866](https://github.com/ClickHouse/ClickHouse/pull/10866) ([Andrei Nekrashevich](https://github.com/xolm)). * Add function `fuzzBits` that randomly flips bits in a string with given probability. 
[#11237](https://github.com/ClickHouse/ClickHouse/pull/11237) ([Andrei Nekrashevich](https://github.com/xolm)). * Allow comparison of numbers with constant string in comparison operators, IN and VALUES sections. [#11647](https://github.com/ClickHouse/ClickHouse/pull/11647) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add `round_robin` load_balancing mode. [#11645](https://github.com/ClickHouse/ClickHouse/pull/11645) ([Azat Khuzhin](https://github.com/azat)). @@ -140,6 +142,7 @@ * Fixed parseDateTime64BestEffort argument resolution bugs. [#10925](https://github.com/ClickHouse/ClickHouse/issues/10925). [#11038](https://github.com/ClickHouse/ClickHouse/pull/11038) ([Vasily Nemkov](https://github.com/Enmk)). * Now it's possible to `ADD/DROP` and `RENAME` the same one column in a single `ALTER` query. Exception message for simultaneous `MODIFY` and `RENAME` became more clear. Partially fixes [#10669](https://github.com/ClickHouse/ClickHouse/issues/10669). [#11037](https://github.com/ClickHouse/ClickHouse/pull/11037) ([alesapin](https://github.com/alesapin)). * Fixed parsing of S3 URLs. [#11036](https://github.com/ClickHouse/ClickHouse/pull/11036) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix memory tracking for two-level `GROUP BY` when there is a `LIMIT`. [#11022](https://github.com/ClickHouse/ClickHouse/pull/11022) ([Azat Khuzhin](https://github.com/azat)). * Fix very rare potential use-after-free error in MergeTree if table was not created successfully. [#10986](https://github.com/ClickHouse/ClickHouse/pull/10986) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix metadata (relative path for rename) and data (relative path for symlink) handling for Atomic database. [#10980](https://github.com/ClickHouse/ClickHouse/pull/10980) ([Azat Khuzhin](https://github.com/azat)). * Fix server crash on concurrent `ALTER` and `DROP DATABASE` queries with `Atomic` database engine. [#10968](https://github.com/ClickHouse/ClickHouse/pull/10968) ([tavplubix](https://github.com/tavplubix)). @@ -208,6 +211,7 @@ * Default user and database creation on docker image starting. [#10637](https://github.com/ClickHouse/ClickHouse/pull/10637) ([Paramtamtam](https://github.com/tarampampam)). * When multiline query is printed to server log, the lines are joined. Make it to work correct in case of multiline string literals, identifiers and single-line comments. This fixes [#3853](https://github.com/ClickHouse/ClickHouse/issues/3853). [#11686](https://github.com/ClickHouse/ClickHouse/pull/11686) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Multiple names are now allowed in commands: CREATE USER, CREATE ROLE, ALTER USER, SHOW CREATE USER, SHOW GRANTS and so on. [#11670](https://github.com/ClickHouse/ClickHouse/pull/11670) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add support for distributed DDL (`UPDATE/DELETE/DROP PARTITION`) on cross replication clusters. [#11508](https://github.com/ClickHouse/ClickHouse/pull/11508) ([frank lee](https://github.com/etah000)). * Clear password from command line in `clickhouse-client` and `clickhouse-benchmark` if the user has specified it with explicit value. This prevents password exposure by `ps` and similar tools. [#11665](https://github.com/ClickHouse/ClickHouse/pull/11665) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Don't use debug info from ELF file if it doesn't correspond to the running binary. It is needed to avoid printing wrong function names and source locations in stack traces. 
This fixes [#7514](https://github.com/ClickHouse/ClickHouse/issues/7514). [#11657](https://github.com/ClickHouse/ClickHouse/pull/11657) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -223,6 +227,7 @@ * Automatically update DNS cache, which is used to check if user is allowed to connect from an address. [#11487](https://github.com/ClickHouse/ClickHouse/pull/11487) ([tavplubix](https://github.com/tavplubix)). * OPTIMIZE FINAL will force merge even if concurrent merges are performed. This closes [#11309](https://github.com/ClickHouse/ClickHouse/issues/11309) and closes [#11322](https://github.com/ClickHouse/ClickHouse/issues/11322). [#11346](https://github.com/ClickHouse/ClickHouse/pull/11346) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Suppress output of cancelled queries in clickhouse-client. In previous versions result may continue to print in terminal even after you press Ctrl+C to cancel query. This closes [#9473](https://github.com/ClickHouse/ClickHouse/issues/9473). [#11342](https://github.com/ClickHouse/ClickHouse/pull/11342) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now history file is updated after each query and there is no race condition if multiple clients use one history file. This fixes [#9897](https://github.com/ClickHouse/ClickHouse/issues/9897). [#11453](https://github.com/ClickHouse/ClickHouse/pull/11453) ([Tagir Kuskarov](https://github.com/kuskarov)). * Better log messages in while reloading configuration. [#11341](https://github.com/ClickHouse/ClickHouse/pull/11341) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Remove trailing whitespaces from formatted queries in `clickhouse-client` or `clickhouse-format` in some cases. [#11325](https://github.com/ClickHouse/ClickHouse/pull/11325) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add setting "output_format_pretty_max_value_width". If value is longer, it will be cut to avoid output of too large values in terminal. This closes [#11140](https://github.com/ClickHouse/ClickHouse/issues/11140). [#11324](https://github.com/ClickHouse/ClickHouse/pull/11324) ([alexey-milovidov](https://github.com/alexey-milovidov)). @@ -231,6 +236,7 @@ * Support kafka_client_id parameter for Kafka tables. It also changes the default `client.id` used by ClickHouse when communicating with Kafka to be more verbose and usable. [#11252](https://github.com/ClickHouse/ClickHouse/pull/11252) ([filimonov](https://github.com/filimonov)). * Keep the value of `DistributedFilesToInsert` metric on exceptions. In previous versions, the value was set when we are going to send some files, but it is zero, if there was an exception and some files are still pending. Now it corresponds to the number of pending files in filesystem. [#11220](https://github.com/ClickHouse/ClickHouse/pull/11220) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Add support for multi-word data type names (such as `DOUBLE PRECISION` and `CHAR VARYING`) for better SQL compatibility. [#11214](https://github.com/ClickHouse/ClickHouse/pull/11214) ([Павел Потемкин](https://github.com/Potya)). +* Provide synonyms for some data types. 
[#10856](https://github.com/ClickHouse/ClickHouse/pull/10856) ([Павел Потемкин](https://github.com/Potya)). * The query log is now enabled by default. [#11184](https://github.com/ClickHouse/ClickHouse/pull/11184) ([Ivan Blinkov](https://github.com/blinkov)). * Show authentication type in table system.users and while executing SHOW CREATE USER query. [#11080](https://github.com/ClickHouse/ClickHouse/pull/11080) ([Vitaly Baranov](https://github.com/vitlibar)). * Remove data on explicit `DROP DATABASE` for `Memory` database engine. Fixes [#10557](https://github.com/ClickHouse/ClickHouse/issues/10557). [#11021](https://github.com/ClickHouse/ClickHouse/pull/11021) ([tavplubix](https://github.com/tavplubix)). @@ -257,6 +263,7 @@ * New optimization that takes all operations out of `any` function, enabled with `optimize_move_functions_out_of_any` [#11529](https://github.com/ClickHouse/ClickHouse/pull/11529) ([Ruslan](https://github.com/kamalov-ruslan)). * Improve performance of `clickhouse-client` in interactive mode when Pretty formats are used. In previous versions, significant amount of time can be spent calculating visible width of UTF-8 string. This closes [#11323](https://github.com/ClickHouse/ClickHouse/issues/11323). [#11323](https://github.com/ClickHouse/ClickHouse/pull/11323) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improved performance for queries with `ORDER BY` and small `LIMIT` (less, then `max_block_size`). [#11171](https://github.com/ClickHouse/ClickHouse/pull/11171) ([Albert Kidrachev](https://github.com/Provet)). +* Add runtime CPU detection to select and dispatch the best function implementation. Add support for codegeneration for multiple targets. This closes [#1017](https://github.com/ClickHouse/ClickHouse/issues/1017). [#10058](https://github.com/ClickHouse/ClickHouse/pull/10058) ([DimasKovas](https://github.com/DimasKovas)). * Enable `mlock` of clickhouse binary by default. It will prevent clickhouse executable from being paged out under high IO load. [#11139](https://github.com/ClickHouse/ClickHouse/pull/11139) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Make queries with `sum` aggregate function and without GROUP BY keys to run multiple times faster. [#10992](https://github.com/ClickHouse/ClickHouse/pull/10992) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improving radix sort (used in `ORDER BY` with simple keys) by removing some redundant data moves. [#10981](https://github.com/ClickHouse/ClickHouse/pull/10981) ([Arslan Gumerov](https://github.com/g-arslan)). diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index dcab4106136..71920304d56 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -29,7 +29,7 @@ fi # NOTE keep in sync with ./backport.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*merge[d]*.*#\([[:digit:]]\+\).*$/\1/Ip; +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 4d768226e03..33908414235 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -21,7 +21,7 @@ fi # NOTE keep in sync with ./backport.sh. 
# Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*merge[d]*.*#\([[:digit:]]\+\).*$/\1/Ip; +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*(#\([[:digit:]]\+\))$/\1/p; s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") From d5c3cadcfc0e450ae024c41ba86843ab50724f69 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 7 Jul 2020 14:53:27 +0300 Subject: [PATCH 320/330] DOCSUP-1348 Russian translation for new functions (#133) (#12194) * Russian translation for new functions * Apply suggestions from code review Co-authored-by: BayoNet * Minor updates to russian text. Co-authored-by: Olga Revyakina Co-authored-by: BayoNet Co-authored-by: Sergei Shtykov Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> Co-authored-by: Olga Revyakina --- docs/en/sql-reference/functions/geo.md | 85 ++++--- docs/ru/sql-reference/functions/geo.md | 293 ++++++++++++++++++++++++- 2 files changed, 342 insertions(+), 36 deletions(-) diff --git a/docs/en/sql-reference/functions/geo.md b/docs/en/sql-reference/functions/geo.md index 65925f8a64b..0e8deb7a2c7 100644 --- a/docs/en/sql-reference/functions/geo.md +++ b/docs/en/sql-reference/functions/geo.md @@ -267,7 +267,7 @@ SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos ## h3GetBaseCell {#h3getbasecell} -Returns the base cell number of the index. +Returns the base cell number of the H3 index. **Syntax** @@ -275,20 +275,22 @@ Returns the base cell number of the index. h3GetBaseCell(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Hexagon base cell number. Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3GetBaseCell(612916788725809151) as basecell +SELECT h3GetBaseCell(612916788725809151) as basecell; ``` Result: @@ -301,7 +303,7 @@ Result: ## h3HexAreaM2 {#h3hexaream2} -Average hexagon area in square meters at the given resolution. +Returns average hexagon area in square meters at the given resolution. **Syntax** @@ -309,20 +311,22 @@ Average hexagon area in square meters at the given resolution. h3HexAreaM2(resolution) ``` -**Parameters** +**Parameter** - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Area in m². Type: [Float64](../../sql-reference/data-types/float.md). +- Area in square meters. + +Type: [Float64](../../sql-reference/data-types/float.md). **Example** Query: ``` sql -SELECT h3HexAreaM2(13) as area +SELECT h3HexAreaM2(13) as area; ``` Result: @@ -335,7 +339,7 @@ Result: ## h3IndexesAreNeighbors {#h3indexesareneighbors} -Returns whether or not the provided H3Indexes are neighbors. +Returns whether or not the provided H3 indexes are neighbors. **Syntax** @@ -348,16 +352,19 @@ h3IndexesAreNeighbors(index1, index2) - `index1` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Returns `1` if the indexes are neighbors, `0` otherwise. 
Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. +- `0` — Indexes are not neighbours. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n +SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n; ``` Result: @@ -370,7 +377,7 @@ Result: ## h3ToChildren {#h3tochildren} -Returns an array with the child indexes of the given index. +Returns an array of child indexes for the given H3 index. **Syntax** @@ -385,14 +392,16 @@ h3ToChildren(index, resolution) **Returned values** -- Array with the child H3 indexes. Array of type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Array of the child H3-indexes. + +Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** Query: ``` sql -SELECT h3ToChildren(599405990164561919, 6) AS children +SELECT h3ToChildren(599405990164561919, 6) AS children; ``` Result: @@ -405,7 +414,7 @@ Result: ## h3ToParent {#h3toparent} -Returns the parent (coarser) index containing the given index. +Returns the parent (coarser) index containing the given H3 index. **Syntax** @@ -418,16 +427,18 @@ h3ToParent(index, resolution) - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- Parent H3 index. Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Parent H3 index. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). **Example** Query: ``` sql -SELECT h3ToParent(599405990164561919, 3) as parent +SELECT h3ToParent(599405990164561919, 3) as parent; ``` Result: @@ -440,26 +451,28 @@ Result: ## h3ToString {#h3tostring} -Converts the H3Index representation of the index to the string representation. +Converts the `H3Index` representation of the index to the string representation. ``` sql h3ToString(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md). +- String representation of the H3 index. + +Type: [String](../../sql-reference/data-types/string.md). **Example** Query: ``` sql -SELECT h3ToString(617420388352917503) as h3_string +SELECT h3ToString(617420388352917503) as h3_string; ``` Result: @@ -472,17 +485,19 @@ Result: ## stringToH3 {#stringtoh3} -Converts the string representation to H3Index (UInt64) representation. +Converts the string representation to the `H3Index` (UInt64) representation. + +**Syntax** ``` sql stringToH3(index_str) ``` -**Parameters** +**Parameter** - `index_str` — String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md). -**Returned values** +**Returned value** - Hexagon index number. Returns 0 on error. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -491,7 +506,7 @@ stringToH3(index_str) Query: ``` sql -SELECT stringToH3('89184926cc3ffff') as index +SELECT stringToH3('89184926cc3ffff') as index; ``` Result: @@ -504,7 +519,7 @@ Result: ## h3GetResolution {#h3getresolution} -Returns the resolution of the index. +Returns the resolution of the H3 index. **Syntax** @@ -512,11 +527,11 @@ Returns the resolution of the index. 
h3GetResolution(index) ``` -**Parameters** +**Parameter** - `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** - Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -525,7 +540,7 @@ h3GetResolution(index) Query: ``` sql -SELECT h3GetResolution(617420388352917503) as res +SELECT h3GetResolution(617420388352917503) as res; ``` Result: @@ -536,4 +551,4 @@ Result: └─────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/geo/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) diff --git a/docs/ru/sql-reference/functions/geo.md b/docs/ru/sql-reference/functions/geo.md index 45c30b3c2cd..bf3f90ed47e 100644 --- a/docs/ru/sql-reference/functions/geo.md +++ b/docs/ru/sql-reference/functions/geo.md @@ -382,4 +382,295 @@ SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index └────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/geo/) +## h3GetBaseCell {#h3getbasecell} + +Определяет номер базовой (верхнеуровневой) шестиугольной H3-ячейки для указанной ячейки. + +**Синтаксис** + +``` sql +h3GetBaseCell(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Индекс базовой шестиугольной ячейки. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3GetBaseCell(612916788725809151) as basecell; +``` + +Результат: + +``` text +┌─basecell─┐ +│ 12 │ +└──────────┘ +``` + +## h3HexAreaM2 {#h3hexaream2} + +Определяет среднюю площадь шестиугольной H3-ячейки заданного разрешения в квадратных метрах. + +**Синтаксис** + +``` sql +h3HexAreaM2(resolution) +``` + +**Параметр** + +- `resolution` — разрешение. Диапазон: `[0, 15]`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Площадь в квадратных метрах. Тип: [Float64](../../sql-reference/data-types/float.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3HexAreaM2(13) as area; +``` + +Результат: + +``` text +┌─area─┐ +│ 43.9 │ +└──────┘ +``` + +## h3IndexesAreNeighbors {#h3indexesareneighbors} + +Определяет, являются ли H3-ячейки соседями. + +**Синтаксис** + +``` sql +h3IndexesAreNeighbors(index1, index2) +``` + +**Параметры** + +- `index1` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `index2` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- `1` — ячейки являются соседями. +- `0` — ячейки не являются соседями. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n; +``` + +Результат: + +``` text +┌─n─┐ +│ 1 │ +└───┘ +``` + +## h3ToChildren {#h3tochildren} + +Формирует массив дочерних (вложенных) H3-ячеек для указанной ячейки. + +**Синтаксис** + +``` sql +h3ToChildren(index, resolution) +``` + +**Параметры** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Массив дочерних H3-ячеек. + +Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). 
+ +**Пример** + +Запрос: + +``` sql +SELECT h3ToChildren(599405990164561919, 6) AS children; +``` + +Результат: + +``` text +┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## h3ToParent {#h3toparent} + +Определяет родительскую (более крупную) H3-ячейку, содержащую указанную ячейку. + +**Синтаксис** + +``` sql +h3ToParent(index, resolution) +``` + +**Параметры** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). +- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Индекс родительской H3-ячейки. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3ToParent(599405990164561919, 3) as parent; +``` + +Результат: + +``` text +┌─────────────parent─┐ +│ 590398848891879423 │ +└────────────────────┘ +``` + +## h3ToString {#h3tostring} + +Преобразует H3-индекс из числового представления `H3Index` в строковое. + +``` sql +h3ToString(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Строковое представление H3-индекса. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT h3ToString(617420388352917503) as h3_string; +``` + +Результат: + +``` text +┌─h3_string───────┐ +│ 89184926cdbffff │ +└─────────────────┘ +``` + +## stringToH3 {#stringtoh3} + +Преобразует H3-индекс из строкового представления в числовое представление `H3Index`. + +**Синтаксис** + +``` sql +stringToH3(index_str) +``` + +**Параметр** + +- `index_str` — строковое представление H3-индекса. Тип: [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Числовое представление индекса шестиугольной ячейки. +- `0`, если при преобразовании возникла ошибка. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT stringToH3('89184926cc3ffff') as index; +``` + +Результат: + +``` text +┌──────────────index─┐ +│ 617420388351344639 │ +└────────────────────┘ +``` + +## h3GetResolution {#h3getresolution} + +Определяет разрешение H3-ячейки. + +**Синтаксис** + +``` sql +h3GetResolution(index) +``` + +**Параметр** + +- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Разрешение ячейки. Диапазон: `[0, 15]`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT h3GetResolution(617420388352917503) as res; +``` + +Результат: + +``` text +┌─res─┐ +│ 9 │ +└─────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) From 13f601c9c09b2eff892dcf81d0e4e4c90c8d3e85 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 17:17:22 +0300 Subject: [PATCH 321/330] [docs] add intrdocution for commercial page (#12187) --- docs/en/commercial/index.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/en/commercial/index.md b/docs/en/commercial/index.md index f9065c7cd50..a8358f48b97 100644 --- a/docs/en/commercial/index.md +++ b/docs/en/commercial/index.md @@ -1,7 +1,18 @@ --- toc_folder_title: Commercial toc_priority: 70 -toc_title: Commercial +toc_title: Introduction --- +# ClickHouse Commercial Services +This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies not necessarily affiliated with Yandex. + +Service categories: + +- [Cloud](cloud.md) +- [Support](support.md) + + +!!! note "For service providers" + If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn't fit into existing categories). The easiest way to open a pull-request for documentation page is by using a “pencil” edit button in the top-right corner. If your service available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description). From ff4505123189d229bd4a93eb28045880924b4c72 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 7 Jul 2020 17:25:44 +0300 Subject: [PATCH 322/330] DOCS-647: toStartOfSecond (#12190) * DOCSUP-1120 Documentation for the toStartOfSecond function (#131) * Doc toStartOfSecond function * Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: BayoNet * Minor update for english text, russian translation added. Co-authored-by: Olga Revyakina Co-authored-by: BayoNet * CLICKHOUSEDOCS-647: Minor text edits. * Update docs/en/sql-reference/functions/date-time-functions.md * Update docs/en/sql-reference/functions/date-time-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> Co-authored-by: Olga Revyakina Co-authored-by: Sergei Shtykov Co-authored-by: Ivan Blinkov --- .../functions/date-time-functions.md | 57 +++++++++++++++++++ .../functions/date-time-functions.md | 56 ++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 3cbc7c73543..4eb316e0455 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -149,6 +149,63 @@ Rounds down a date with time to the start of the hour. Rounds down a date with time to the start of the minute. +## toStartOfSecond {#tostartofsecond} + +Truncates sub-seconds. + +**Syntax** + +``` sql +toStartOfSecond(value[, timezone]) +``` + +**Parameters** + +- `value` — Date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). 
If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). + +**Returned value** + +- Input value without sub-seconds. + +Type: [DateTime64](../data-types/datetime64.md). + +**Examples** + +Query without timezone: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 +SELECT toStartOfSecond(dt64); +``` + +Result: + +``` text +┌───toStartOfSecond(dt64)─┐ +│ 2020-01-01 10:20:30.000 │ +└─────────────────────────┘ +``` + +Query with timezone: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 +SELECT toStartOfSecond(dt64, 'Europe/Moscow'); +``` + +Result: + +``` text +┌─toStartOfSecond(dt64, 'Europe/Moscow')─┐ +│ 2020-01-01 13:20:30.000 │ +└────────────────────────────────────────┘ +``` + +**See also** + +- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. + ## toStartOfFiveMinute {#tostartoffiveminute} Rounds down a date with time to the start of the five-minute interval. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index ecd9c760fbc..c4bc42c6ece 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -121,6 +121,62 @@ Result: Округляет дату-с-временем вниз до начала минуты. +## toStartOfSecond {#tostartofsecond} + +Отсекает доли секунды. + +**Синтаксис** + +``` sql +toStartOfSecond(value[, timezone]) +``` + +**Параметры** + +- `value` — Дата и время. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). Если параметр не задан, используется часовой пояс параметра `value`. [String](../data-types/string.md). + +**Возвращаемое значение** + +- Входное значение с отсеченными долями секунды. + +Тип: [DateTime64](../data-types/datetime64.md). + +**Примеры** + +Пример без часового пояса: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(dt64); +``` + +Результат: + +``` text +┌───toStartOfSecond(dt64)─┐ +│ 2020-01-01 10:20:30.000 │ +└─────────────────────────┘ +``` + +Пример с часовым поясом: + +``` sql +WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(dt64, 'Europe/Moscow'); +``` + +Результат: + +``` text +┌─toStartOfSecond(dt64, 'Europe/Moscow')─┐ +│ 2020-01-01 13:20:30.000 │ +└────────────────────────────────────────┘ +``` + +**См. также** + +- Часовая зона сервера, конфигурационный параметр [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). + + ## toStartOfFiveMinute {#tostartoffiveminute} Округляет дату-с-временем вниз до начала пятиминутного интервала. 
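Since the `toStartOfSecond` examples above are plain `SELECT`s, the documented behaviour is easy to sanity-check without a full server. This is only a hedged illustration: it assumes a `clickhouse-local` binary new enough to ship the function, and the expected values are the ones shown in the reference examples of this patch.

```bash
# Hypothetical quick check of the new toStartOfSecond documentation examples.
clickhouse-local --query "
    WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64
    SELECT toStartOfSecond(dt64), toStartOfSecond(dt64, 'Europe/Moscow')"
# On a host running in UTC this prints
#   2020-01-01 10:20:30.000    2020-01-01 13:20:30.000
# matching the English and Russian reference examples above; in another
# timezone the Moscow-adjusted value differs, because the DateTime64 literal
# itself is interpreted in the host timezone.
```
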
From eefb498517a16c346061c07b5cc5280b21c0c68b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 17:45:04 +0300 Subject: [PATCH 323/330] [docs] refactor Domains overview (#12186) --- .../sql-reference/data-types/domains/index.md | 27 +++++++++++++++- .../data-types/domains/overview.md | 30 ----------------- .../sql-reference/data-types/domains/index.md | 27 +++++++++++++++- .../data-types/domains/overview.md | 32 ------------------- .../sql-reference/data-types/domains/index.md | 27 +++++++++++++++- .../data-types/domains/overview.md | 32 ------------------- .../sql-reference/data-types/domains/index.md | 25 +++++++++++++++ .../data-types/domains/overview.md | 32 ------------------- .../sql-reference/data-types/domains/index.md | 25 +++++++++++++++ .../data-types/domains/overview.md | 32 ------------------- docs/redirects.txt | 1 + .../sql-reference/data-types/domains/index.md | 29 ++++++++++++++++- .../data-types/domains/overview.md | 26 --------------- .../sql-reference/data-types/domains/index.md | 26 +++++++++++++++ .../data-types/domains/overview.md | 32 ------------------- .../sql-reference/data-types/domains/index.md | 30 +++++++++++++++-- .../data-types/domains/overview.md | 26 --------------- 17 files changed, 211 insertions(+), 248 deletions(-) delete mode 100644 docs/en/sql-reference/data-types/domains/overview.md delete mode 100644 docs/es/sql-reference/data-types/domains/overview.md delete mode 100644 docs/fa/sql-reference/data-types/domains/overview.md delete mode 100644 docs/fr/sql-reference/data-types/domains/overview.md delete mode 100644 docs/ja/sql-reference/data-types/domains/overview.md delete mode 100644 docs/ru/sql-reference/data-types/domains/overview.md delete mode 100644 docs/tr/sql-reference/data-types/domains/overview.md delete mode 100644 docs/zh/sql-reference/data-types/domains/overview.md diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index ddcb5b21d82..30aca1eb059 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -1,6 +1,31 @@ --- -toc_folder_title: Domains toc_priority: 56 +toc_folder_title: Domains +toc_title: Overview --- +# Domains {#domains} +Domains are special-purpose types that add some extra features atop of existing base type, but leaving on-wire and on-disc format of the underlying data type intact. At the moment, ClickHouse does not support user-defined domains. + +You can use domains anywhere corresponding base type can be used, for example: + +- Create a column of a domain type +- Read/write values from/to domain column +- Use it as an index if a base type can be used as an index +- Call functions with values of domain column + +### Extra Features of Domains {#extra-features-of-domains} + +- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` +- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` +- Output to human-friendly format for `SELECT domain_column FROM domain_table` +- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitations {#limitations} + +- Can’t convert index column of base type to domain type via `ALTER TABLE`. +- Can’t implicitly convert string values into domain values when inserting data from another column or table. +- Domain adds no constrains on stored values. 
+ +[Original article](https://clickhouse.tech/docs/en/data_types/domains/) diff --git a/docs/en/sql-reference/data-types/domains/overview.md b/docs/en/sql-reference/data-types/domains/overview.md deleted file mode 100644 index aea7307d048..00000000000 --- a/docs/en/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -toc_priority: 58 -toc_title: Overview ---- - -# Domains {#domains} - -Domains are special-purpose types that add some extra features atop of existing base type, but leaving on-wire and on-disc format of the underlying data type intact. At the moment, ClickHouse does not support user-defined domains. - -You can use domains anywhere corresponding base type can be used, for example: - -- Create a column of a domain type -- Read/write values from/to domain column -- Use it as an index if a base type can be used as an index -- Call functions with values of domain column - -### Extra Features of Domains {#extra-features-of-domains} - -- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` -- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` -- Output to human-friendly format for `SELECT domain_column FROM domain_table` -- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitations {#limitations} - -- Can’t convert index column of base type to domain type via `ALTER TABLE`. -- Can’t implicitly convert string values into domain values when inserting data from another column or table. -- Domain adds no constrains on stored values. - -[Original article](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/es/sql-reference/data-types/domains/index.md b/docs/es/sql-reference/data-types/domains/index.md index f4bfc581b91..136058e35c8 100644 --- a/docs/es/sql-reference/data-types/domains/index.md +++ b/docs/es/sql-reference/data-types/domains/index.md @@ -1,8 +1,33 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: Dominio toc_priority: 56 +toc_folder_title: Dominio +toc_title: "Descripci\xF3n" --- +# Dominio {#domains} +Los dominios son tipos de propósito especial que agregan algunas características adicionales encima del tipo base existente, pero dejando intacto el formato en cable y en disco del tipo de datos subyacente. Por el momento, ClickHouse no admite dominios definidos por el usuario. + +Puede usar dominios en cualquier lugar que se pueda usar el tipo base correspondiente, por ejemplo: + +- Crear una columna de un tipo de dominio +- Leer/escribir valores desde/a la columna de dominio +- Úselo como un índice si un tipo base se puede usar como un índice +- Funciones de llamada con valores de la columna de dominio + +### Características adicionales de los dominios {#extra-features-of-domains} + +- Nombre de tipo de columna explícito en `SHOW CREATE TABLE` o `DESCRIBE TABLE` +- Entrada del formato humano-amistoso con `INSERT INTO domain_table(domain_column) VALUES(...)` +- Salida al formato humano-amistoso para `SELECT domain_column FROM domain_table` +- Carga de datos desde una fuente externa en el formato de uso humano: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitacion {#limitations} + +- No se puede convertir la columna de índice del tipo base al tipo de dominio a través de `ALTER TABLE`. +- No se pueden convertir implícitamente valores de cadena en valores de dominio al insertar datos de otra columna o tabla. 
+- Domain no agrega restricciones en los valores almacenados. + +[Artículo Original](https://clickhouse.tech/docs/en/data_types/domains/) diff --git a/docs/es/sql-reference/data-types/domains/overview.md b/docs/es/sql-reference/data-types/domains/overview.md deleted file mode 100644 index c6fc9057c81..00000000000 --- a/docs/es/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Descripci\xF3n" ---- - -# Dominio {#domains} - -Los dominios son tipos de propósito especial que agregan algunas características adicionales encima del tipo base existente, pero dejando intacto el formato en cable y en disco del tipo de datos subyacente. Por el momento, ClickHouse no admite dominios definidos por el usuario. - -Puede usar dominios en cualquier lugar que se pueda usar el tipo base correspondiente, por ejemplo: - -- Crear una columna de un tipo de dominio -- Leer/escribir valores desde/a la columna de dominio -- Úselo como un índice si un tipo base se puede usar como un índice -- Funciones de llamada con valores de la columna de dominio - -### Características adicionales de los dominios {#extra-features-of-domains} - -- Nombre de tipo de columna explícito en `SHOW CREATE TABLE` o `DESCRIBE TABLE` -- Entrada del formato humano-amistoso con `INSERT INTO domain_table(domain_column) VALUES(...)` -- Salida al formato humano-amistoso para `SELECT domain_column FROM domain_table` -- Carga de datos desde una fuente externa en el formato de uso humano: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitacion {#limitations} - -- No se puede convertir la columna de índice del tipo base al tipo de dominio a través de `ALTER TABLE`. -- No se pueden convertir implícitamente valores de cadena en valores de dominio al insertar datos de otra columna o tabla. -- Domain no agrega restricciones en los valores almacenados. - -[Artículo Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fa/sql-reference/data-types/domains/index.md b/docs/fa/sql-reference/data-types/domains/index.md index 089e1c43eed..a05eea1b59c 100644 --- a/docs/fa/sql-reference/data-types/domains/index.md +++ b/docs/fa/sql-reference/data-types/domains/index.md @@ -1,8 +1,33 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u062F\u0627\u0645\u0646\u0647" toc_priority: 56 +toc_folder_title: "\u062F\u0627\u0645\u0646\u0647" +toc_title: "\u0628\u0631\u0631\u0633\u06CC \u0627\u062C\u0645\u0627\u0644\u06CC" --- +# دامنه {#domains} +دامنه انواع خاصی است که اضافه کردن برخی از ویژگی های اضافی در بالای نوع پایه موجود, اما ترک بر روی سیم و بر روی دیسک فرمت از نوع داده اساسی دست نخورده. درحال حاضر, تاتر می کند دامنه تعریف شده توسط کاربر را پشتیبانی نمی کند. 
+ +شما می توانید دامنه در هر نقطه نوع پایه مربوطه استفاده می شود, مثلا: + +- ایجاد یک ستون از یک نوع دامنه +- خواندن / نوشتن مقادیر از / به ستون دامنه +- اگر یک نوع پایه می تواند به عنوان یک شاخص استفاده می شود به عنوان شاخص استفاده می شود +- توابع تماس با مقادیر ستون دامنه + +### ویژگی های اضافی از دامنه {#extra-features-of-domains} + +- صریح نام نوع ستون در `SHOW CREATE TABLE` یا `DESCRIBE TABLE` +- ورودی از فرمت انسان دوستانه با `INSERT INTO domain_table(domain_column) VALUES(...)` +- خروجی به فرمت انسان دوستانه برای `SELECT domain_column FROM domain_table` +- بارگیری داده ها از یک منبع خارجی در قالب انسان دوستانه: `INSERT INTO domain_table FORMAT CSV ...` + +### محدودیت ها {#limitations} + +- می توانید ستون شاخص از نوع پایه به نوع دامنه از طریق تبدیل کنید `ALTER TABLE`. +- نمی تواند به طور ضمنی تبدیل مقادیر رشته به ارزش دامنه در هنگام قرار دادن داده ها از ستون یا جدول دیگر. +- دامنه می افزاید: هیچ محدودیتی در مقادیر ذخیره شده. + +[مقاله اصلی](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fa/sql-reference/data-types/domains/overview.md b/docs/fa/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 4507ca850ef..00000000000 --- a/docs/fa/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "\u0628\u0631\u0631\u0633\u06CC \u0627\u062C\u0645\u0627\u0644\u06CC" ---- - -# دامنه {#domains} - -دامنه انواع خاصی است که اضافه کردن برخی از ویژگی های اضافی در بالای نوع پایه موجود, اما ترک بر روی سیم و بر روی دیسک فرمت از نوع داده اساسی دست نخورده. درحال حاضر, تاتر می کند دامنه تعریف شده توسط کاربر را پشتیبانی نمی کند. - -شما می توانید دامنه در هر نقطه نوع پایه مربوطه استفاده می شود, مثلا: - -- ایجاد یک ستون از یک نوع دامنه -- خواندن / نوشتن مقادیر از / به ستون دامنه -- اگر یک نوع پایه می تواند به عنوان یک شاخص استفاده می شود به عنوان شاخص استفاده می شود -- توابع تماس با مقادیر ستون دامنه - -### ویژگی های اضافی از دامنه {#extra-features-of-domains} - -- صریح نام نوع ستون در `SHOW CREATE TABLE` یا `DESCRIBE TABLE` -- ورودی از فرمت انسان دوستانه با `INSERT INTO domain_table(domain_column) VALUES(...)` -- خروجی به فرمت انسان دوستانه برای `SELECT domain_column FROM domain_table` -- بارگیری داده ها از یک منبع خارجی در قالب انسان دوستانه: `INSERT INTO domain_table FORMAT CSV ...` - -### محدودیت ها {#limitations} - -- می توانید ستون شاخص از نوع پایه به نوع دامنه از طریق تبدیل کنید `ALTER TABLE`. -- نمی تواند به طور ضمنی تبدیل مقادیر رشته به ارزش دامنه در هنگام قرار دادن داده ها از ستون یا جدول دیگر. -- دامنه می افزاید: هیچ محدودیتی در مقادیر ذخیره شده. - -[مقاله اصلی](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fr/sql-reference/data-types/domains/index.md b/docs/fr/sql-reference/data-types/domains/index.md index ffe64acb834..7e11f9a8a68 100644 --- a/docs/fr/sql-reference/data-types/domains/index.md +++ b/docs/fr/sql-reference/data-types/domains/index.md @@ -3,6 +3,31 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: Domaine toc_priority: 56 +toc_title: "Aper\xE7u" --- +# Domaine {#domains} +Les domaines sont des types spéciaux qui ajoutent des fonctionnalités supplémentaires au sommet du type de base existant, mais en laissant le format on-wire et on-disc du type de données sous-jacent intact. À l'heure actuelle, ClickHouse ne prend pas en charge les domaines définis par l'utilisateur. 
+ +Vous pouvez utiliser des domaines partout type de base correspondant peut être utilisé, par exemple: + +- Créer une colonne d'un type de domaine +- Valeurs de lecture / écriture depuis / vers la colonne de domaine +- L'utiliser comme un indice si un type de base peut être utilisée comme un indice +- Fonctions d'appel avec des valeurs de colonne de domaine + +### Fonctionnalités supplémentaires des domaines {#extra-features-of-domains} + +- Nom de type de colonne explicite dans `SHOW CREATE TABLE` ou `DESCRIBE TABLE` +- Entrée du format convivial avec `INSERT INTO domain_table(domain_column) VALUES(...)` +- Sortie au format convivial pour `SELECT domain_column FROM domain_table` +- Chargement de données à partir d'une source externe dans un format convivial: `INSERT INTO domain_table FORMAT CSV ...` + +### Limitation {#limitations} + +- Impossible de convertir la colonne d'index du type de base en type de domaine via `ALTER TABLE`. +- Impossible de convertir implicitement des valeurs de chaîne en valeurs de domaine lors de l'insertion de données d'une autre colonne ou table. +- Le domaine n'ajoute aucune contrainte sur les valeurs stockées. + +[Article Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/fr/sql-reference/data-types/domains/overview.md b/docs/fr/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 149ea84ba7b..00000000000 --- a/docs/fr/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Aper\xE7u" ---- - -# Domaine {#domains} - -Les domaines sont des types spéciaux qui ajoutent des fonctionnalités supplémentaires au sommet du type de base existant, mais en laissant le format on-wire et on-disc du type de données sous-jacent intact. À l'heure actuelle, ClickHouse ne prend pas en charge les domaines définis par l'utilisateur. - -Vous pouvez utiliser des domaines partout type de base correspondant peut être utilisé, par exemple: - -- Créer une colonne d'un type de domaine -- Valeurs de lecture / écriture depuis / vers la colonne de domaine -- L'utiliser comme un indice si un type de base peut être utilisée comme un indice -- Fonctions d'appel avec des valeurs de colonne de domaine - -### Fonctionnalités supplémentaires des domaines {#extra-features-of-domains} - -- Nom de type de colonne explicite dans `SHOW CREATE TABLE` ou `DESCRIBE TABLE` -- Entrée du format convivial avec `INSERT INTO domain_table(domain_column) VALUES(...)` -- Sortie au format convivial pour `SELECT domain_column FROM domain_table` -- Chargement de données à partir d'une source externe dans un format convivial: `INSERT INTO domain_table FORMAT CSV ...` - -### Limitation {#limitations} - -- Impossible de convertir la colonne d'index du type de base en type de domaine via `ALTER TABLE`. -- Impossible de convertir implicitement des valeurs de chaîne en valeurs de domaine lors de l'insertion de données d'une autre colonne ou table. -- Le domaine n'ajoute aucune contrainte sur les valeurs stockées. 
- -[Article Original](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/ja/sql-reference/data-types/domains/index.md b/docs/ja/sql-reference/data-types/domains/index.md index d476fcefb51..4f8c2b7add8 100644 --- a/docs/ja/sql-reference/data-types/domains/index.md +++ b/docs/ja/sql-reference/data-types/domains/index.md @@ -3,6 +3,31 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u30C9\u30E1\u30A4\u30F3" toc_priority: 56 +toc_title: "\u6982\u8981" --- +# ドメイン {#domains} +ドメインは、既存の基本型の上にいくつかの余分な機能を追加する特殊な目的の型ですが、基になるデータ型のオンワイヤおよびオンディスク形式は 現時点では、ClickHouseはユーザー定義ドメインをサポートしていません。 + +たとえば、対応する基本タイプを使用できる任意の場所でドメインを使用できます: + +- ドメイン型の列を作成する +- ドメイン列から/への読み取り/書き込み値 +- 基本型をインデックスとして使用できる場合は、インデックスとして使用します +- ドメイン列の値を持つ関数の呼び出し + +### ドメインの追加機能 {#extra-features-of-domains} + +- 明示的な列タイプ名 `SHOW CREATE TABLE` または `DESCRIBE TABLE` +- 人間に優しいフォーマットからの入力 `INSERT INTO domain_table(domain_column) VALUES(...)` +- 人間に優しいフォーマットへの出力 `SELECT domain_column FROM domain_table` +- 人間に優しい形式で外部ソースからデータを読み込む: `INSERT INTO domain_table FORMAT CSV ...` + +### 制限 {#limitations} + +- 基本型のインデックス列をドメイン型に変換できません `ALTER TABLE`. +- 別の列または表からデータを挿入するときに、文字列値を暗黙的にドメイン値に変換できません。 +- ドメインは、格納された値に制約を追加しません。 + +[元の記事](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/ja/sql-reference/data-types/domains/overview.md b/docs/ja/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 958fce5beb4..00000000000 --- a/docs/ja/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "\u6982\u8981" ---- - -# ドメイン {#domains} - -ドメインは、既存の基本型の上にいくつかの余分な機能を追加する特殊な目的の型ですが、基になるデータ型のオンワイヤおよびオンディスク形式は 現時点では、ClickHouseはユーザー定義ドメインをサポートしていません。 - -たとえば、対応する基本タイプを使用できる任意の場所でドメインを使用できます: - -- ドメイン型の列を作成する -- ドメイン列から/への読み取り/書き込み値 -- 基本型をインデックスとして使用できる場合は、インデックスとして使用します -- ドメイン列の値を持つ関数の呼び出し - -### ドメインの追加機能 {#extra-features-of-domains} - -- 明示的な列タイプ名 `SHOW CREATE TABLE` または `DESCRIBE TABLE` -- 人間に優しいフォーマットからの入力 `INSERT INTO domain_table(domain_column) VALUES(...)` -- 人間に優しいフォーマットへの出力 `SELECT domain_column FROM domain_table` -- 人間に優しい形式で外部ソースからデータを読み込む: `INSERT INTO domain_table FORMAT CSV ...` - -### 制限 {#limitations} - -- 基本型のインデックス列をドメイン型に変換できません `ALTER TABLE`. 
-- 別の列または表からデータを挿入するときに、文字列値を暗黙的にドメイン値に変換できません。 -- ドメインは、格納された値に制約を追加しません。 - -[元の記事](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/redirects.txt b/docs/redirects.txt index 233492f09ac..b4c93ac4908 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -290,6 +290,7 @@ query_language/table_functions/remote.md sql-reference/table-functions/remote.md query_language/table_functions/url.md sql-reference/table-functions/url.md roadmap.md whats-new/roadmap.md security_changelog.md whats-new/security-changelog.md +sql-reference/data-types/domains/overview.md sql-reference/data-types/domains/index.md sql_reference/aggregate_functions/combinators.md sql-reference/aggregate-functions/combinators.md sql_reference/aggregate_functions/index.md sql-reference/aggregate-functions/index.md sql_reference/aggregate_functions/parametric_functions.md sql-reference/aggregate-functions/parametric-functions.md diff --git a/docs/ru/sql-reference/data-types/domains/index.md b/docs/ru/sql-reference/data-types/domains/index.md index ddcb5b21d82..fe5c7ab7349 100644 --- a/docs/ru/sql-reference/data-types/domains/index.md +++ b/docs/ru/sql-reference/data-types/domains/index.md @@ -1,6 +1,33 @@ --- -toc_folder_title: Domains +toc_folder_title: Домены +toc_title_title: Обзор toc_priority: 56 --- +# Домены {#domeny} + +Домены — это типы данных специального назначения, которые добавляют некоторые дополнительные функции поверх существующего базового типа. На данный момент ClickHouse не поддерживает пользовательские домены. + +Вы можете использовать домены везде, где можно использовать соответствующий базовый тип: + +- Создание столбца с доменным типом данных. +- Чтение/запись значений из/в столбец с доменным типом данных. +- Используйте его как индекс, если базовый тип можно использовать в качестве индекса. +- Вызов функций со значениями столбца, имеющего доменный тип данных. +- и так далее. + +### Дополнительные возможности доменов {#dopolnitelnye-vozmozhnosti-domenov} + +- Явное название типа данных столбца в запросах `SHOW CREATE TABLE` и `DESCRIBE TABLE` +- Ввод данных в удобном человеку формате `INSERT INTO domain_table(domain_column) VALUES(...)` +- Вывод данных в удобном человеку формате `SELECT domain_column FROM domain_table` +- Загрузка данных из внешнего источника в удобном для человека формате: `INSERT INTO domain_table FORMAT CSV ...` + +### Ограничения {#ogranicheniia} + +- Невозможно преобразовать базовый тип данных в доменный для индексного столбца с помощью `ALTER TABLE`. +- Невозможно неявно преобразовывать строковые значение в значения с доменным типом данных при вставке данных из другого столбца или таблицы. +- Домен не добавляет ограничения на хранимые значения. + +[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/domains/overview) diff --git a/docs/ru/sql-reference/data-types/domains/overview.md b/docs/ru/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 6feac834e5e..00000000000 --- a/docs/ru/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,26 +0,0 @@ -# Домены {#domeny} - -Домены — это типы данных специального назначения, которые добавляют некоторые дополнительные функции поверх существующего базового типа. На данный момент ClickHouse не поддерживает пользовательские домены. - -Вы можете использовать домены везде, где можно использовать соответствующий базовый тип: - -- Создание столбца с доменным типом данных. -- Чтение/запись значений из/в столбец с доменным типом данных. 
-- Используйте его как индекс, если базовый тип можно использовать в качестве индекса. -- Вызов функций со значениями столбца, имеющего доменный тип данных. -- и так далее. - -### Дополнительные возможности доменов {#dopolnitelnye-vozmozhnosti-domenov} - -- Явное название типа данных столбца в запросах `SHOW CREATE TABLE` и `DESCRIBE TABLE` -- Ввод данных в удобном человеку формате `INSERT INTO domain_table(domain_column) VALUES(...)` -- Вывод данных в удобном человеку формате `SELECT domain_column FROM domain_table` -- Загрузка данных из внешнего источника в удобном для человека формате: `INSERT INTO domain_table FORMAT CSV ...` - -### Ограничения {#ogranicheniia} - -- Невозможно преобразовать базовый тип данных в доменный для индексного столбца с помощью `ALTER TABLE`. -- Невозможно неявно преобразовывать строковые значение в значения с доменным типом данных при вставке данных из другого столбца или таблицы. -- Домен не добавляет ограничения на хранимые значения. - -[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/domains/overview) diff --git a/docs/tr/sql-reference/data-types/domains/index.md b/docs/tr/sql-reference/data-types/domains/index.md index e439d110325..7ef688b3578 100644 --- a/docs/tr/sql-reference/data-types/domains/index.md +++ b/docs/tr/sql-reference/data-types/domains/index.md @@ -1,8 +1,34 @@ --- machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd +toc_priority: 58 +toc_title: "Genel bak\u0131\u015F" toc_folder_title: Etkiler toc_priority: 56 --- +# Etkiler {#domains} +Etki alanları, varolan temel türün üstüne bazı ek özellikler ekleyen, ancak temel veri türünün kablolu ve disk üstü biçimini sağlam bırakan özel amaçlı türlerdir. Şu anda, ClickHouse kullanıcı tanımlı etki alanlarını desteklemiyor. + +Örneğin, ilgili taban türünün kullanılabileceği her yerde etki alanlarını kullanabilirsiniz: + +- Etki alanı türünde bir sütun oluşturma +- Alan sütunundan/alanına değerleri okuma / yazma +- Bir temel türü bir dizin olarak kullanılabilir, bir dizin olarak kullanın +- Etki alanı sütun değerleri ile çağrı fonksiyonları + +### Alanların ekstra özellikleri {#extra-features-of-domains} + +- Açık sütun türü adı `SHOW CREATE TABLE` veya `DESCRIBE TABLE` +- İle insan dostu format inputtan giriş `INSERT INTO domain_table(domain_column) VALUES(...)` +- İçin insan dostu forma outputta çıktı `SELECT domain_column FROM domain_table` +- Harici bir kaynaktan insan dostu biçimde veri yükleme: `INSERT INTO domain_table FORMAT CSV ...` + +### Sınırlamalar {#limitations} + +- Temel türün dizin sütununu etki alanı türüne dönüştürülemiyor `ALTER TABLE`. +- Başka bir sütun veya tablodan veri eklerken dize değerlerini dolaylı olarak etki alanı değerlerine dönüştüremez. +- Etki alanı, depolanan değerler üzerinde hiçbir kısıtlama ekler. + +[Orijinal makale](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/tr/sql-reference/data-types/domains/overview.md b/docs/tr/sql-reference/data-types/domains/overview.md deleted file mode 100644 index cfab9f3701e..00000000000 --- a/docs/tr/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 58 -toc_title: "Genel bak\u0131\u015F" ---- - -# Etkiler {#domains} - -Etki alanları, varolan temel türün üstüne bazı ek özellikler ekleyen, ancak temel veri türünün kablolu ve disk üstü biçimini sağlam bırakan özel amaçlı türlerdir. 
Şu anda, ClickHouse kullanıcı tanımlı etki alanlarını desteklemiyor. - -Örneğin, ilgili taban türünün kullanılabileceği her yerde etki alanlarını kullanabilirsiniz: - -- Etki alanı türünde bir sütun oluşturma -- Alan sütunundan/alanına değerleri okuma / yazma -- Bir temel türü bir dizin olarak kullanılabilir, bir dizin olarak kullanın -- Etki alanı sütun değerleri ile çağrı fonksiyonları - -### Alanların ekstra özellikleri {#extra-features-of-domains} - -- Açık sütun türü adı `SHOW CREATE TABLE` veya `DESCRIBE TABLE` -- İle insan dostu format inputtan giriş `INSERT INTO domain_table(domain_column) VALUES(...)` -- İçin insan dostu forma outputta çıktı `SELECT domain_column FROM domain_table` -- Harici bir kaynaktan insan dostu biçimde veri yükleme: `INSERT INTO domain_table FORMAT CSV ...` - -### Sınırlamalar {#limitations} - -- Temel türün dizin sütununu etki alanı türüne dönüştürülemiyor `ALTER TABLE`. -- Başka bir sütun veya tablodan veri eklerken dize değerlerini dolaylı olarak etki alanı değerlerine dönüştüremez. -- Etki alanı, depolanan değerler üzerinde hiçbir kısıtlama ekler. - -[Orijinal makale](https://clickhouse.tech/docs/en/data_types/domains/overview) diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index e05c61e0dbb..6ef788b0650 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -1,8 +1,34 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u57DF" +toc_title: "域" toc_priority: 56 --- +# 域 {#domains} + +Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 + +如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: + +- 使用Domain类型作为表中列的类型 +- 对Domain类型的列进行读/写数据 +- 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 +- 将Domain类型作为参数传递给函数使用 +- 其他 + +### Domains的额外特性 {#domainsde-e-wai-te-xing} + +- 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 +- 在INSERT INTO domain\_table(domain\_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在SELECT domain\_column FROM domain\_table中数据总是以更人性化的格式输出 +- 在INSERT INTO domain\_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 + +### Domains类型的限制 {#domainslei-xing-de-xian-zhi} + +- 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 +- 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 +- 无法对存储为Domain类型的值添加约束。 + +[来源文章](https://clickhouse.tech/docs/en/data_types/domains/overview) + diff --git a/docs/zh/sql-reference/data-types/domains/overview.md b/docs/zh/sql-reference/data-types/domains/overview.md deleted file mode 100644 index 689acb9cb60..00000000000 --- a/docs/zh/sql-reference/data-types/domains/overview.md +++ /dev/null @@ -1,26 +0,0 @@ -# 域 {#domains} - -Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 - -如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: - -- 使用Domain类型作为表中列的类型 -- 对Domain类型的列进行读/写数据 -- 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 -- 将Domain类型作为参数传递给函数使用 -- 其他 - -### Domains的额外特性 {#domainsde-e-wai-te-xing} - -- 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain\_table(domain\_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 -- 在SELECT domain\_column FROM domain\_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain\_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 - -### Domains类型的限制 {#domainslei-xing-de-xian-zhi} - -- 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 -- 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 -- 无法对存储为Domain类型的值添加约束。 - 
-[来源文章](https://clickhouse.tech/docs/en/data_types/domains/overview) From a5cb0aed12b90fa4e23db81dbd5b8da7ef98d4fd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 7 Jul 2020 19:22:41 +0300 Subject: [PATCH 324/330] Sanitize LINK_LIBRARIES property for the directories (#12160) When you will try to link target with the directory (that exists), cmake will skip this without an error, only the following warning will be reported: target_link_libraries(main /tmp) WARNING: Target "main" requests linking to directory "/tmp". Targets may link only to libraries. CMake is dropping the item. And there is no cmake policy that controls this. (I guess the reason that it is allowed is because of FRAMEWORK for OSX). So to avoid error-prone cmake rules, this can be sanitized. There are the following ways: - overwrite target_link_libraries()/link_libraries() and check *before* calling real macro, but this requires duplicate all supported syntax -- too complex - overwrite target_link_libraries() and check LINK_LIBRARIES property, this works great -- but cannot be used with link_libraries() - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize -- this will work. I also tested it with the following patch: $ git di diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 26d59a57e7..35e6ff6432 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -9,4 +9,5 @@ target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickh if (USE_SENTRY) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) + target_link_libraries (daemon PRIVATE /tmp) endif () And it works: CMake Error at cmake/sanitize_target_link_libraries.cmake:48 (message): daemon requested to link with directory: /tmp Call Stack (most recent call first): cmake/sanitize_target_link_libraries.cmake:55 (sanitize_link_libraries) CMakeLists.txt:425 (include) Refs: #12041 --- CMakeLists.txt | 2 + cmake/sanitize_target_link_libraries.cmake | 56 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 cmake/sanitize_target_link_libraries.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cde8f6fec3..d4d325818e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -421,3 +421,5 @@ add_subdirectory (tests) add_subdirectory (utils) include (cmake/print_include_directories.cmake) + +include (cmake/sanitize_target_link_libraries.cmake) diff --git a/cmake/sanitize_target_link_libraries.cmake b/cmake/sanitize_target_link_libraries.cmake new file mode 100644 index 00000000000..d66ea338a52 --- /dev/null +++ b/cmake/sanitize_target_link_libraries.cmake @@ -0,0 +1,56 @@ +# When you will try to link target with the directory (that exists), cmake will +# skip this without an error, only the following warning will be reported: +# +# target_link_libraries(main /tmp) +# +# WARNING: Target "main" requests linking to directory "/tmp". Targets may link only to libraries. CMake is dropping the item. +# +# And there is no cmake policy that controls this. +# (I guess the reason that it is allowed is because of FRAMEWORK for OSX). +# +# So to avoid error-prone cmake rules, this can be sanitized. 
+# There are the following ways: +# - overwrite target_link_libraries()/link_libraries() and check *before* +# calling real macro, but this requires duplicate all supported syntax +# -- too complex +# - overwrite target_link_libraries() and check LINK_LIBRARIES property, this +# works great +# -- but cannot be used with link_libraries() +# - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize +# -- this will work. + +# https://stackoverflow.com/a/62311397/328260 +function (get_all_targets var) + set (targets) + get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}) + set (${var} ${targets} PARENT_SCOPE) +endfunction() +macro (get_all_targets_recursive targets dir) + get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (subdir ${subdirectories}) + get_all_targets_recursive (${targets} ${subdir}) + endforeach () + get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list (APPEND ${targets} ${current_targets}) +endmacro () + +macro (sanitize_link_libraries target) + get_target_property(target_type ${target} TYPE) + if (${target_type} STREQUAL "INTERFACE_LIBRARY") + get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES) + else() + get_property(linked_libraries TARGET ${target} PROPERTY LINK_LIBRARIES) + endif() + foreach (linked_library ${linked_libraries}) + if (TARGET ${linked_library}) + # just in case, skip if TARGET + elseif (IS_DIRECTORY ${linked_library}) + message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}") + endif() + endforeach() +endmacro() + +get_all_targets (all_targets) +foreach (target ${all_targets}) + sanitize_link_libraries(${target}) +endforeach() From f514d0da77453db5cf07be4cda79abf466cf0c0a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 7 Jul 2020 19:59:42 +0300 Subject: [PATCH 325/330] Revert "Run perf tests with memory sampling (for allocations >1M)" --- .../config/users.d/perf-comparison-tweaks-users.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 8173934fa3e..6e3e3df5d39 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -6,8 +6,6 @@ 1 1 1 - 1 - 1048576 From 9c829e3046cae0711ca822998d1a6b3e59b54edb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 20:18:38 +0300 Subject: [PATCH 326/330] [docs] add intrdocution for statements page (#12189) * [docs] add intrdocution for statements page * Update index.md --- docs/en/sql-reference/statements/index.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index 034c541a1c4..3a2c09d94cb 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,6 +1,19 @@ --- toc_folder_title: Statements toc_priority: 31 +toc_hidden: true --- +# ClickHouse SQL Statements +Statements represent various kinds of action you can perform using SQL queries. 
Each kind of statement has its own syntax and usage details that are described separately: + +- [SELECT](select/index.md) +- [INSERT INTO](insert-into.md) +- [CREATE](create.md) +- [ALTER](alter.md) +- [SYSTEM](system.md) +- [SHOW](show.md) +- [GRANT](grant.md) +- [REVOKE](revoke.md) +- [Other](misc.md) From 391d31d7f6604b364c8b726d36c47246d4277777 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 20:28:20 +0300 Subject: [PATCH 327/330] Update index.md --- docs/en/operations/optimizing-performance/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md index 6e6cef109c1..ca7b96268d8 100644 --- a/docs/en/operations/optimizing-performance/index.md +++ b/docs/en/operations/optimizing-performance/index.md @@ -1,6 +1,9 @@ --- toc_folder_title: Optimizing Performance toc_priority: 52 +toc_hidden: true --- +# Optimizing Performance +- [Sampling query profiler](sampling-query-profiler.md) From b0ff1fede54e2df105bf8288ec01dc989e626aae Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 21:17:43 +0300 Subject: [PATCH 328/330] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 46154ea3af8..c0de8deb945 100644 --- a/README.md +++ b/README.md @@ -16,4 +16,5 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse at Yandex Cloud Webinar (in Russian)](https://cloud.yandex.ru/events/144) on July 7, 2020. +* [ClickHouse for genetic data (in Russian)]https://cloud.yandex.ru/events/152) on July 14, 2020. +* [ClickHouse virtual office gours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020. From ba437c370f5ba030665bb108915e8431244161ed Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 21:20:40 +0300 Subject: [PATCH 329/330] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c0de8deb945..1f9f31b3f23 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,5 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse for genetic data (in Russian)]https://cloud.yandex.ru/events/152) on July 14, 2020. +* [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020. * [ClickHouse virtual office gours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020. From 5e062e851dcf7ae0e4134c6086d54f80c48a4bc8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 7 Jul 2020 21:21:36 +0300 Subject: [PATCH 330/330] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f9f31b3f23..ef4e02c5434 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events * [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020. -* [ClickHouse virtual office gours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020. +* [ClickHouse virtual office hours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020.
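The localized domain pages consolidated by the patches above all describe the same behaviour: a domain keeps the base type's on-wire and on-disk format while adding human-friendly input and output. A minimal ClickHouse SQL sketch of that behaviour follows, assuming the built-in IPv4 domain (a domain over UInt32); the table and column names here are hypothetical and do not come from any file touched by these patches:

```sql
-- Hypothetical table with a column of a domain type (IPv4 is a built-in domain over UInt32).
CREATE TABLE visits (ts DateTime, client_addr IPv4) ENGINE = MergeTree() ORDER BY ts;

-- Human-friendly input: the dotted-quad string is parsed into the underlying UInt32.
INSERT INTO visits (ts, client_addr) VALUES (now(), '116.106.34.242');

-- Human-friendly output: the value is rendered back as a string rather than a number.
SELECT client_addr FROM visits;

-- The explicit domain name (IPv4), not the base type, is reported for the column.
DESCRIBE TABLE visits;

-- Limitation: string values are not implicitly converted to the domain when inserting
-- from another column or table; an explicit cast such as toIPv4(addr_string) is needed.
```

Because the stored representation is the unchanged base type, the numeric form remains reachable with an explicit cast, e.g. `CAST(client_addr AS UInt32)`.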