Merge branch 'master' into enable_projections_and_zero_copy

This commit is contained in:
alesapin 2022-09-16 13:53:17 +02:00 committed by GitHub
commit b4d2d217f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
179 changed files with 4823 additions and 2140 deletions

View File

@ -22,6 +22,8 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-unchecked-optional-access,
-bugprone-assignment-in-if-condition,
-cert-dcl16-c,
-cert-err58-cpp,
@ -103,6 +105,7 @@ Checks: '*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-const-correctness,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
@ -114,6 +117,7 @@ Checks: '*,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,
-modernize-macro-to-enum,
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
@ -135,6 +139,7 @@ Checks: '*,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-readability-simplify-boolean-expr,
-zirkon-*,
'

4
.gitignore vendored
View File

@ -58,6 +58,10 @@ cmake_install.cmake
CTestTestfile.cmake
*.a
*.o
*.so
*.dll
*.lib
*.dylib
cmake-build-*
# Python cache

View File

@ -3,7 +3,7 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
if (ENABLE_CLANG_TIDY)
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
if (CLANG_TIDY_PATH)
message(STATUS

View File

@ -45,6 +45,7 @@ if (CMAKE_CROSSCOMPILING)
endif ()
if (USE_MUSL)
# use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP'
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
set (ENABLE_ODBC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc
Subproject commit e15858f8ad0ce8aba85cf74e3763874c76bf927c

View File

@ -1,35 +1,95 @@
# Choose to build static or shared library for c-ares.
if (USE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/c-ares")
# Disable looking for libnsl on platforms that have gethostbyname in glibc
#
# c-ares searches for gethostbyname in the libnsl library; however, the
# version shipped with gRPC does it incorrectly [1], since it uses
# CHECK_LIBRARY_EXISTS(), which returns TRUE even if the function exists in
# another dependent library. Upstream already contains the correct macro [2],
# but it is not included in gRPC (not even in upstream gRPC, let alone the one
# that is shipped with ClickHouse).
#
# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
#
# Otherwise, if for some reason you have libnsl [3] installed, clickhouse will
# refuse to start without it, even though it is a completely different library.
#
# [3]: https://packages.debian.org/bullseye/libnsl2
if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
# Generated from contrib/c-ares/src/lib/Makefile.inc
SET(SRCS
"${LIBRARY_DIR}/src/lib/ares__addrinfo2hostent.c"
"${LIBRARY_DIR}/src/lib/ares__addrinfo_localhost.c"
"${LIBRARY_DIR}/src/lib/ares__close_sockets.c"
"${LIBRARY_DIR}/src/lib/ares__get_hostent.c"
"${LIBRARY_DIR}/src/lib/ares__parse_into_addrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__readaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__sortaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__read_line.c"
"${LIBRARY_DIR}/src/lib/ares__timeval.c"
"${LIBRARY_DIR}/src/lib/ares_android.c"
"${LIBRARY_DIR}/src/lib/ares_cancel.c"
"${LIBRARY_DIR}/src/lib/ares_data.c"
"${LIBRARY_DIR}/src/lib/ares_destroy.c"
"${LIBRARY_DIR}/src/lib/ares_expand_name.c"
"${LIBRARY_DIR}/src/lib/ares_expand_string.c"
"${LIBRARY_DIR}/src/lib/ares_fds.c"
"${LIBRARY_DIR}/src/lib/ares_free_hostent.c"
"${LIBRARY_DIR}/src/lib/ares_free_string.c"
"${LIBRARY_DIR}/src/lib/ares_freeaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getenv.c"
"${LIBRARY_DIR}/src/lib/ares_gethostbyaddr.c"
"${LIBRARY_DIR}/src/lib/ares_gethostbyname.c"
"${LIBRARY_DIR}/src/lib/ares_getnameinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getsock.c"
"${LIBRARY_DIR}/src/lib/ares_init.c"
"${LIBRARY_DIR}/src/lib/ares_library_init.c"
"${LIBRARY_DIR}/src/lib/ares_llist.c"
"${LIBRARY_DIR}/src/lib/ares_mkquery.c"
"${LIBRARY_DIR}/src/lib/ares_create_query.c"
"${LIBRARY_DIR}/src/lib/ares_nowarn.c"
"${LIBRARY_DIR}/src/lib/ares_options.c"
"${LIBRARY_DIR}/src/lib/ares_parse_a_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_aaaa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_caa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_mx_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_naptr_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_ns_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_ptr_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_soa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_srv_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_txt_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_uri_reply.c"
"${LIBRARY_DIR}/src/lib/ares_platform.c"
"${LIBRARY_DIR}/src/lib/ares_process.c"
"${LIBRARY_DIR}/src/lib/ares_query.c"
"${LIBRARY_DIR}/src/lib/ares_search.c"
"${LIBRARY_DIR}/src/lib/ares_send.c"
"${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
"${LIBRARY_DIR}/src/lib/ares_strdup.c"
"${LIBRARY_DIR}/src/lib/ares_strerror.c"
"${LIBRARY_DIR}/src/lib/ares_strsplit.c"
"${LIBRARY_DIR}/src/lib/ares_timeout.c"
"${LIBRARY_DIR}/src/lib/ares_version.c"
"${LIBRARY_DIR}/src/lib/ares_writev.c"
"${LIBRARY_DIR}/src/lib/bitncmp.c"
"${LIBRARY_DIR}/src/lib/inet_net_pton.c"
"${LIBRARY_DIR}/src/lib/inet_ntop.c"
"${LIBRARY_DIR}/src/lib/windows_port.c"
)
# Create the _c-ares target as a static or a shared library, depending on the
# USE_STATIC_LIBRARIES option.
if (USE_STATIC_LIBRARIES)
add_library(_c-ares STATIC ${SRCS})
# PUBLIC: the definition is applied to _c-ares itself and propagated to every
# target that links against it.
target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB)
else()
add_library(_c-ares SHARED ${SRCS})
# NOTE(review): PUBLIC also propagates CARES_BUILDING_LIBRARY to consumers of
# the library; confirm this is intended rather than PRIVATE.
target_compile_definitions(_c-ares PUBLIC CARES_BUILDING_LIBRARY)
endif()
# Force use of c-ares inet_net_pton instead of libresolv one
set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE)
target_compile_definitions(_c-ares PRIVATE HAVE_CONFIG_H=1)
add_subdirectory("../c-ares/" "../c-ares/")
target_include_directories(_c-ares SYSTEM PUBLIC
"${LIBRARY_DIR}/src/lib"
"${LIBRARY_DIR}/include"
)
add_library(ch_contrib::c-ares ALIAS c-ares)
# Platform-specific include directories. The original build system does a lot of checks to eventually generate two header files with defines:
# ares_build.h and ares_config.h. To update, run the original CMake build in c-ares for each platform and copy the headers into the
# platform-specific folder.
# For the platform-specific compile definitions, see c-ares top-level CMakeLists.txt.
if (OS_LINUX)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/linux")
target_compile_definitions(_c-ares PRIVATE -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=600)
elseif (OS_DARWIN)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/darwin")
target_compile_definitions(_c-ares PRIVATE -D_DARWIN_C_SOURCE)
elseif (OS_FREEBSD)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/freebsd")
endif()
add_library(ch_contrib::c-ares ALIAS _c-ares)

View File

@ -0,0 +1,43 @@
/* Build-time type configuration for c-ares.
 *
 * Selects the underlying types for ares_socklen_t and ares_ssize_t and
 * includes the system headers whose CARES_HAVE_* flag is defined below. */
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
/* Underlying types for the ares_socklen_t / ares_ssize_t typedefs declared at
 * the bottom of this header. */
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
 * files. We need to include some dependent headers that may be system specific
 * for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* The Windows header flags are not defined in this header, so the
 * corresponding includes below are skipped unless defined elsewhere. */
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
/* These two flags are not consumed in this header; presumably they are read by
 * other c-ares headers -- TODO confirm. */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
/* Include each dependent system header only when its flag is defined. */
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
/* c-ares aliases for the socket-length and signed-size types chosen above. */
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */

View File

@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
#define HAVE_GETADDRINFO_THREADSAFE
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
/* #undef HAVE_GETSERVBYPORT_R */
/* Define to 1 if you have the getservbyname_r function. */
/* #undef HAVE_GETSERVBYNAME_R */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolv' library (-lresolv). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
/* #undef HAVE_MSG_NOSIGNAL */
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
/* #undef HAVE_STROPTS_H */
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t

View File

@ -0,0 +1,43 @@
/* Build-time type configuration for c-ares.
 *
 * Selects the underlying types for ares_socklen_t and ares_ssize_t and
 * includes the system headers whose CARES_HAVE_* flag is defined below. */
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
/* Underlying types for the ares_socklen_t / ares_ssize_t typedefs declared at
 * the bottom of this header. */
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
 * files. We need to include some dependent headers that may be system specific
 * for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* The Windows header flags are not defined in this header, so the
 * corresponding includes below are skipped unless defined elsewhere. */
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
/* These two flags are not consumed in this header; presumably they are read by
 * other c-ares headers -- TODO confirm. */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
/* Include each dependent system header only when its flag is defined. */
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
/* c-ares aliases for the socket-length and signed-size types chosen above. */
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */

View File

@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS 6
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS 6
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
#define HAVE_GETADDRINFO_THREADSAFE
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
#define HAVE_GETSERVBYPORT_R
/* Define to 1 if you have the getservbyname_r function. */
#define HAVE_GETSERVBYNAME_R
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolv' library (-lresolv). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
#define HAVE_MSG_NOSIGNAL
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
/* #undef HAVE_STROPTS_H */
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t

View File

@ -0,0 +1,43 @@
/*
 * Build-time configuration header for c-ares.
 *
 * Selects the underlying types behind the ares_socklen_t and ares_ssize_t
 * typedefs and includes whichever system headers are enabled through the
 * CARES_HAVE_* macros defined below.
 */
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
/* Underlying types used by the typedefs at the end of this header. */
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
* files. We need to include some dependent headers that may be system specific
* for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* The Windows-specific header macros are left undefined in this configuration,
 * so the corresponding #ifdef blocks below are skipped. */
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
/* Include only the headers whose CARES_HAVE_* macro is defined above. */
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
/* Public c-ares type aliases, resolved from the CARES_TYPEOF_* macros above. */
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */

View File

@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS 6
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS 6
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
/* #undef HAVE_GETADDRINFO_THREADSAFE */
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
#define HAVE_GETSERVBYPORT_R
/* Define to 1 if you have the getservbyname_r function. */
#define HAVE_GETSERVBYNAME_R
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolve' library (-lresolve). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
#define HAVE_MALLOC_H
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
#define HAVE_MSG_NOSIGNAL
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
#define HAVE_STROPTS_H
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t

View File

@ -415,7 +415,7 @@
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */

View File

@ -440,7 +440,9 @@
#define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */
#ifndef USE_MUSL
#define HAVE_STRERROR_R 1
#endif
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

2
contrib/libcpuid vendored

@ -1 +1 @@
Subproject commit 8db3b8d2d32d22437f063ce692a1b9bb15e42d18
Subproject commit 503083acb77edf9fbce22a05826307dff2ce96e6

View File

@ -63,6 +63,13 @@ target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
# NOTE: this is a dirty hack that should be avoided; instead, pg_config.h
# should be shipped separately for each OS (as is done for jemalloc), rather
# than one generic file for all OSes as it is now.
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
endif()
target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
add_library(ch_contrib::libpq ALIAS _libpq)

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit ff32b4e9eeafd0b276f010ee969179e4e9e6d0b2
Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f

2
contrib/llvm vendored

@ -1 +1 @@
Subproject commit 20607e61728e97c969e536644c3c0c1bb1a50672
Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc

@ -1 +1 @@
Subproject commit f431047ac8da13179c488018dddf1c0d0771a997
Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2

View File

@ -304,7 +304,7 @@ target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
# Please regenerate these files if you update vectorscan.
if (ARCH_AMD64)
target_include_directories (_vectorscan PRIVATE x86_64)
target_include_directories (_vectorscan PRIVATE amd64)
endif ()
if (ARCH_AARCH64)

View File

@ -67,24 +67,5 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
# NOTE: thread sanitizer is broken in clang-14, we have to build it with clang-15
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-15 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-15 \
llvm-15 \
clang-tidy-15 \
--yes --no-install-recommends \
&& apt-get clean
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -339,17 +339,16 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
choices=(
"clang-15", # For TSAN builds, see #39450
"clang-14",
"clang-14-darwin",
"clang-14-darwin-aarch64",
"clang-14-aarch64",
"clang-14-ppc64le",
"clang-14-amd64sse2",
"clang-14-freebsd",
"clang-15",
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-ppc64le",
"clang-15-amd64sse2",
"clang-15-freebsd",
"gcc-11",
),
default="clang-14",
default="clang-15",
help="a compiler to use",
)
parser.add_argument(

View File

@ -16,11 +16,10 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080'

View File

@ -8,16 +8,41 @@ FROM clickhouse/binary-builder:$FROM_TAG
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 libllvm14 libclang-14-dev libmlir-14-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser --branch llvm-14 \
#
# FIXME: update location of a repo
RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \
&& cd woboq_codebrowser \
&& cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 \
&& make -j \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& ninja \
&& cd .. \
&& rm -rf woboq_codebrowser
@ -32,7 +57,7 @@ ENV SHA=nosha
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-14 -DCMAKE_C_COMPILER=/usr/bin/clang-14 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\

View File

@ -19,7 +19,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function clone

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -61,7 +61,7 @@ function configure
cp -rv right/config left ||:
# Start a temporary server to rename the tables
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn temporary in its own process groups
@ -88,7 +88,7 @@ function configure
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Make copies of the original db for both servers. Use hardlinks instead
@ -106,7 +106,7 @@ function configure
function restart
{
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Change the jemalloc settings here.
@ -1400,7 +1400,7 @@ case "$stage" in
while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis.
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo Servers stopped.
;&
"analyze_queries")

View File

@ -5,7 +5,7 @@ FROM ubuntu:20.04
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
RUN apt-get update \
&& apt-get install \
@ -56,6 +56,8 @@ RUN apt-get update \
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \

View File

@ -134,6 +134,13 @@ Example of configuration for versions later or equal to 22.8:
<max_size>10000000</max_size>
</cache>
</disks>
<policies>
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
</policies>
</storage_configuration>
```
@ -151,6 +158,13 @@ Example of configuration for versions earlier than 22.8:
<data_cache_size>10000000</data_cache_size>
</s3>
</disks>
<policies>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</policies>
</storage_configuration>
```

View File

@ -2,10 +2,9 @@
slug: /en/operations/troubleshooting
sidebar_position: 46
sidebar_label: Troubleshooting
title: Troubleshooting
---
# Troubleshooting
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Query processing](#troubleshooting-does-not-process-queries)

View File

@ -1227,6 +1227,8 @@ Result:
Converts a Unix timestamp to a calendar date and a time of day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns a value of the [DateTime](../../sql-reference/data-types/datetime.md) type.
Alias: `fromUnixTimestamp`.
**Example:**
Query:

View File

@ -1823,6 +1823,36 @@ Result:
Evaluate external model.
Accepts a model name and model arguments. Returns Float64.
## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning.
Accepts a path to a catboost model and model arguments (features). Returns Float64.
``` sql
SELECT feat_1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction
FROM data_table
```
**Prerequisites**
1. Build the catboost evaluation library
Before evaluating catboost models, the `libcatboostmodel.<so|dylib>` library must be made available. See the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html) for how to compile it.
Next, specify the path to `libcatboostmodel.<so|dylib>` in the clickhouse configuration:
``` xml
<clickhouse>
...
<catboost_lib_path>/path/to/libcatboostmodel.so</catboost_lib_path>
...
</clickhouse>
```
2. Train a catboost model using libcatboost
See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set.
## throwIf(x\[, message\[, error_code\]\])
Throw an exception if the argument is non zero.

View File

@ -30,7 +30,12 @@ SELECT name, status FROM system.dictionaries;
## RELOAD MODELS
Reloads all [CatBoost](../../guides/developer/apply-catboost-model.md) models if the configuration was updated without restarting the server.
:::note
This statement and `SYSTEM RELOAD MODEL` merely unload catboost models from the clickhouse-library-bridge. The function `catboostEvaluate()`
loads a model upon first access if it is not loaded yet.
:::
Unloads all CatBoost models.
**Syntax**
@ -40,12 +45,12 @@ SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
## RELOAD MODEL
Completely reloads a CatBoost model `model_name` if the configuration was updated without restarting the server.
Unloads a CatBoost model at `model_path`.
**Syntax**
```sql
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_name>
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_path>
```
## RELOAD FUNCTIONS

View File

@ -13,7 +13,7 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
**Syntax**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
**Parameters**

View File

@ -11,7 +11,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
**Arguments**

View File

@ -10,7 +10,7 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
```
**Arguments**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
**Parameters**

View File

@ -155,7 +155,6 @@ getting_started/index.md getting-started/index.md
getting_started/install.md getting-started/install.md
getting_started/playground.md getting-started/playground.md
getting_started/tutorial.md getting-started/tutorial.md
guides/apply_catboost_model.md guides/apply-catboost-model.md
images/column_oriented.gif images/column-oriented.gif
images/row_oriented.gif images/row-oriented.gif
interfaces/http_interface.md interfaces/http.md

View File

@ -1,241 +0,0 @@
---
slug: /ru/guides/apply-catboost-model
sidebar_position: 41
sidebar_label: "Применение модели CatBoost в ClickHouse"
---
# Применение модели CatBoost в ClickHouse {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) — открытая программная библиотека разработанная компанией [Яндекс](https://yandex.ru/company/) для машинного обучения, которая использует схему градиентного бустинга.
С помощью этой инструкции вы научитесь применять предобученные модели в ClickHouse: в результате вы запустите вывод модели из SQL.
Чтобы применить модель CatBoost в ClickHouse:
1. [Создайте таблицу](#create-table).
2. [Вставьте данные в таблицу](#insert-data-to-table).
3. [Интегрируйте CatBoost в ClickHouse](#integrate-catboost-into-clickhouse) (Опциональный шаг).
4. [Запустите вывод модели из SQL](#run-model-inference).
Подробнее об обучении моделей в CatBoost, см. [Обучение и применение моделей](https://catboost.ai/docs/features/training.html#training).
Вы можете перегрузить модели CatBoost, если их конфигурация была обновлена, без перезагрузки сервера. Для этого используйте системные запросы [RELOAD MODEL](../sql-reference/statements/system.md#query_language-system-reload-model) и [RELOAD MODELS](../sql-reference/statements/system.md#query_language-system-reload-models).
## Перед началом работы {#prerequisites}
Если у вас еще нет [Docker](https://docs.docker.com/install/), установите его.
:::note "Примечание"
[Docker](https://www.docker.com) это программная платформа для создания контейнеров, которые изолируют установку CatBoost и ClickHouse от остальной части системы.
:::
Перед применением модели CatBoost:
**1.** Скачайте [Docker-образ](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) из реестра:
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
```
Данный Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse: код, среду выполнения, библиотеки, переменные окружения и файлы конфигурации.
**2.** Проверьте, что Docker-образ успешно скачался:
``` bash
$ docker image ls
REPOSITORY TAG IMAGE ID CREATED SIZE
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
```
**3.** Запустите Docker-контейнер основанный на данном образе:
``` bash
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
```
## 1. Создайте таблицу {#create-table}
Чтобы создать таблицу для обучающей выборки:
**1.** Запустите клиент ClickHouse:
``` bash
$ clickhouse client
```
:::note "Примечание"
Сервер ClickHouse уже запущен внутри Docker-контейнера.
:::
**2.** Создайте таблицу в ClickHouse с помощью следующей команды:
``` sql
:) CREATE TABLE amazon_train
(
date Date MATERIALIZED today(),
ACTION UInt8,
RESOURCE UInt32,
MGR_ID UInt32,
ROLE_ROLLUP_1 UInt32,
ROLE_ROLLUP_2 UInt32,
ROLE_DEPTNAME UInt32,
ROLE_TITLE UInt32,
ROLE_FAMILY_DESC UInt32,
ROLE_FAMILY UInt32,
ROLE_CODE UInt32
)
ENGINE = MergeTree ORDER BY date
```
**3.** Выйдите из клиента ClickHouse:
``` sql
:) exit
```
## 2. Вставьте данные в таблицу {#insert-data-to-table}
Чтобы вставить данные:
**1.** Выполните следующую команду:
``` bash
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
```
**2.** Запустите клиент ClickHouse:
``` bash
$ clickhouse client
```
**3.** Проверьте, что данные успешно загрузились:
``` sql
:) SELECT count() FROM amazon_train
SELECT count()
FROM amazon_train
+-count()-+
| 65538 |
+---------+
```
## 3. Интегрируйте CatBoost в ClickHouse {#integrate-catboost-into-clickhouse}
:::note "Примечание"
**Опциональный шаг.** Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse.
:::
Чтобы интегрировать CatBoost в ClickHouse:
**1.** Создайте библиотеку для оценки модели.
Наиболее быстрый способ оценить модель CatBoost — это скомпилировать библиотеку `libcatboostmodel.<so|dll|dylib>`. Подробнее о том, как скомпилировать библиотеку, читайте в [документации CatBoost](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** Создайте в любом месте новую директорию с произвольным названием, например `data` и поместите в нее созданную библиотеку. Docker-образ уже содержит библиотеку `data/libcatboostmodel.so`.
**3.** Создайте в любом месте новую директорию для конфигурации модели с произвольным названием, например `models`.
**4.** Создайте файл конфигурации модели с произвольным названием, например `models/amazon_model.xml`.
**5.** Опишите конфигурацию модели:
``` xml
<models>
<model>
<!-- Тип модели. В настоящий момент ClickHouse предоставляет только модель catboost. -->
<type>catboost</type>
<!-- Имя модели. -->
<name>amazon</name>
<!-- Путь к обученной модели. -->
<path>/home/catboost/tutorial/catboost_model.bin</path>
<!-- Интервал обновления. -->
<lifetime>0</lifetime>
</model>
</models>
```
**6.** Добавьте в конфигурацию ClickHouse путь к CatBoost и конфигурации модели:
``` xml
<!-- Файл etc/clickhouse-server/config.d/models_config.xml. -->
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/home/catboost/models/*_model.xml</models_config>
```
:::note "Примечание"
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
:::
## 4. Запустите вывод модели из SQL {#run-model-inference}
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
Проверьте, что модель работает:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) > 0 AS prediction,
ACTION AS target
FROM amazon_train
LIMIT 10
```
:::note "Примечание"
Функция [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами.
:::
Спрогнозируйте вероятность:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1 + exp(-prediction)) AS probability,
ACTION AS target
FROM amazon_train
LIMIT 10
```
:::note "Примечание"
Подробнее про функцию [exp()](../sql-reference/functions/math-functions.md).
:::
Посчитайте логистическую функцию потерь (LogLoss) на всей выборке:
``` sql
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
FROM
(
SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1. + exp(-prediction)) AS prob,
ACTION AS tg
FROM amazon_train
)
```
:::note "Примечание"
Подробнее про функции [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg), [log()](../sql-reference/functions/math-functions.md).
:::

View File

@ -7,5 +7,3 @@ sidebar_label: "Руководства"
# Руководства {#rukovodstva}
Подробные пошаговые инструкции, которые помогут вам решать различные задачи с помощью ClickHouse.
- [Применение модели CatBoost в ClickHouse](apply-catboost-model.md)

View File

@ -29,7 +29,12 @@ SELECT name, status FROM system.dictionaries;
## RELOAD MODELS {#query_language-system-reload-models}
Перегружает все модели [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse), если их конфигурация была обновлена, без перезагрузки сервера.
:::note
Этот запрос и `SYSTEM RELOAD MODEL` лишь выгружают модели CatBoost из clickhouse-library-bridge. Функция `catboostEvaluate()`
загружает модель при первом обращении, если она ещё не загружена.
:::
Выгружает все модели CatBoost.
**Синтаксис**
@ -39,12 +44,12 @@ SYSTEM RELOAD MODELS
## RELOAD MODEL {#query_language-system-reload-model}
Полностью перегружает модель [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse) `model_name`, если ее конфигурация была обновлена, без перезагрузки сервера.
Выгружает модель CatBoost по адресу `model_path`.
**Синтаксис**
```sql
SYSTEM RELOAD MODEL <model_name>
SYSTEM RELOAD MODEL <model_path>
```
## RELOAD FUNCTIONS {#query_language-system-reload-functions}

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Синтаксис**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
**Параметры**

View File

@ -11,7 +11,7 @@ sidebar_label: s3
**Синтаксис**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
**Aргументы**

View File

@ -11,7 +11,7 @@ sidebar_label: s3Cluster
**Синтаксис**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
```
**Аргументы**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Синтаксис**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
**Параметры**

View File

@ -1,244 +0,0 @@
---
slug: /zh/guides/apply-catboost-model
sidebar_position: 41
sidebar_label: "\u5E94\u7528CatBoost\u6A21\u578B"
---
# 在ClickHouse中应用Catboost模型 {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) 是一个由[Yandex](https://yandex.com/company/)开发的开源免费机器学习库。
通过本篇文档您将学会如何用SQL语句调用已经存放在Clickhouse中的预训练模型来预测数据。
为了在ClickHouse中应用CatBoost模型需要进行如下步骤
1. [创建数据表](#create-table).
2. [将数据插入到表中](#insert-data-to-table).
3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可跳过)。
4. [从SQL运行模型推断](#run-model-inference).
有关训练CatBoost模型的详细信息请参阅 [训练和模型应用](https://catboost.ai/docs/features/training.html#training).
您可以通过[RELOAD MODEL](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-model)与[RELOAD MODELS](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-models)语句来重载CatBoost模型。
## 先决条件 {#prerequisites}
请先安装 [Docker](https://docs.docker.com/install/)。
!!! note "注"
[Docker](https://www.docker.com) 是一个软件平台用户可以用Docker来创建独立于已有系统并集成了CatBoost和ClickHouse的容器。
在应用CatBoost模型之前:
**1.** 从容器仓库拉取示例docker镜像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) :
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
```
此示例Docker镜像包含运行CatBoost和ClickHouse所需的所有内容代码、运行时、库、环境变量和配置文件。
**2.** 确保已成功拉取Docker镜像:
``` bash
$ docker image ls
REPOSITORY TAG IMAGE ID CREATED SIZE
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
```
**3.** 基于此镜像启动一个Docker容器:
``` bash
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
```
## 1. 创建数据表 {#create-table}
为训练样本创建ClickHouse表:
**1.** 在交互模式下启动ClickHouse控制台客户端:
``` bash
$ clickhouse client
```
!!! note "注"
ClickHouse服务器已经在Docker容器内运行。
**2.** 使用以下命令创建表:
``` sql
:) CREATE TABLE amazon_train
(
date Date MATERIALIZED today(),
ACTION UInt8,
RESOURCE UInt32,
MGR_ID UInt32,
ROLE_ROLLUP_1 UInt32,
ROLE_ROLLUP_2 UInt32,
ROLE_DEPTNAME UInt32,
ROLE_TITLE UInt32,
ROLE_FAMILY_DESC UInt32,
ROLE_FAMILY UInt32,
ROLE_CODE UInt32
)
ENGINE = MergeTree ORDER BY date
```
**3.** 从ClickHouse控制台客户端退出:
``` sql
:) exit
```
## 2. 将数据插入到表中 {#insert-data-to-table}
插入数据:
**1.** 运行以下命令:
``` bash
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
```
**2.** 在交互模式下启动ClickHouse控制台客户端:
``` bash
$ clickhouse client
```
**3.** 确保数据已上传:
``` sql
:) SELECT count() FROM amazon_train
SELECT count()
FROM amazon_train
+-count()-+
| 65538 |
+-------+
```
## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse}
!!! note "注"
**可跳过。** 示例Docker映像已经包含了运行CatBoost和ClickHouse所需的所有内容。
为了将CatBoost集成进ClickHouse需要进行如下步骤
**1.** 构建评估库。
评估CatBoost模型的最快方法是编译 `libcatboostmodel.<so|dll|dylib>` 库文件.
有关如何构建库文件的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** 创建一个新目录(位置与名称可随意指定), 如 `data` 并将创建的库文件放入其中。 示例Docker镜像已经包含了库 `data/libcatboostmodel.so`.
**3.** 创建一个新目录来放配置模型, 如 `models`.
**4.** 创建一个模型配置文件,如 `models/amazon_model.xml`.
**5.** 修改模型配置:
``` xml
<models>
<model>
<!-- Model type. Now catboost only. -->
<type>catboost</type>
<!-- Model name. -->
<name>amazon</name>
<!-- Path to trained model. -->
<path>/home/catboost/tutorial/catboost_model.bin</path>
<!-- Update interval. -->
<lifetime>0</lifetime>
</model>
</models>
```
**6.** 将CatBoost库文件的路径和模型配置添加到ClickHouse配置:
``` xml
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/home/catboost/models/*_model.xml</models_config>
```
## 4. 使用SQL调用预测模型 {#run-model-inference}
为了测试模型是否正常可以使用ClickHouse客户端 `$ clickhouse client`.
让我们确保模型能正常工作:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) > 0 AS prediction,
ACTION AS target
FROM amazon_train
LIMIT 10
```
!!! note "注"
函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 会对多类别模型返回一个元组,其中包含每一类别的原始预测值。
执行预测:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1 + exp(-prediction)) AS probability,
ACTION AS target
FROM amazon_train
LIMIT 10
```
!!! note "注"
查看函数说明 [exp()](../sql-reference/functions/math-functions.md) 。
让我们计算样本的LogLoss:
``` sql
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
FROM
(
SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1. + exp(-prediction)) AS prob,
ACTION AS tg
FROM amazon_train
)
```
!!! note "注"
查看函数说明 [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) 和 [log()](../sql-reference/functions/math-functions.md) 。
[原始文章](https://clickhouse.com/docs/en/guides/apply_catboost_model/) <!--hide-->

View File

@ -9,6 +9,5 @@ sidebar_label: ClickHouse指南
列出了如何使用 Clickhouse 解决各种任务的详细说明:
- [关于简单集群设置的教程](../getting-started/tutorial.md)
- [在ClickHouse中应用CatBoost模型](apply-catboost-model.md)
[原始文章](https://clickhouse.com/docs/en/guides/) <!--hide-->

View File

@ -54,7 +54,7 @@ else ()
endif ()
if (NOT USE_MUSL)
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to external dynamically loaded libraries" ${ENABLE_CLICKHOUSE_ALL})
endif ()
# https://presentations.clickhouse.com/matemarketing_2020/

View File

@ -446,8 +446,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fs::path ulimits_file = ulimits_dir / fmt::format("{}.conf", user);
fmt::print("Will set ulimits for {} user in {}.\n", user, ulimits_file.string());
std::string ulimits_content = fmt::format(
"{0}\tsoft\tnofile\t262144\n"
"{0}\thard\tnofile\t262144\n", user);
"{0}\tsoft\tnofile\t1048576\n"
"{0}\thard\tnofile\t1048576\n", user);
fs::create_directories(ulimits_dir);

View File

@ -1,6 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
CatBoostLibraryHandler.cpp
CatBoostLibraryHandlerFactory.cpp
ExternalDictionaryLibraryAPI.cpp
ExternalDictionaryLibraryHandler.cpp
ExternalDictionaryLibraryHandlerFactory.cpp

View File

@ -0,0 +1,49 @@
#pragma once
#include <cstdint>
#include <cstddef>
// Function pointer typedefs and names of libcatboost.so functions used by ClickHouse
//
// Every entry is a pair: `<Name>Func` is the function-pointer type and `<Name>Name` is the
// symbol name string that is passed to the shared-library lookup to obtain that pointer.
// NOTE(review): the signatures are assumed to mirror the CatBoost C API header - confirm
// against the CatBoost version that is actually deployed.
struct CatBoostLibraryAPI
{
// Opaque model handle; only ever used through a pointer.
using ModelCalcerHandle = void;
// Handle lifetime: create / delete.
using ModelCalcerCreateFunc = ModelCalcerHandle * (*)();
static constexpr const char * ModelCalcerCreateName = "ModelCalcerCreate";
using ModelCalcerDeleteFunc = void (*)(ModelCalcerHandle *);
static constexpr const char * ModelCalcerDeleteName = "ModelCalcerDelete";
// Text of the last error reported by the library.
using GetErrorStringFunc = const char * (*)();
static constexpr const char * GetErrorStringName = "GetErrorString";
// Loads a model file into a handle; the bool result is treated as "success" by the callers.
using LoadFullModelFromFileFunc = bool (*)(ModelCalcerHandle *, const char *);
static constexpr const char * LoadFullModelFromFileName = "LoadFullModelFromFile";
// Prediction entry points: float features only, float + string cat features, float + hashed cat features.
using CalcModelPredictionFlatFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionFlatName = "CalcModelPredictionFlat";
using CalcModelPredictionFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const char ***, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionName = "CalcModelPrediction";
using CalcModelPredictionWithHashedCatFeaturesFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const int **, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionWithHashedCatFeaturesName = "CalcModelPredictionWithHashedCatFeatures";
// Hashing of categorical feature values (string and integer variants).
using GetStringCatFeatureHashFunc = int (*)(const char *, size_t);
static constexpr const char * GetStringCatFeatureHashName = "GetStringCatFeatureHash";
using GetIntegerCatFeatureHashFunc = int (*)(uint64_t);
static constexpr const char * GetIntegerCatFeatureHashName = "GetIntegerCatFeatureHash";
// Model metadata queries.
using GetFloatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetFloatFeaturesCountName = "GetFloatFeaturesCount";
using GetCatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetCatFeaturesCountName = "GetCatFeaturesCount";
using GetTreeCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetTreeCountName = "GetTreeCount";
using GetDimensionsCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetDimensionsCountName = "GetDimensionsCount";
};

View File

@ -0,0 +1,389 @@
#include "CatBoostLibraryHandler.h"
#include <Columns/ColumnTuple.h>
#include <Common/FieldVisitorConvertToNumber.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_APPLY_CATBOOST_MODEL;
extern const int CANNOT_LOAD_CATBOOST_MODEL;
extern const int LOGICAL_ERROR;
}
/// Resolves every CatBoost entry point used by the handler from the given shared library.
/// GetTreeCount and GetDimensionsCount are looked up with tryGet(), all other symbols with get().
/// NOTE(review): tryGet() presumably yields a null pointer for a missing symbol - confirm in SharedLibrary.
CatBoostLibraryHandler::APIHolder::APIHolder(SharedLibrary & lib)
{
    using API = CatBoostLibraryAPI;

    /// Handle lifetime and error reporting.
    ModelCalcerCreate = lib.get<API::ModelCalcerCreateFunc>(API::ModelCalcerCreateName);
    ModelCalcerDelete = lib.get<API::ModelCalcerDeleteFunc>(API::ModelCalcerDeleteName);
    GetErrorString = lib.get<API::GetErrorStringFunc>(API::GetErrorStringName);

    /// Model loading and evaluation.
    LoadFullModelFromFile = lib.get<API::LoadFullModelFromFileFunc>(API::LoadFullModelFromFileName);
    CalcModelPredictionFlat = lib.get<API::CalcModelPredictionFlatFunc>(API::CalcModelPredictionFlatName);
    CalcModelPrediction = lib.get<API::CalcModelPredictionFunc>(API::CalcModelPredictionName);
    CalcModelPredictionWithHashedCatFeatures
        = lib.get<API::CalcModelPredictionWithHashedCatFeaturesFunc>(API::CalcModelPredictionWithHashedCatFeaturesName);

    /// Categorical feature hashing.
    GetStringCatFeatureHash = lib.get<API::GetStringCatFeatureHashFunc>(API::GetStringCatFeatureHashName);
    GetIntegerCatFeatureHash = lib.get<API::GetIntegerCatFeatureHashFunc>(API::GetIntegerCatFeatureHashName);

    /// Model metadata.
    GetFloatFeaturesCount = lib.get<API::GetFloatFeaturesCountFunc>(API::GetFloatFeaturesCountName);
    GetCatFeaturesCount = lib.get<API::GetCatFeaturesCountFunc>(API::GetCatFeaturesCountName);
    GetTreeCount = lib.tryGet<API::GetTreeCountFunc>(API::GetTreeCountName);
    GetDimensionsCount = lib.tryGet<API::GetDimensionsCountFunc>(API::GetDimensionsCountName);
}
/// Loads the CatBoost shared library at `library_path`, creates a model calcer and loads the model
/// file at `model_path` into it. Also records when loading started and how long it took.
///
/// Throws CANNOT_LOAD_CATBOOST_MODEL when the library reports that the model could not be loaded;
/// in that case the freshly created calcer is released before the exception leaves the constructor.
CatBoostLibraryHandler::CatBoostLibraryHandler(
    const std::string & library_path,
    const std::string & model_path)
    : loading_start_time(std::chrono::system_clock::now())
    , library(std::make_shared<SharedLibrary>(library_path))
    , api(*library)
{
    model_calcer_handle = api.ModelCalcerCreate();

    if (!api.LoadFullModelFromFile(model_calcer_handle, model_path.c_str()))
    {
        /// Copy the message before touching the handle: the text is owned by the library and
        /// it is not known here whether it stays valid across ModelCalcerDelete.
        const char * raw_error = api.GetErrorString();
        const std::string error = raw_error ? raw_error : "";

        /// The destructor does not run for an object whose constructor throws,
        /// so the calcer must be released here or it leaks.
        api.ModelCalcerDelete(model_calcer_handle);
        model_calcer_handle = nullptr;

        throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
            "Cannot load CatBoost model: {}", error);
    }

    float_features_count = api.GetFloatFeaturesCount(model_calcer_handle);
    cat_features_count = api.GetCatFeaturesCount(model_calcer_handle);

    /// GetDimensionsCount is resolved with tryGet() and may be absent; fall back to one output per row.
    tree_count = 1;
    if (api.GetDimensionsCount)
        tree_count = api.GetDimensionsCount(model_calcer_handle);

    loading_duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - loading_start_time);
}
/// Releases the model calcer that was created in the constructor.
CatBoostLibraryHandler::~CatBoostLibraryHandler()
{
api.ModelCalcerDelete(model_calcer_handle);
}
/// Returns the wall-clock time captured at the start of the constructor.
std::chrono::system_clock::time_point CatBoostLibraryHandler::getLoadingStartTime() const
{
return loading_start_time;
}
/// Returns how long the constructor took (library load, model load and metadata queries), in milliseconds.
std::chrono::milliseconds CatBoostLibraryHandler::getLoadingDuration() const
{
return loading_duration;
}
namespace
{
/// Buffer should be allocated with features_count * column->size() elements.
/// Place column elements in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
template <typename T>
void placeColumnAsNumber(const IColumn * column, T * buffer, size_t features_count)
{
size_t size = column->size();
FieldVisitorConvertToNumber<T> visitor;
for (size_t i = 0; i < size; ++i)
{
/// TODO: Replace with column visitor.
Field field;
column->get(i, field);
*buffer = applyVisitor(visitor, field);
buffer += features_count;
}
}
/// Stores a pointer to each row's string data in `buffer` with a stride of `features_count`:
/// row r goes to buffer[r * features_count]. The pointers refer to memory owned by `column`.
/// The caller must provide room for features_count * column.size() pointers.
void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count)
{
    const size_t rows = column.size();

    for (size_t row = 0; row < rows; ++row)
        buffer[row * features_count] = const_cast<char *>(column.getDataAtWithTerminatingZero(row).data);
}
/// Copies every row of a fixed-string column into a zero-terminated slot of a new array and stores
/// a pointer to each slot in `buffer` with a stride of `features_count` (row r -> buffer[r * features_count]).
/// The copy is needed because ColumnFixedString doesn't store a terminating zero.
/// Returns the array that owns the copies; it must outlive every use of the pointers in `buffer`.
PODArray<char> placeFixedStringColumn(const ColumnFixedString & column, const char ** buffer, size_t features_count)
{
    const size_t rows = column.size();
    const size_t fixed_length = column.getN();

    /// One extra byte per row for the terminating zero.
    PODArray<char> storage(rows * (fixed_length + 1));
    char * write_pos = storage.data();

    for (size_t row = 0; row < rows; ++row)
    {
        const auto value = column.getDataAt(row);

        memcpy(write_pos, value.data, value.size);
        write_pos[value.size] = 0;

        buffer[row * features_count] = write_pos;
        write_pos += value.size + 1;
    }

    return storage;
}
/// Packs `size` columns, starting at columns[offset], into one row-major ColumnVector<T>
/// (row r occupies elements [r * size, (r + 1) * size)) and makes buffer[r] point at the start of row r.
/// Non-numeric columns are skipped, so their slots stay unset.
/// `buffer` must have room for one pointer per row. Returns the column that owns the packed data,
/// or nullptr when `size` is 0 (then `buffer` is left untouched).
template <typename T>
ColumnPtr placeNumericColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const T** buffer)
{
    if (size == 0)
        return nullptr;

    const size_t rows = columns[offset]->size();
    auto packed = ColumnVector<T>::create(size * rows);
    T * packed_data = packed->getData().data();

    for (size_t feature = 0; feature < size; ++feature)
    {
        const auto * column = columns[offset + feature];
        if (column->isNumeric())
            placeColumnAsNumber(column, packed_data + feature, size);
    }

    for (size_t row = 0; row < rows; ++row)
        buffer[row] = packed_data + row * size;

    return packed;
}
/// Place columns into buffer, returns data which was used for fixed string columns.
/// Buffer should contains column->size() values, each value contains size strings.
std::vector<PODArray<char>> placeStringColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer)
{
if (size == 0)
return {};
std::vector<PODArray<char>> data;
for (size_t i = 0; i < size; ++i)
{
const auto * column = columns[offset + i];
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
placeStringColumn(*column_string, buffer + i, size);
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size));
else
throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR);
}
return data;
}
/// Builds a per-row view over a flat pointer array:
/// buffer[column_size * cat_features_count] -> char * => cat_features[column_size][cat_features_count] -> char *
/// After the call cat_features[row] points at buffer + row * cat_features_count.
void fillCatFeaturesBuffer(
    const char *** cat_features, const char ** buffer,
    size_t column_size, size_t cat_features_count)
{
    for (size_t row = 0; row < column_size; ++row)
        cat_features[row] = buffer + row * cat_features_count;
}
/// Hashes every row of a string-like column with GetStringCatFeatureHash and writes the result
/// into position `ps` of the matching row: buffer[row][ps].
/// The rows are declared const but are written through a const_cast.
template <typename Column>
void calcStringHashes(const Column * column, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
    const size_t rows = column->size();

    for (size_t row = 0; row < rows; ++row)
    {
        const auto value = column->getDataAt(row);
        int * row_values = const_cast<int *>(buffer[row]);
        row_values[ps] = api.GetStringCatFeatureHash(value.data, value.size);
    }
}
/// Replaces, in place, the value at position `ps` of each of the `column_size` rows with its
/// GetIntegerCatFeatureHash. On entry buffer[row][ps] must hold the unhashed value.
void calcIntHashes(size_t column_size, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
    for (size_t row = 0; row < column_size; ++row)
    {
        int * row_values = const_cast<int *>(buffer[row]);
        row_values[ps] = api.GetIntegerCatFeatureHash(buffer[row][ps]);
    }
}
/// Computes categorical feature hashes for `size` columns starting at columns[offset].
/// `buffer` contains column->size() rows and `size` columns; feature i lives at position i of every row.
/// * For int cat features the hash is calculated in place from the value already stored in `buffer`.
/// * For string cat features (ColumnString / ColumnFixedString) the hash is calculated from the column rows.
/// Does nothing when `size` is 0.
void calcHashes(const ColumnRawPtrs & columns, size_t offset, size_t size, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
    if (size == 0)
        return;

    size_t column_size = columns[offset]->size();

    for (size_t i = 0; i < size; ++i)
    {
        const auto * column = columns[offset + i];
        if (const auto * column_string = typeid_cast<const ColumnString *>(column))
            calcStringHashes(column_string, i, buffer, api);
        else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
            calcStringHashes(column_fixed_string, i, buffer, api);
        else
            calcIntHashes(column_size, i, buffer, api);
    }
}
}
/// Converts the input columns to the row-oriented layout expected by the CatBoost wrapper api and
/// calls the matching evaluation function:
/// * CalcModelPredictionFlat if the model has no cat features
/// * CalcModelPrediction if all cat features are strings
/// * CalcModelPredictionWithHashedCatFeatures if there are int cat features.
/// Returns a flat column of column_size * tree_count values.
/// Throws CANNOT_APPLY_CATBOOST_MODEL if the library call reports failure.
ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
    const ColumnRawPtrs & columns,
    bool cat_features_are_strings) const
{
    std::string error_msg = "Error occurred while applying CatBoost model: ";

    const size_t rows = columns.front()->size();
    const size_t result_size = rows * tree_count;

    auto predictions = ColumnFloat64::create(result_size);
    auto * predictions_buf = predictions->getData().data();

    if (rows == 0)
        return predictions;

    /// Float features: one pointer per row into a single packed column.
    /// float_storage owns the packed data and must stay alive until the library call returns.
    PODArray<const float *> float_rows(rows);
    auto * float_rows_buf = float_rows.data();
    auto float_storage = placeNumericColumns<float>(columns, 0, float_features_count, float_rows_buf);

    bool succeeded = false;

    if (cat_features_count == 0)
    {
        succeeded = api.CalcModelPredictionFlat(
            model_calcer_handle, rows,
            float_rows_buf, float_features_count,
            predictions_buf, result_size);
    }
    else if (cat_features_are_strings)
    {
        /// string_pointers stores pointers to ColumnString data or to fixed_strings_data;
        /// string_rows[r] points at the pointers of row r.
        PODArray<const char *> string_pointers(cat_features_count * rows);
        PODArray<const char **> string_rows(rows);
        auto * string_rows_buf = string_rows.data();
        fillCatFeaturesBuffer(string_rows_buf, string_pointers.data(), rows, cat_features_count);

        /// Fixed strings are stored without terminating zero, so they are copied into fixed_strings_data.
        auto fixed_strings_data = placeStringColumns(
            columns, float_features_count, cat_features_count, string_pointers.data());

        succeeded = api.CalcModelPrediction(
            model_calcer_handle, rows,
            float_rows_buf, float_features_count,
            string_rows_buf, cat_features_count,
            predictions_buf, result_size);
    }
    else
    {
        /// Numeric cat features are packed first, then every position is replaced by its hash.
        PODArray<const int *> hashed_rows(rows);
        auto * hashed_rows_buf = hashed_rows.data();
        auto hashed_storage = placeNumericColumns<int>(
            columns, float_features_count, cat_features_count, hashed_rows_buf);
        calcHashes(columns, float_features_count, cat_features_count, hashed_rows_buf, api);

        succeeded = api.CalcModelPredictionWithHashedCatFeatures(
            model_calcer_handle, rows,
            float_rows_buf, float_features_count,
            hashed_rows_buf, cat_features_count,
            predictions_buf, result_size);
    }

    if (!succeeded)
        throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);

    return predictions;
}
/// Returns tree_count under the handler mutex.
/// Despite the name, the value is taken from GetDimensionsCount in the constructor (1 if that symbol is absent).
size_t CatBoostLibraryHandler::getTreeCount() const
{
std::lock_guard lock(mutex);
return tree_count;
}
/// Validates the input columns and evaluates the model on them while holding the handler mutex.
///
/// `columns` must hold float_features_count numeric columns followed by cat_features_count columns,
/// each of which is numeric, ColumnString or ColumnFixedString.
/// Returns a Float64 column when tree_count is 1, otherwise a tuple of tree_count Float64 columns.
/// Throws BAD_ARGUMENTS when the column list is empty, has the wrong length or contains an unsupported type.
ColumnPtr CatBoostLibraryHandler::evaluate(const ColumnRawPtrs & columns) const
{
    std::lock_guard lock(mutex);

    if (columns.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");

    const size_t features_count = float_features_count + cat_features_count;
    if (columns.size() != features_count)
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
            columns.size(),
            float_features_count,
            cat_features_count);

    for (size_t pos = 0; pos < float_features_count; ++pos)
        if (!columns[pos]->isNumeric())
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", pos);

    /// A single numeric cat feature switches evaluation to the hashed-features path.
    bool cat_features_are_strings = true;
    for (size_t pos = float_features_count; pos < features_count; ++pos)
    {
        const auto * column = columns[pos];
        if (column->isNumeric())
            cat_features_are_strings = false;
        else if (!typeid_cast<const ColumnString *>(column) && !typeid_cast<const ColumnFixedString *>(column))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", pos);
    }

    auto result = evalImpl(columns, cat_features_are_strings);

    if (tree_count == 1)
        return result;

    /// Multiple trees case. The flat result is row-major (tree_count values per row);
    /// split it into one column per tree.
    const size_t rows = columns.front()->size();
    const Float64 * flat = result->getData().data();

    MutableColumns per_tree_columns(tree_count);
    for (size_t tree = 0; tree < tree_count; ++tree)
    {
        auto tree_column = ColumnFloat64::create(rows);
        Float64 * dst = tree_column->getData().data();

        for (size_t row = 0; row < rows; ++row)
            dst[row] = flat[row * tree_count + tree];

        per_tree_columns[tree] = std::move(tree_column);
    }

    return ColumnTuple::create(std::move(per_tree_columns));
}
}

View File

@ -0,0 +1,78 @@
#pragma once
#include "CatBoostLibraryAPI.h"
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/SharedLibrary.h>
#include <base/defines.h>
#include <chrono>
#include <mutex>
namespace DB
{
/// Abstracts access to the CatBoost shared library.
/// An instance loads the library, creates one model calcer and loads one model file into it in the
/// constructor; the calcer is released in the destructor.
/// evaluate() and getTreeCount() lock `mutex`, so calls on the same handler are serialised.
class CatBoostLibraryHandler
{
public:
/// Holds pointers to CatBoost library functions
/// The pointers are resolved from the shared library passed to the constructor.
/// GetTreeCount and GetDimensionsCount are optional lookups; check them before calling.
struct APIHolder
{
explicit APIHolder(SharedLibrary & lib);
// NOLINTBEGIN(readability-identifier-naming)
CatBoostLibraryAPI::ModelCalcerCreateFunc ModelCalcerCreate;
CatBoostLibraryAPI::ModelCalcerDeleteFunc ModelCalcerDelete;
CatBoostLibraryAPI::GetErrorStringFunc GetErrorString;
CatBoostLibraryAPI::LoadFullModelFromFileFunc LoadFullModelFromFile;
CatBoostLibraryAPI::CalcModelPredictionFlatFunc CalcModelPredictionFlat;
CatBoostLibraryAPI::CalcModelPredictionFunc CalcModelPrediction;
CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesFunc CalcModelPredictionWithHashedCatFeatures;
CatBoostLibraryAPI::GetStringCatFeatureHashFunc GetStringCatFeatureHash;
CatBoostLibraryAPI::GetIntegerCatFeatureHashFunc GetIntegerCatFeatureHash;
CatBoostLibraryAPI::GetFloatFeaturesCountFunc GetFloatFeaturesCount;
CatBoostLibraryAPI::GetCatFeaturesCountFunc GetCatFeaturesCount;
CatBoostLibraryAPI::GetTreeCountFunc GetTreeCount;
CatBoostLibraryAPI::GetDimensionsCountFunc GetDimensionsCount;
// NOLINTEND(readability-identifier-naming)
};
/// Loads the library at `library_path` and the model file at `model_path`.
/// Throws if the model cannot be loaded.
CatBoostLibraryHandler(
const String & library_path,
const String & model_path);
~CatBoostLibraryHandler();
/// Time at which the constructor started, and how long it ran.
std::chrono::system_clock::time_point getLoadingStartTime() const;
std::chrono::milliseconds getLoadingDuration() const;
/// Number of output values per row (a tuple of that many columns is returned by evaluate() when it is not 1).
size_t getTreeCount() const;
/// Evaluates the model on `columns`: float feature columns first, then categorical feature columns.
ColumnPtr evaluate(const ColumnRawPtrs & columns) const;
private:
std::chrono::system_clock::time_point loading_start_time;
std::chrono::milliseconds loading_duration;
/// Declared before `api`: members are initialised in declaration order and `api` is built from `*library`.
const SharedLibraryPtr library;
const APIHolder api;
mutable std::mutex mutex;
/// Model state; the annotations below let the thread-safety analysis check that it is accessed under `mutex`.
CatBoostLibraryAPI::ModelCalcerHandle * model_calcer_handle TSA_GUARDED_BY(mutex) TSA_PT_GUARDED_BY(mutex);
size_t float_features_count TSA_GUARDED_BY(mutex);
size_t cat_features_count TSA_GUARDED_BY(mutex);
size_t tree_count TSA_GUARDED_BY(mutex);
/// Does the actual library call; the caller must already hold `mutex`.
ColumnFloat64::MutablePtr evalImpl(const ColumnRawPtrs & columns, bool cat_features_are_strings) const TSA_REQUIRES(mutex);
};
using CatBoostLibraryHandlerPtr = std::shared_ptr<CatBoostLibraryHandler>;
}

View File

@ -0,0 +1,80 @@
#include "CatBoostLibraryHandlerFactory.h"
#include <Common/logger_useful.h>
namespace DB
{
/// Returns the single factory object; it is a function-local static created on the first call.
CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance()
{
    static CatBoostLibraryHandlerFactory factory;
    return factory;
}
/// Only binds the logger; the handler map starts out empty.
CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory() : log(&Poco::Logger::get("CatBoostLibraryHandlerFactory"))
{
}
/// Looks up the handler registered for `model_path`.
/// If none exists and `create_if_not_found` is set, a new handler is created from (`library_path`, `model_path`),
/// registered and returned; otherwise nullptr is returned. `library_path` is only used when a handler is created.
/// The whole operation, including handler construction, runs under the factory mutex.
CatBoostLibraryHandlerPtr CatBoostLibraryHandlerFactory::tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found)
{
    std::lock_guard lock(mutex);

    if (auto it = library_handlers.find(model_path); it != library_handlers.end())
        return it->second;

    if (!create_if_not_found)
        return nullptr;

    auto created = std::make_shared<CatBoostLibraryHandler>(library_path, model_path);
    library_handlers.emplace(model_path, created);
    LOG_DEBUG(log, "Loaded catboost library handler for model path '{}'", model_path);
    return created;
}
/// Drops the handler registered for `model_path`, if any, and logs whether something was removed.
void CatBoostLibraryHandlerFactory::removeModel(const String & model_path)
{
    std::lock_guard lock(mutex);

    const bool erased = library_handlers.erase(model_path) != 0;
    if (erased)
    {
        LOG_DEBUG(log, "Unloaded catboost library handler for model path '{}'", model_path);
    }
    else
    {
        LOG_DEBUG(log, "Cannot unload catboost library handler for model path '{}'", model_path);
    }
}
/// Drops every registered handler. The map only holds shared pointers, so a handler that is still
/// referenced elsewhere stays alive until that reference goes away.
void CatBoostLibraryHandlerFactory::removeAllModels()
{
    std::lock_guard lock(mutex);

    library_handlers.clear();

    LOG_DEBUG(log, "Unloaded all catboost library handlers");
}
/// Produces one info record per registered handler: the model path (the map key), the fixed type
/// "catboost", and the loading start time / duration reported by the handler.
ExternalModelInfos CatBoostLibraryHandlerFactory::getModelInfos()
{
    std::lock_guard lock(mutex);

    ExternalModelInfos infos;
    for (const auto & [path, handler] : library_handlers)
    {
        infos.push_back({
            .model_path = path,
            .model_type = "catboost",
            .loading_start_time = handler->getLoadingStartTime(),
            .loading_duration = handler->getLoadingDuration()
        });
    }
    return infos;
}
}

View File

@ -0,0 +1,37 @@
#pragma once
#include "CatBoostLibraryHandler.h"
#include <base/defines.h>
#include <Common/ExternalModelInfo.h>
#include <chrono>
#include <mutex>
#include <unordered_map>
namespace DB
{
/// Registry of CatBoost library handlers, keyed by model path. All member functions that touch the
/// map take `mutex`.
class CatBoostLibraryHandlerFactory final : private boost::noncopyable
{
public:
/// Access to the shared factory object.
static CatBoostLibraryHandlerFactory & instance();
CatBoostLibraryHandlerFactory();
/// Returns the handler for `model_path`; when it is missing, either creates it from
/// (`library_path`, `model_path`) if `create_if_not_found` is set, or returns nullptr.
CatBoostLibraryHandlerPtr tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found);
/// Unregisters the handler for `model_path` (no-op apart from a log message if it is not registered).
void removeModel(const String & model_path);
/// Unregisters all handlers.
void removeAllModels();
/// Path, type and loading statistics of every registered model.
ExternalModelInfos getModelInfos();
private:
/// map: model path --> catboost library handler
std::unordered_map<String, CatBoostLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
Poco::Logger * log;
};
}

View File

@ -50,6 +50,6 @@ private:
void * lib_data;
};
using SharedLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
using ExternalDictionaryLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
}

View File

@ -1,37 +1,40 @@
#include "ExternalDictionaryLibraryHandlerFactory.h"
#include <Common/logger_useful.h>
namespace DB
{
SharedLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const std::string & dictionary_id)
ExternalDictionaryLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const String & dictionary_id)
{
std::lock_guard lock(mutex);
auto library_handler = library_handlers.find(dictionary_id);
if (library_handler != library_handlers.end())
return library_handler->second;
if (auto handler = library_handlers.find(dictionary_id); handler != library_handlers.end())
return handler->second;
return nullptr;
}
void ExternalDictionaryLibraryHandlerFactory::create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const String & dictionary_id,
const String & library_path,
const std::vector<String> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names)
const std::vector<String> & attributes_names)
{
std::lock_guard lock(mutex);
if (!library_handlers.contains(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
if (library_handlers.contains(dictionary_id))
{
LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
return;
}
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
}
bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
bool ExternalDictionaryLibraryHandlerFactory::clone(const String & from_dictionary_id, const String & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
@ -45,7 +48,7 @@ bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dic
}
bool ExternalDictionaryLibraryHandlerFactory::remove(const std::string & dictionary_id)
bool ExternalDictionaryLibraryHandlerFactory::remove(const String & dictionary_id)
{
std::lock_guard lock(mutex);
/// extDict_libDelete is called in destructor.

View File

@ -17,22 +17,22 @@ class ExternalDictionaryLibraryHandlerFactory final : private boost::noncopyable
public:
static ExternalDictionaryLibraryHandlerFactory & instance();
SharedLibraryHandlerPtr get(const std::string & dictionary_id);
ExternalDictionaryLibraryHandlerPtr get(const String & dictionary_id);
void create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const String & dictionary_id,
const String & library_path,
const std::vector<String> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names);
const std::vector<String> & attributes_names);
bool clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
bool clone(const String & from_dictionary_id, const String & to_dictionary_id);
bool remove(const std::string & dictionary_id);
bool remove(const String & dictionary_id);
private:
/// map: dict_id -> sharedLibraryHandler
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::unordered_map<String, ExternalDictionaryLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
};

View File

@ -27,12 +27,16 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
{
if (uri.getPath() == "/extdict_ping")
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
else if (uri.getPath() == "/catboost_ping")
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
}
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{
if (uri.getPath() == "/extdict_request")
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
else if (uri.getPath() == "/catboost_request")
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
}
return nullptr;

View File

@ -1,24 +1,32 @@
#include "LibraryBridgeHandlers.h"
#include "CatBoostLibraryHandler.h"
#include "CatBoostLibraryHandlerFactory.h"
#include "ExternalDictionaryLibraryHandler.h"
#include "ExternalDictionaryLibraryHandlerFactory.h"
#include <Formats/FormatFactory.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Common/BridgeProtocolVersion.h>
#include <IO/WriteHelpers.h>
#include <Poco/Net/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/Net/HTMLForm.h>
#include <Poco/ThreadPool.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Server/HTTP/HTMLForm.h>
#include <IO/ReadBufferFromString.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
namespace DB
@ -31,7 +39,7 @@ namespace ErrorCodes
namespace
{
void processError(HTTPServerResponse & response, const std::string & message)
void processError(HTTPServerResponse & response, const String & message)
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
@ -41,7 +49,7 @@ namespace
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
}
std::shared_ptr<Block> parseColumns(std::string && column_string)
std::shared_ptr<Block> parseColumns(String && column_string)
{
auto sample_block = std::make_shared<Block>();
auto names_and_types = NamesAndTypesList::parse(column_string);
@ -59,10 +67,10 @@ namespace
return ids;
}
std::vector<std::string> parseNamesFromBinary(const std::string & names_string)
std::vector<String> parseNamesFromBinary(const String & names_string)
{
ReadBufferFromString buf(names_string);
std::vector<std::string> names;
std::vector<String> names;
readVectorBinary(names, buf);
return names;
}
@ -79,13 +87,15 @@ static void writeData(Block data, OutputFormatPtr format)
executor.execute();
}
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
{
}
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -97,7 +107,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
version = 0; /// assumed version for too old servers which do not send a version
else
{
String version_str = params.get("version");
const String & version_str = params.get("version");
if (!tryParse(version, version_str))
{
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
@ -124,8 +134,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string method = params.get("method");
std::string dictionary_id = params.get("dictionary_id");
const String & method = params.get("method");
const String & dictionary_id = params.get("dictionary_id");
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
@ -141,7 +151,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string from_dictionary_id = params.get("from_dictionary_id");
const String & from_dictionary_id = params.get("from_dictionary_id");
bool cloned = false;
cloned = ExternalDictionaryLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
@ -166,7 +176,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string library_path = params.get("library_path");
const String & library_path = params.get("library_path");
if (!params.has("library_settings"))
{
@ -174,10 +184,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
const auto & settings_string = params.get("library_settings");
const String & settings_string = params.get("library_settings");
LOG_DEBUG(log, "Parsing library settings from binary string");
std::vector<std::string> library_settings = parseNamesFromBinary(settings_string);
std::vector<String> library_settings = parseNamesFromBinary(settings_string);
/// Needed for library dictionary
if (!params.has("attributes_names"))
@ -186,10 +196,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
const auto & attributes_string = params.get("attributes_names");
const String & attributes_string = params.get("attributes_names");
LOG_DEBUG(log, "Parsing attributes names from binary string");
std::vector<std::string> attributes_names = parseNamesFromBinary(attributes_string);
std::vector<String> attributes_names = parseNamesFromBinary(attributes_string);
/// Needed to parse block from binary string format
if (!params.has("sample_block"))
@ -197,7 +207,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
processError(response, "No 'sample_block' in request URL");
return;
}
std::string sample_block_string = params.get("sample_block");
String sample_block_string = params.get("sample_block");
std::shared_ptr<Block> sample_block;
try
@ -297,7 +307,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string requested_block_string = params.get("requested_block_sample");
String requested_block_string = params.get("requested_block_sample");
std::shared_ptr<Block> requested_sample_block;
try
@ -332,7 +342,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
else
{
LOG_WARNING(log, "Unknown library method: '{}'", method);
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)
@ -362,6 +373,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
}
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
@ -369,6 +381,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi
{
}
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
@ -382,7 +395,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
return;
}
std::string dictionary_id = params.get("dictionary_id");
const String & dictionary_id = params.get("dictionary_id");
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
@ -399,4 +412,230 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
}
/// Stores the keep-alive timeout and the context and binds the handler's logger.
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
    : WithContext(context_), keep_alive_timeout(keep_alive_timeout_), log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler"))
{
}
/// Serves one request to the catboost endpoint.
/// Flow: check the protocol version and the presence of 'method', then dispatch on the method name
/// ("catboost_list", "catboost_removeModel", "catboost_removeAllModels", "catboost_GetTreeCount",
/// "catboost_libEvaluate"). Results are written in binary form into the HTTP response buffer.
/// Validation failures are reported through processError(); exceptions thrown while executing a method
/// are turned into an internal-server-error status with the exception message as body.
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
size_t version;
if (!params.has("version"))
version = 0; /// assumed version for too old servers which do not send a version
else
{
const String & version_str = params.get("version");
if (!tryParse(version, version_str))
{
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
return;
}
}
if (version != LIBRARY_BRIDGE_PROTOCOL_VERSION)
{
/// backwards compatibility is considered unnecessary for now, just let the user know that the server and the bridge must be upgraded together
processError(response, "Server and library-bridge have different versions: '" + std::to_string(version) + "' vs. '" + std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION) + "'");
return;
}
if (!params.has("method"))
{
processError(response, "No 'method' in request URL");
return;
}
const String & method = params.get("method");
LOG_TRACE(log, "Library method: '{}'", method);
/// From here on, results are written through `out`.
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
if (method == "catboost_list")
{
/// Reply layout: UInt64 count, then per model: path, type, loading start time, loading duration.
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
for (const auto & info : model_infos)
{
writeStringBinary(info.model_path, out);
writeStringBinary(info.model_type, out);
/// Start time is sent as time_t widened to UInt64.
UInt64 t = std::chrono::system_clock::to_time_t(info.loading_start_time);
writeIntBinary(t, out);
/// Duration is sent as its raw tick count (presumably milliseconds - confirm against ExternalModelInfo).
t = info.loading_duration.count();
writeIntBinary(t, out);
}
}
else if (method == "catboost_removeModel")
{
/// Parse the request body as well, so that parameters sent there can be queried through `params`.
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
CatBoostLibraryHandlerFactory::instance().removeModel(model_path);
/// "1" is the acknowledgement; it is sent regardless of whether the model was actually registered.
String res = "1";
writeStringBinary(res, out);
}
else if (method == "catboost_removeAllModels")
{
CatBoostLibraryHandlerFactory::instance().removeAllModels();
String res = "1";
writeStringBinary(res, out);
}
else if (method == "catboost_GetTreeCount")
{
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("library_path"))
{
processError(response, "No 'library_path' in request URL");
return;
}
const String & library_path = params.get("library_path");
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
/// This is the call that brings a model into memory: the handler is created if it is not registered yet.
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, library_path, /*create_if_not_found*/ true);
size_t tree_count = catboost_handler->getTreeCount();
writeIntBinary(tree_count, out);
}
else if (method == "catboost_libEvaluate")
{
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
if (!params.has("data"))
{
processError(response, "No 'data' in request URL");
return;
}
/// 'data' carries the feature columns as a block in Native format.
const String & data = params.get("data");
ReadBufferFromString string_read_buf(data);
NativeReader deserializer(string_read_buf, /*server_revision*/ 0);
Block block_read = deserializer.read();
/// `col_ptrs` keeps the columns alive while the raw pointers below are in use.
Columns col_ptrs = block_read.getColumns();
ColumnRawPtrs col_raw_ptrs;
for (const auto & p : col_ptrs)
col_raw_ptrs.push_back(&*p);
/// The library path is a placeholder: with create_if_not_found == false no handler is created, so it is never used.
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, "DummyLibraryPath", /*create_if_not_found*/ false);
if (!catboost_handler)
{
processError(response, "CatBoost library is not loaded for model '" + model_path + "'. Please try again.");
return;
}
ColumnPtr res_col = catboost_handler->evaluate(col_raw_ptrs);
/// NOTE(review): the result is always declared as Float64. Confirm that evaluate() cannot return a
/// column of another kind (e.g. a tuple column for models with several output dimensions).
DataTypePtr res_col_type = std::make_shared<DataTypeFloat64>();
String res_col_name = "res_col";
ColumnsWithTypeAndName res_cols_with_type_and_name = {{res_col, res_col_type, res_col_name}};
Block block_write(res_cols_with_type_and_name);
NativeWriter serializer{out, /*client_revision*/ 0, block_write};
serializer.write(block_write);
}
else
{
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)
{
auto message = getCurrentExceptionMessage(true);
LOG_ERROR(log, "Failed to process request. Error: {}", message);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
try
{
writeStringBinary(message, out);
out.finalize();
}
catch (...)
{
tryLogCurrentException(log);
}
}
/// NOTE(review): on the exception path finalize() has already been called above; this relies on a
/// repeated finalize() being harmless - confirm.
try
{
out.finalize();
}
catch (...)
{
tryLogCurrentException(log);
}
}
/// Stores the keep-alive timeout and the context and binds the handler's logger.
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
    : WithContext(context_), keep_alive_timeout(keep_alive_timeout_), log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler"))
{
}
/// Ping endpoint: answers every request with the body "1". Any exception is logged and swallowed so
/// that it never escapes the handler.
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
    try
    {
        LOG_TRACE(log, "Request URI: {}", request.getURI());

        /// The form is constructed from the request even though no parameter is inspected afterwards.
        HTMLForm params(getContext()->getSettingsRef(), request);

        String pong = "1";

        setResponseDefaultHeaders(response, keep_alive_timeout);
        LOG_TRACE(log, "Sending ping response: {}", pong);
        response.sendBuffer(pong.data(), pong.size());
    }
    catch (...)
    {
        tryLogCurrentException("PingHandler");
    }
}
}

View File

@ -1,9 +1,8 @@
#pragma once
#include <Common/logger_useful.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Common/logger_useful.h>
#include "ExternalDictionaryLibraryHandler.h"
namespace DB
@ -26,11 +25,12 @@ public:
private:
static constexpr inline auto FORMAT = "RowBinary";
const size_t keep_alive_timeout;
Poco::Logger * log;
size_t keep_alive_timeout;
};
// Handler for checking if the external dictionary library is loaded (used for handshake)
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
@ -43,4 +43,47 @@ private:
Poco::Logger * log;
};
/// Handler for requests to catboost library. The call protocol is as follows:
/// (1) Send a "catboost_GetTreeCount" request from the server to the bridge. It contains a library path (e.g /home/user/libcatboost.so) and
/// a model path (e.g. /home/user/model.bin). This loads the catboost library handler associated with the model path, then executes
/// GetTreeCount() on the library handler and sends the result back to the server.
/// (2) Send "catboost_libEvaluate" from the server to the bridge. It contains a model path and the features to run the inference on. Step
/// (2) is called multiple times (once per chunk) by the server.
///
/// We would ideally like to have steps (1) and (2) in one atomic handler but can't because the evaluation on the server side is divided
/// into two dependent phases: FunctionCatBoostEvaluate::getReturnTypeImpl() and ::executeImpl(). So the model may in principle be unloaded
/// from the library-bridge between steps (1) and (2). Step (2) checks if that is the case and fails gracefully. This is okay because that
/// situation is considered exceptional and rare.
///
/// An update of a model is performed by unloading it. The first call to "catboost_GetTreeCount" brings it into memory again.
///
/// Further handlers are provided for unloading a specific model, for unloading all models or for retrieving information about the loaded
/// models for display in a system view.
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
/// keep_alive_timeout_ is forwarded to the response write buffer; context_ provides the settings used to parse the request.
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
/// Checks the protocol version, then dispatches on the 'method' parameter of the request.
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
/// Handler for pinging the library-bridge for catboost access (used for handshake)
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
/// keep_alive_timeout_ is used for the default response headers; context_ provides the settings used to parse the request.
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
/// Replies with the body "1"; exceptions are logged and not propagated.
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
}

View File

@ -51,7 +51,6 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalModelsLoader.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
@ -1158,7 +1157,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setExternalAuthenticatorsConfig(*config);
global_context->loadOrReloadDictionaries(*config);
global_context->loadOrReloadModels(*config);
global_context->loadOrReloadUserDefinedExecutableFunctions(*config);
global_context->setRemoteHostFilter(*config);
@ -1739,17 +1737,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
throw;
}
/// try to load models immediately, throw on error and die
try
{
global_context->loadOrReloadModels(config());
}
catch (...)
{
tryLogCurrentException(log, "Caught exception while loading dictionaries.");
throw;
}
/// try to load user defined executable functions, throw on error and die
try
{

View File

@ -278,6 +278,71 @@ public:
}
}
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final
{
std::unique_ptr<UInt8[]> final_null_flags = std::make_unique<UInt8[]>(row_end);
const size_t filter_column_num = number_of_arguments - 1;
if (is_nullable[filter_column_num])
{
const ColumnNullable * nullable_column = assert_cast<const ColumnNullable *>(columns[filter_column_num]);
const IColumn & filter_column = nullable_column->getNestedColumn();
const UInt8 * filter_null_map = nullable_column->getNullMapColumn().getData().data();
const UInt8 * filter_values = assert_cast<const ColumnUInt8 &>(filter_column).getData().data();
for (size_t i = row_begin; i < row_end; i++)
{
final_null_flags[i] = (null_is_skipped && filter_null_map[i]) || !filter_values[i];
}
}
else
{
const IColumn * filter_column = columns[filter_column_num];
const UInt8 * filter_values = assert_cast<const ColumnUInt8 *>(filter_column)->getData().data();
for (size_t i = row_begin; i < row_end; i++)
final_null_flags[i] = !filter_values[i];
}
const IColumn * nested_columns[number_of_arguments];
for (size_t arg = 0; arg < number_of_arguments; arg++)
{
if (is_nullable[arg])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[arg]);
if (null_is_skipped && (arg != filter_column_num))
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
const UInt8 * col_null_map = nullmap_column.getData().data();
for (size_t r = row_begin; r < row_end; r++)
{
final_null_flags[r] |= col_null_map[r];
}
}
nested_columns[arg] = &nullable_col.getNestedColumn();
}
else
nested_columns[arg] = columns[arg];
}
bool at_least_one = false;
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_null_flags[i])
{
at_least_one = true;
break;
}
}
if (at_least_one)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlaceNotNull(
row_begin, row_end, this->nestedPlace(place), nested_columns, final_null_flags.get(), arena, -1);
}
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override

View File

@ -414,6 +414,109 @@ public:
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos) const final
{
/// We are going to merge all the flags into a single one to be able to call the nested batching functions
std::vector<const UInt8 *> nullable_filters;
const IColumn * nested_columns[number_of_arguments];
std::unique_ptr<UInt8[]> final_flags = nullptr;
const UInt8 * final_flags_ptr = nullptr;
if (if_argument_pos >= 0)
{
final_flags = std::make_unique<UInt8[]>(row_end);
final_flags_ptr = final_flags.get();
bool included_elements = 0;
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; i++)
{
final_flags[i] = !flags.data()[i];
included_elements += !!flags.data()[i];
}
if (included_elements == 0)
return;
if (included_elements != (row_end - row_begin))
{
nullable_filters.push_back(final_flags_ptr);
}
}
for (size_t i = 0; i < number_of_arguments; ++i)
{
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
nested_columns[i] = &nullable_col.getNestedColumn();
if constexpr (null_is_skipped)
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
nullable_filters.push_back(nullmap_column.getData().data());
}
}
else
{
nested_columns[i] = columns[i];
}
}
bool found_one = false;
chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
if (nullable_filters.size() == 1)
{
/// We can avoid making copies of the only filter but we still need to check that there is data to be added
final_flags_ptr = nullable_filters[0];
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_flags_ptr[i])
{
found_one = true;
break;
}
}
}
else
{
if (!final_flags)
{
final_flags = std::make_unique<UInt8[]>(row_end);
final_flags_ptr = final_flags.get();
}
const size_t filter_start = nullable_filters[0] == final_flags_ptr ? 1 : 0;
for (size_t filter = filter_start; filter < nullable_filters.size(); filter++)
{
for (size_t i = row_begin; i < row_end; i++)
final_flags[i] |= nullable_filters[filter][i];
}
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_flags_ptr[i])
{
found_one = true;
break;
}
}
}
if (!found_one)
return; // Nothing to do and nothing to mark
this->setFlag(place);
this->nested_function->addBatchSinglePlaceNotNull(
row_begin, row_end, this->nestedPlace(place), nested_columns, final_flags_ptr, arena, -1);
}
#if USE_EMBEDDED_COMPILER

View File

@ -424,14 +424,30 @@ public:
alloc(new_size_degree);
for (size_t i = 0; i < m_size; ++i)
if (m_size <= 1)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
if (x == 0)
has_zero = true;
else
reinsertImpl(x);
for (size_t i = 0; i < m_size; ++i)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
if (x == 0)
has_zero = true;
else
reinsertImpl(x);
}
}
else
{
auto hs = std::make_unique<HashValue[]>(m_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), m_size * sizeof(HashValue));
for (size_t i = 0; i < m_size; ++i)
{
if (hs[i] == 0)
has_zero = true;
else
reinsertImpl(hs[i]);
}
}
}
@ -458,11 +474,24 @@ public:
resize(new_size_degree);
}
for (size_t i = 0; i < rhs_size; ++i)
if (rhs_size <= 1)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
insertHash(x);
for (size_t i = 0; i < rhs_size; ++i)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
insertHash(x);
}
}
else
{
auto hs = std::make_unique<HashValue[]>(rhs_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), rhs_size * sizeof(HashValue));
for (size_t i = 0; i < rhs_size; ++i)
{
insertHash(hs[i]);
}
}
}

View File

@ -0,0 +1,194 @@
#include "CatBoostLibraryBridgeHelper.h"
#include <Columns/ColumnsNumber.h>
#include <Common/escapeForFileName.h>
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Net/HTTPRequest.h>
#include <random>
#include <utility>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Creates a helper bound to the global context.
/// `model_path_` and `library_path_` are optional; the individual methods document which of them they need.
/// Both are taken by value (sink parameters) and moved into the members to avoid copying the strings twice.
CatBoostLibraryBridgeHelper::CatBoostLibraryBridgeHelper(
    ContextPtr context_,
    std::optional<String> model_path_,
    std::optional<String> library_path_)
    : LibraryBridgeHelper(context_->getGlobalContext())
    , model_path(std::move(model_path_))
    , library_path(std::move(library_path_))
{
}
/// Returns the bridge base URI with its path set to PING_HANDLER.
Poco::URI CatBoostLibraryBridgeHelper::getPingURI() const
{
    auto ping_uri = createBaseURI();
    ping_uri.setPath(PING_HANDLER);
    return ping_uri;
}
/// Returns the bridge base URI with its path set to MAIN_HANDLER.
Poco::URI CatBoostLibraryBridgeHelper::getMainURI() const
{
    auto main_uri = createBaseURI();
    main_uri.setPath(MAIN_HANDLER);
    return main_uri;
}
/// Builds the URI for one bridge call: the main handler URI plus the
/// `version` (bridge protocol version) and `method` query parameters.
Poco::URI CatBoostLibraryBridgeHelper::createRequestURI(const String & method) const
{
    auto request_uri = getMainURI();
    const auto protocol_version = std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION);
    request_uri.addQueryParameter("version", protocol_version);
    request_uri.addQueryParameter("method", method);
    return request_uri;
}
bool CatBoostLibraryBridgeHelper::bridgeHandShake()
{
String result;
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
readString(result, buf);
}
catch (...)
{
tryLogCurrentException(log);
return false;
}
if (result != "1")
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check that bridge and server have the same version.", result);
return true;
}
/// Asks the bridge (method CATBOOST_LIST_METHOD) for its models.
/// The response is read as a UInt64 row count followed, per row, by: model path (string), model type (string),
/// loading start time (UInt64, interpreted as time_t) and loading duration (UInt64, interpreted as milliseconds).
ExternalModelInfos CatBoostLibraryBridgeHelper::listModels()
{
    startBridgeSync();

    ReadWriteBufferFromHTTP buf(
        createRequestURI(CATBOOST_LIST_METHOD),
        Poco::Net::HTTPRequest::HTTP_POST,
        [](std::ostream &) {},
        http_timeouts, credentials);

    ExternalModelInfos result;

    UInt64 num_rows = 0;
    readIntBinary(num_rows, buf);

    for (UInt64 i = 0; i < num_rows; ++i)
    {
        /// Fill the element in place instead of building a temporary and copying its two strings.
        auto & info = result.emplace_back();

        readStringBinary(info.model_path, buf);
        readStringBinary(info.model_type, buf);

        UInt64 t = 0;
        readIntBinary(t, buf);
        info.loading_start_time = std::chrono::system_clock::from_time_t(t);

        readIntBinary(t, buf);
        info.loading_duration = std::chrono::milliseconds(t);
    }

    return result;
}
void CatBoostLibraryBridgeHelper::removeModel()
{
startBridgeSync();
assert(model_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEMODEL_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
String result;
readStringBinary(result, buf);
assert(result == "1");
}
void CatBoostLibraryBridgeHelper::removeAllModels()
{
startBridgeSync();
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[](std::ostream &){},
http_timeouts, credentials);
String result;
readStringBinary(result, buf);
assert(result == "1");
}
size_t CatBoostLibraryBridgeHelper::getTreeCount()
{
startBridgeSync();
assert(model_path && library_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_GETTREECOUNT_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(*library_path) << "&";
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
size_t result;
readIntBinary(result, buf);
return result;
}
/// Evaluates the model at `model_path` on `columns` through the bridge (method CATBOOST_LIB_EVALUATE_METHOD).
/// The input columns are serialized as a Native-format block and sent in the request body; the response is
/// read back as a Native-format block whose first column is returned.
/// Requires `model_path` to be set. Throws LOGICAL_ERROR if it is not, or if the response block has no columns.
ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & columns)
{
    startBridgeSync();

    WriteBufferFromOwnString string_write_buf;
    Block block(columns);
    NativeWriter serializer(string_write_buf, /*client_revision*/ 0, block);
    serializer.write(block);

    /// assert() is compiled out under NDEBUG, which would turn the dereference below into undefined behaviour.
    if (!model_path)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot evaluate CatBoost model: model_path is not set");

    ReadWriteBufferFromHTTP buf(
        createRequestURI(CATBOOST_LIB_EVALUATE_METHOD),
        Poco::Net::HTTPRequest::HTTP_POST,
        [this, serialized = string_write_buf.str()](std::ostream & os)
        {
            os << "model_path=" << escapeForFileName(*model_path) << "&";
            os << "data=" << escapeForFileName(serialized);
        },
        http_timeouts, credentials);

    NativeReader deserializer(buf, /*server_revision*/ 0);
    Block block_read = deserializer.read();

    /// Indexing [0] on an empty column list would be out of bounds; report it instead.
    const auto result_columns = block_read.getColumns();
    if (result_columns.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Library bridge returned a block without columns for CatBoost evaluation");

    return result_columns[0];
}
}

View File

@ -0,0 +1,53 @@
#pragma once
#include <BridgeHelper/LibraryBridgeHelper.h>
#include <Common/ExternalModelInfo.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <Interpreters/Context.h>
#include <Poco/URI.h>
#include <optional>
namespace DB
{
/// Server-side helper that sends CatBoost-related requests (list, remove, tree count, evaluate)
/// to the library bridge over HTTP. Depending on the operation, a model path and/or a library path
/// must have been passed to the constructor; see the per-method notes.
class CatBoostLibraryBridgeHelper : public LibraryBridgeHelper
{
public:
/// URI paths of the bridge handlers used for the handshake and for all other requests.
static constexpr inline auto PING_HANDLER = "/catboost_ping";
static constexpr inline auto MAIN_HANDLER = "/catboost_request";
/// Both paths are optional; which of them is needed depends on the method called afterwards.
explicit CatBoostLibraryBridgeHelper(
ContextPtr context_,
std::optional<String> model_path_ = std::nullopt,
std::optional<String> library_path_ = std::nullopt);
/// Returns the model descriptions reported by the bridge.
ExternalModelInfos listModels();
void removeModel(); /// requires model_path
void removeAllModels();
size_t getTreeCount(); /// requires model_path and library_path
ColumnPtr evaluate(const ColumnsWithTypeAndName & columns); /// requires model_path
protected:
/// Base URI with the path set to PING_HANDLER / MAIN_HANDLER respectively.
Poco::URI getPingURI() const override;
Poco::URI getMainURI() const override;
/// Pings the bridge; returns false if the ping request fails.
bool bridgeHandShake() override;
private:
/// Values of the "method" query parameter, one per operation.
static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list";
static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel";
static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels";
static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount";
static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate";
/// Main handler URI plus the "version" and "method" query parameters.
Poco::URI createRequestURI(const String & method) const;
/// Set once in the constructor; std::nullopt when not provided.
const std::optional<String> model_path;
const std::optional<String> library_path;
};
}

View File

@ -12,8 +12,8 @@
namespace DB
{
/// Common base class for XDBC and Library bridge helpers.
/// Contains helper methods to check/start bridge sync.
/// Base class for server-side bridge helpers, e.g. xdbc-bridge and library-bridge.
/// Contains helper methods to check/start bridge sync
class IBridgeHelper: protected WithContext
{

View File

@ -722,5 +722,3 @@ public:
return res;
}
};
template <typename Key, typename Payload> constexpr size_t ArrayCache<Key, Payload>::min_chunk_size;

View File

@ -176,10 +176,10 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
{
/// Under high memory pressure, any new allocation will definitelly lead
/// to MEMORY_LIMIT_EXCEEDED exception.
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///
/// And in this case the exception will not be logged, so let's block the
/// In this case the exception will not be logged, so let's block the
/// MemoryTracker until the exception will be logged.
LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
@ -189,8 +189,8 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
{
/// Under high memory pressure, any new allocation will definitelly lead
/// to MEMORY_LIMIT_EXCEEDED exception.
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///
/// And in this case the exception will not be logged, so let's block the
/// MemoryTracker until the exception will be logged.

View File

@ -0,0 +1,20 @@
#pragma once
#include <vector>
#include <base/types.h>
namespace DB
{
/// Details about external machine learning model, used by clickhouse-server and clickhouse-library-bridge
/// One element of ExternalModelInfos describes one model.
struct ExternalModelInfo
{
String model_path;
String model_type;
/// NOTE(review): presumably when loading of the model began; confirm against the producer side.
std::chrono::system_clock::time_point loading_start_time; /// serialized as std::time_t
/// NOTE(review): presumably how long loading took; confirm against the producer side.
std::chrono::milliseconds loading_duration; /// serialized as UInt64
};
using ExternalModelInfos = std::vector<ExternalModelInfo>;
}

View File

@ -135,7 +135,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
struct kinfo_proc kp;
size_t len = sizeof(struct kinfo_proc);
if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0))
if (-1 == ::sysctl(mib, 4, &kp, &len, nullptr, 0))
throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR);
if (sizeof(struct kinfo_proc) != len)

View File

@ -130,16 +130,15 @@ void SpanHolder::finish() noexcept
try
{
auto log = current_thread_trace_context.span_log.lock();
if (!log)
/// The log might be disabled, check it before use
if (log)
{
// The log might be disabled.
return;
this->finish_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
log->add(OpenTelemetrySpanLogElement(*this));
}
this->finish_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
log->add(OpenTelemetrySpanLogElement(*this));
}
catch (...)
{

View File

@ -189,6 +189,9 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu
ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
if (ret->coordination_settings->quorum_reads)
LOG_WARNING(&Poco::Logger::get("KeeperConfigurationAndSettings"), "Setting 'quorum_reads' is deprecated. Please use 'read_mode'");
return ret;
}

View File

@ -26,6 +26,7 @@ struct Settings;
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry, 0, "How often will leader node check if it still has majority. Set it lower or equal to election_timeout_lower_bound_ms to have linearizable reads.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
@ -38,11 +39,12 @@ struct Settings;
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, quorum_reads, false, "Deprecated - use read_mode. Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0)
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(String, read_mode, "nonlinear", "How should reads be processed. Valid values: 'nonlinear', 'fastlinear', 'quorum'. 'nonlinear' is the fastest option because there are no consistency requirements", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -1,4 +1,5 @@
#include <Coordination/KeeperDispatcher.h>
#include <libnuraft/async.hxx>
#include <Common/setThreadName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <future>
@ -6,6 +7,8 @@
#include <Poco/Path.h>
#include <Common/hex.h>
#include <filesystem>
#include <iterator>
#include <limits>
#include <Common/checkStackSize.h>
#include <Common/CurrentMetrics.h>
@ -30,22 +33,83 @@ namespace ErrorCodes
/// Constructs the dispatcher with default settings and its logger.
/// The response, read-request and finalize-request queues are created with
/// std::numeric_limits<size_t>::max() as their constructor argument
/// (presumably the queue capacity, i.e. effectively unbounded; confirm against the queue type).
KeeperDispatcher::KeeperDispatcher()
: responses_queue(std::numeric_limits<size_t>::max())
, read_requests_queue(std::numeric_limits<size_t>::max())
, finalize_requests_queue(std::numeric_limits<size_t>::max())
, configuration_and_settings(std::make_shared<KeeperConfigurationAndSettings>())
, log(&Poco::Logger::get("KeeperDispatcher"))
{
}
/// ZooKeeper has 2 requirements:
/// - writes need to be linearizable
/// - all requests from single session need to be processed in the order of their arrival
///
/// Because of that, we cannot process read and write requests from SAME session at the same time.
/// To be able to process read and write requests in parallel we need to make sure that only 1 type
/// of request is being processed from a single session.
/// Multiple types from different sessions can be processed at the same time.
///
/// We do some in-session housekeeping to make sure that the multithreaded request processing is correct.
/// When a request is received from a client, we check if there are requests being processed from that same
/// session, and if yes, of what type. If the types are the same, and there are no requests of a different
/// type in between, we can instantly add it to the active request queue. Otherwise, we need to wait until
/// all requests of the other type are processed.
///
/// There are multiple threads used for processing the request, each of them communicating with a queue.
/// Assumption: only one type of request is being processed from a same session at any point in time (read or write).
///
/// requestThread -> requests currently being processed
/// readRequestThread -> thread for processing read requests
/// finalizeRequestThread -> thread for finalizing requests:
/// - in-session housekeeping, add requests to the active request queue if there are any
///
/// If reads are linearizable without quorum, a request can possibly wait for a certain log to be committed.
/// In that case we add it to the waiting queue for that log.
/// When that log is committed, the committing thread will send that read request to readRequestThread so it can be processed.
///
void KeeperDispatcher::requestThread()
{
setThreadName("KeeperReqT");
/// Result of requests batch from previous iteration
RaftAppendResult prev_result = nullptr;
/// Requests from previous iteration. We store them to be able
/// to send errors to the client.
KeeperStorage::RequestsForSessions prev_batch;
RaftResult prev_result = nullptr;
const auto previous_quorum_done = [&] { return !prev_result || prev_result->has_result() || prev_result->get_result_code() != nuraft::cmd_result_code::OK; };
const auto needs_quorum = [](const auto & coordination_settings, const auto & request)
{
return coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum" || !request.request->isReadRequest();
};
KeeperStorage::RequestsForSessions quorum_requests;
KeeperStorage::RequestsForSessions read_requests;
auto process_quorum_requests = [&, this]() mutable
{
/// Forcefully process all previous pending requests
if (prev_result)
forceWaitAndProcessResult(prev_result);
prev_result = server->putRequestBatch(quorum_requests);
if (prev_result)
{
prev_result->when_ready([&, requests_for_sessions = std::move(quorum_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> &) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result.get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
});
}
quorum_requests.clear();
};
/// ZooKeeper requires that the requests inside a single session are processed in a strict order
/// (we cannot process later requests before all the previous ones are processed)
/// By making sure that at this point we can either have just read requests or just write requests
/// from a single session, we can process them independently
while (!shutdown_called)
{
KeeperStorage::RequestForSession request;
@ -54,94 +118,67 @@ void KeeperDispatcher::requestThread()
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
/// The code below do a very simple thing: batch all write (quorum) requests into vector until
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from
/// the ability to process read requests without quorum (from local state). So when we are collecting
/// requests into a batch we must check that the new request is not read request. Otherwise we have to
/// process all already accumulated write requests, wait them synchronously and only after that process
/// read request. So reads are some kind of "separator" for writes.
try
{
if (requests_queue->tryPop(request, max_wait))
if (active_requests_queue->tryPop(request, max_wait))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
if (shutdown_called)
break;
KeeperStorage::RequestsForSessions current_batch;
if (needs_quorum(coordination_settings, request))
quorum_requests.emplace_back(request);
else
read_requests.emplace_back(request);
bool has_read_request = false;
/// If new request is not read request or we must to process it through quorum.
/// Otherwise we will process it locally.
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
/// Waiting until previous append will be successful, or batch is big enough
/// has_result == false && get_result_code == OK means that our request still not processed.
/// Sometimes NuRaft set errorcode without setting result, so we check both here.
while (true)
{
current_batch.emplace_back(request);
if (quorum_requests.size() > max_batch_size)
break;
/// Waiting until previous append will be successful, or batch is big enough
/// has_result == false && get_result_code == OK means that our request still not processed.
/// Sometimes NuRaft set errorcode without setting result, so we check both here.
while (prev_result && (!prev_result->has_result() && prev_result->get_result_code() == nuraft::cmd_result_code::OK) && current_batch.size() <= max_batch_size)
if (read_requests.size() > max_batch_size)
{
/// Trying to get batch requests as fast as possible
if (requests_queue->tryPop(request, 1))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
/// Don't append read request into batch, we have to process them separately
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
{
has_read_request = true;
break;
}
else
{
processReadRequests(coordination_settings, read_requests);
current_batch.emplace_back(request);
}
}
if (shutdown_called)
if (previous_quorum_done())
break;
}
/// Trying to get batch requests as fast as possible
if (active_requests_queue->tryPop(request, 1))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
if (needs_quorum(coordination_settings, request))
quorum_requests.emplace_back(request);
else
read_requests.emplace_back(request);
}
else
{
/// batch of read requests can send at most one request
/// so we don't care if the previous batch hasn't received response
if (!read_requests.empty())
processReadRequests(coordination_settings, read_requests);
/// if we still didn't process previous batch we can
/// increase our current batch even more
if (previous_quorum_done())
break;
}
if (shutdown_called)
break;
}
else
has_read_request = true;
if (shutdown_called)
break;
/// Forcefully process all previous pending requests
if (prev_result)
forceWaitAndProcessResult(prev_result, prev_batch);
if (!quorum_requests.empty())
process_quorum_requests();
/// Process collected write requests batch
if (!current_batch.empty())
{
auto result = server->putRequestBatch(current_batch);
if (result)
{
if (has_read_request) /// If we will execute read request next, than we have to process result now
forceWaitAndProcessResult(result, current_batch);
}
else
{
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
current_batch.clear();
}
prev_batch = std::move(current_batch);
prev_result = result;
}
/// Read request always goes after write batch (last request)
if (has_read_request)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request);
else
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
}
}
}
catch (...)
@ -151,6 +188,72 @@ void KeeperDispatcher::requestThread()
}
}
void KeeperDispatcher::processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests)
{
if (coordination_settings->read_mode.toString() == "fastlinear")
{
// we just want to know what's the current latest committed log on Leader node
auto leader_info_result = server->getLeaderInfo();
if (leader_info_result)
{
leader_info_result->when_ready([&, requests_for_sessions = std::move(read_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> & exception) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
return;
}
if (result.get_result_code() != nuraft::cmd_result_code::OK)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
if (exception)
{
LOG_INFO(log, "Got exception while waiting for read results {}", exception->what());
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
auto & leader_info_ctx = result.get();
if (!leader_info_ctx)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
KeeperServer::NodeInfo leader_info;
leader_info.term = leader_info_ctx->get_ulong();
leader_info.last_committed_index = leader_info_ctx->get_ulong();
std::lock_guard lock(leader_waiter_mutex);
auto node_info = server->getNodeInfo();
/// we're behind, we need to wait
if (node_info.term < leader_info.term || node_info.last_committed_index < leader_info.last_committed_index)
{
auto & leader_waiter = leader_waiters[leader_info];
leader_waiter.insert(leader_waiter.end(), requests_for_sessions.begin(), requests_for_sessions.end());
LOG_TRACE(log, "waiting for term {}, idx {}", leader_info.term, leader_info.last_committed_index);
}
/// process it in background thread
else if (!read_requests_queue.push(std::move(requests_for_sessions)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
});
}
}
else
{
assert(coordination_settings->read_mode.toString() == "nonlinear");
if (!read_requests_queue.push(std::move(read_requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
read_requests.clear();
}
void KeeperDispatcher::responseThread()
{
setThreadName("KeeperRspT");
@ -200,6 +303,65 @@ void KeeperDispatcher::snapshotThread()
}
}
/// Background thread for processing read requests
void KeeperDispatcher::readRequestThread()
{
setThreadName("KeeperReadT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!read_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
for (const auto & request_info : requests)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request_info);
else
addErrorResponses({request_info}, Coordination::Error::ZCONNECTIONLOSS);
}
if (!finalize_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
/// We finalize requests every time we commit a single log with request
/// or process a batch of read requests.
/// Because it can get heavy, we do it in background thread.
void KeeperDispatcher::finalizeRequestsThread()
{
setThreadName("KeeperFinalT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!finalize_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
finalizeRequests(requests);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{
std::lock_guard lock(session_to_response_callback_mutex);
@ -255,6 +417,30 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
request_info.session_id = session_id;
{
std::lock_guard lock{unprocessed_request_mutex};
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
{
auto & unprocessed_requests = unprocessed_requests_for_session[session_id];
unprocessed_requests.unprocessed_num = 1;
unprocessed_requests.is_read = request->isReadRequest();
}
else
{
auto & unprocessed_requests = unprocessed_requests_it->second;
/// queue is not empty, or the request types don't match, put it in the waiting queue
if (!unprocessed_requests.request_queue.empty() || unprocessed_requests.is_read != request->isReadRequest())
{
unprocessed_requests.request_queue.push_back(std::move(request_info));
return true;
}
++unprocessed_requests.unprocessed_num;
}
}
std::lock_guard lock(push_request_mutex);
if (shutdown_called)
@ -263,10 +449,10 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
/// Put close requests without timeouts
if (request->getOpNum() == Coordination::OpNum::Close)
{
if (!requests_queue->push(std::move(request_info)))
if (!active_requests_queue->push(std::move(request_info)))
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
}
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
else if (!active_requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
{
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
}
@ -279,13 +465,23 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
LOG_DEBUG(log, "Initializing storage dispatcher");
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
active_requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
read_request_thread = ThreadFromGlobalPool([this] { readRequestThread(); });
finalize_requests_thread = ThreadFromGlobalPool([this] { finalizeRequestsThread(); });
server = std::make_unique<KeeperServer>(configuration_and_settings, config, responses_queue, snapshots_queue);
server = std::make_unique<KeeperServer>(
configuration_and_settings,
config,
responses_queue,
snapshots_queue,
[this](const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{ onRequestCommit(request_for_session, log_term, log_idx); },
[this](uint64_t term, uint64_t last_idx)
{ onApplySnapshot(term, last_idx); });
try
{
@ -333,9 +529,9 @@ void KeeperDispatcher::shutdown()
if (session_cleaner_thread.joinable())
session_cleaner_thread.join();
if (requests_queue)
if (active_requests_queue)
{
requests_queue->finish();
active_requests_queue->finish();
if (request_thread.joinable())
request_thread.join();
@ -349,6 +545,14 @@ void KeeperDispatcher::shutdown()
if (snapshot_thread.joinable())
snapshot_thread.join();
read_requests_queue.finish();
if (read_request_thread.joinable())
read_request_thread.join();
finalize_requests_queue.finish();
if (finalize_requests_thread.joinable())
finalize_requests_thread.join();
update_configuration_queue.finish();
if (update_configuration_thread.joinable())
update_configuration_thread.join();
@ -357,7 +561,7 @@ void KeeperDispatcher::shutdown()
KeeperStorage::RequestForSession request_for_session;
/// Set session expired for all pending requests
while (requests_queue && requests_queue->tryPop(request_for_session))
while (active_requests_queue && active_requests_queue->tryPop(request_for_session))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
auto response = request_for_session.request->makeResponse();
@ -474,7 +678,7 @@ void KeeperDispatcher::sessionCleanerTask()
};
{
std::lock_guard lock(push_request_mutex);
if (!requests_queue->push(std::move(request_info)))
if (!active_requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
}
@ -524,19 +728,12 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession
}
}
void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
void KeeperDispatcher::forceWaitAndProcessResult(RaftResult & result)
{
if (!result->has_result())
result->get();
/// If we get some errors, than send them to clients
if (!result->get_accepted() || result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
result = nullptr;
requests_for_sessions.clear();
}
int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
@ -584,7 +781,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
/// Push new session request to queue
{
std::lock_guard lock(push_request_mutex);
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms))
if (!active_requests_queue->tryPush(std::move(request_info), session_timeout_ms))
throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
}
@ -657,6 +854,122 @@ void KeeperDispatcher::updateConfigurationThread()
}
}
// Used to update the state for a session based on the requests
// - update the number of current unprocessed requests for the session
// - if the number of unprocessed requests is 0, we can start adding next type of requests
//   from unprocessed requests queue to the active queue
//
// Sessions without an entry in unprocessed_requests_for_session are ignored.
// Throws SYSTEM_ERROR / TIMEOUT_EXCEEDED if a waiting request cannot be pushed to the active queue.
void KeeperDispatcher::finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
    /// Number of finished requests per session in this batch.
    std::unordered_map<int64_t, size_t> counts_for_session;

    for (const auto & request_for_session : requests_for_sessions)
    {
        ++counts_for_session[request_for_session.session_id];
    }

    std::lock_guard lock{unprocessed_request_mutex};
    for (const auto [session_id, count] : counts_for_session)
    {
        auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
        if (unprocessed_requests_it == unprocessed_requests_for_session.end())
            continue;

        auto & unprocessed_requests = unprocessed_requests_it->second;
        unprocessed_requests.unprocessed_num -= count;

        if (unprocessed_requests.unprocessed_num == 0)
        {
            if (!unprocessed_requests.request_queue.empty())
            {
                auto & unprocessed_requests_queue = unprocessed_requests.request_queue;
                /// Everything of the current type is done: switch the session to the other request type.
                unprocessed_requests.is_read = !unprocessed_requests.is_read;
                // start adding next type of requests
                while (!unprocessed_requests_queue.empty() && unprocessed_requests_queue.front().request->isReadRequest() == unprocessed_requests.is_read)
                {
                    auto & front_request = unprocessed_requests_queue.front();

                    /// Put close requests without timeouts
                    if (front_request.request->getOpNum() == Coordination::OpNum::Close)
                    {
                        if (!active_requests_queue->push(std::move(front_request)))
                            throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
                    }
                    else if (!active_requests_queue->tryPush(std::move(front_request), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
                    {
                        throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
                    }

                    /// Every pop from the active queue decrements this metric, so a request that enters
                    /// the active queue here must be counted as well to keep the metric balanced.
                    CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);

                    ++unprocessed_requests.unprocessed_num;
                    unprocessed_requests_queue.pop_front();
                }
            }
            else
            {
                /// Nothing running and nothing waiting: drop the session's bookkeeping entry.
                unprocessed_requests_for_session.erase(unprocessed_requests_it);
            }
        }
    }
}
/// Called when a single log entry with a request is committed.
///  1. Pushes the committed request to finalize_requests_queue.
///  2. Takes the read requests registered in leader_waiters under exactly {log_term, log_idx}
///     (i.e. the ones that were waiting for this commit) and pushes them to read_requests_queue.
/// Throws SYSTEM_ERROR if either queue rejects the push.
void KeeperDispatcher::onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{
    /// The message names the finalize queue explicitly so this failure can be told apart
    /// from a failed push to read_requests_queue below.
    if (!finalize_requests_queue.push({request_for_session}))
        throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push request to finalize queue");

    KeeperStorage::RequestsForSessions requests;
    {
        /// Hold the lock only while detaching the waiters; the queue push happens outside of it.
        std::lock_guard lock(leader_waiter_mutex);
        auto request_queue_it = leader_waiters.find(KeeperServer::NodeInfo{.term = log_term, .last_committed_index = log_idx});
        if (request_queue_it != leader_waiters.end())
        {
            requests = std::move(request_queue_it->second);
            leader_waiters.erase(request_queue_it);
        }
    }

    /// Nobody was waiting for this commit.
    if (requests.empty())
        return;

    if (!read_requests_queue.push(std::move(requests)))
        throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
/// Process all read request that are waiting for lower or currently last processed log index
void KeeperDispatcher::onApplySnapshot(uint64_t term, uint64_t last_idx)
{
KeeperServer::NodeInfo current_node_info{term, last_idx};
KeeperStorage::RequestsForSessions requests;
{
std::lock_guard lock(leader_waiter_mutex);
for (auto leader_waiter_it = leader_waiters.begin(); leader_waiter_it != leader_waiters.end();)
{
auto waiting_node_info = leader_waiter_it->first;
if (waiting_node_info.term <= current_node_info.term
&& waiting_node_info.last_committed_index <= current_node_info.last_committed_index)
{
for (auto & request : leader_waiter_it->second)
{
requests.push_back(std::move(request));
}
leader_waiter_it = leader_waiters.erase(leader_waiter_it);
}
else
{
++leader_waiter_it;
}
}
}
if (requests.empty())
return;
if (!read_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
bool KeeperDispatcher::isServerActive() const
{
return checkInit() && hasLeader() && !server->isRecovering();
@ -721,7 +1034,7 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const
Keeper4LWInfo result = server->getPartiallyFilled4LWInfo();
{
std::lock_guard lock(push_request_mutex);
result.outstanding_requests_count = requests_queue->size();
result.outstanding_requests_count = active_requests_queue->size();
}
{
std::lock_guard lock(session_to_response_callback_mutex);

View File

@ -32,9 +32,12 @@ private:
using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>;
/// Size depends on coordination settings
std::unique_ptr<RequestsQueue> requests_queue;
/// Request currently being processed
std::unique_ptr<RequestsQueue> active_requests_queue;
ResponsesQueue responses_queue;
SnapshotsQueue snapshots_queue{1};
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> read_requests_queue;
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> finalize_requests_queue;
/// More than 1k updates is definitely misconfiguration.
UpdateConfigurationQueue update_configuration_queue{1000};
@ -64,6 +67,8 @@ private:
ThreadFromGlobalPool snapshot_thread;
/// Apply or wait for configuration changes
ThreadFromGlobalPool update_configuration_thread;
ThreadFromGlobalPool read_request_thread;
ThreadFromGlobalPool finalize_requests_thread;
/// RAFT wrapper.
std::unique_ptr<KeeperServer> server;
@ -77,6 +82,34 @@ private:
/// Counter for new session_id requests.
std::atomic<int64_t> internal_session_id_counter{0};
/// A read request must observe at least the log that was the last committed log on the leader
/// at the time the request was made.
/// If this node is stale, we need to wait until that log is committed locally before doing local read requests
/// to achieve linearizability.
std::unordered_map<KeeperServer::NodeInfo, KeeperStorage::RequestsForSessions> leader_waiters;
std::mutex leader_waiter_mutex;
/// Per-session bookkeeping that keeps read and write requests of a single session in order.
/// We can be actively processing only one type of requests (either read or write) from a single session.
/// If we receive a request of a type that is not currently being processed, we put it in the waiting queue.
/// Also, we want to process requests in arriving order, so if a request of a different type is already waiting in the queue,
/// a new request cannot be processed right away and has to wait for all the previous requests to finish.
/// E.g. READ -> WRITE -> READ, the last READ will go to the waiting queue even though we are currently processing the first READ
/// because we have a WRITE request before it that needs to be processed.
struct UnprocessedRequests
{
/// How many requests of this session are currently in the active request queue.
/// finalizeRequests() decreases it for processed requests and increases it for every waiting request
/// it moves to the active queue.
size_t unprocessed_num{0};
/// Type of the requests currently being processed: true for read requests, false otherwise
/// (it is compared against isReadRequest() of the waiting requests).
bool is_read{false};
/// Requests of this session waiting for their turn; finalizeRequests() consumes it from the front.
std::list<KeeperStorage::RequestForSession> request_queue;
};
// Called every time a batch of requests are processed.
void finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions);
std::unordered_map<int64_t, UnprocessedRequests> unprocessed_requests_for_session;
std::mutex unprocessed_request_mutex;
/// Thread put requests to raft
void requestThread();
/// Thread put responses for subscribed sessions
@ -88,6 +121,12 @@ private:
/// Thread apply or wait configuration changes from leader
void updateConfigurationThread();
void readRequestThread();
void finalizeRequestsThread();
void processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests);
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
/// Add error responses for requests to responses queue.
@ -96,7 +135,7 @@ private:
/// Forcefully waits for the result and sets errors if something went wrong.
/// Clears both arguments
void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions);
static void forceWaitAndProcessResult(RaftResult & result);
public:
/// Just allocate some objects, real initialization is done by the `initialize` method
@ -116,6 +155,12 @@ public:
return server && server->checkInit();
}
/// Called when a single log with request is committed.
void onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx);
/// Called when a snapshot is applied
void onApplySnapshot(uint64_t term, uint64_t last_idx);
/// Is server accepting requests, i.e. connected to the cluster
/// and achieved quorum
bool isServerActive() const;

View File

@ -105,7 +105,9 @@ KeeperServer::KeeperServer(
const KeeperConfigurationAndSettingsPtr & configuration_and_settings_,
const Poco::Util::AbstractConfiguration & config,
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_)
SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback)
: server_id(configuration_and_settings_->server_id)
, coordination_settings(configuration_and_settings_->coordination_settings)
, log(&Poco::Logger::get("KeeperServer"))
@ -113,7 +115,7 @@ KeeperServer::KeeperServer(
, keeper_context{std::make_shared<KeeperContext>()}
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
{
if (coordination_settings->quorum_reads)
if (coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum")
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false);
@ -125,7 +127,9 @@ KeeperServer::KeeperServer(
configuration_and_settings_->snapshot_storage_path,
coordination_settings,
keeper_context,
checkAndGetSuperdigest(configuration_and_settings_->super_digest));
checkAndGetSuperdigest(configuration_and_settings_->super_digest),
std::move(commit_callback),
std::move(apply_snapshot_callback));
state_manager = nuraft::cs_new<KeeperStateManager>(
server_id,
@ -176,6 +180,13 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
reconfigure(new_config);
}
/// Builds a `leader_status_request` message (all numeric fields set to zero) and sends it to the leader.
/// Returns whatever `send_msg_to_leader` returns for that message.
RaftResult getLeaderInfo()
{
    auto leader_status_request = nuraft::cs_new<nuraft::req_msg>(
        0ull, nuraft::msg_type::leader_status_request, 0, 0, 0ull, 0ull, 0ull);

    return send_msg_to_leader(leader_status_request);
}
void commit_in_bg() override
{
// For NuRaft, if any commit fails (uncaught exception) the whole server aborts as a safety
@ -269,6 +280,20 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(coordination_settings->leadership_expiry.totalMilliseconds(), "leadership_expiry", log);
if (coordination_settings->read_mode.toString() == "fastlinear")
{
if (params.leadership_expiry_ == 0)
params.leadership_expiry_ = params.election_timeout_lower_bound_;
else if (params.leadership_expiry_ > params.election_timeout_lower_bound_)
{
LOG_WARNING(log, "To use fast linearizable reads, leadership_expiry should be set to a value that is less or equal to the election_timeout_upper_bound_ms. "
"Based on current settings, there are no guarantees for linearizability of reads.");
}
}
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
@ -487,7 +512,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
state_machine->processReadRequest(request_for_session);
}
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
RaftResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
std::vector<nuraft::ptr<nuraft::buffer>> entries;
for (const auto & request_for_session : requests_for_sessions)
@ -713,6 +738,20 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
return state_machine->getDeadSessions();
}
/// Asks the Raft instance for the leader info while holding server_write_mutex.
/// Returns nullptr when the server is in recovery mode.
RaftResult KeeperServer::getLeaderInfo()
{
    std::lock_guard lock(server_write_mutex);

    if (!is_recovering)
        return raft_instance->getLeaderInfo();

    return nullptr;
}
/// Returns the current term of the Raft instance together with the last commit index of the state machine.
KeeperServer::NodeInfo KeeperServer::getNodeInfo()
{
    NodeInfo node_info;
    node_info.term = raft_instance->get_term();
    node_info.last_committed_index = state_machine->last_commit_index();
    return node_info;
}
ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
{
auto diff = state_manager->getConfigurationDiff(config);

View File

@ -14,7 +14,7 @@
namespace DB
{
using RaftAppendResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
using RaftResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
class KeeperServer
{
@ -71,7 +71,9 @@ public:
const KeeperConfigurationAndSettingsPtr & settings_,
const Poco::Util::AbstractConfiguration & config_,
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_);
SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback);
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true);
@ -84,7 +86,7 @@ public:
/// Put batch of requests into Raft and get result of put. Responses will be set separately into
/// responses_queue.
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
RaftResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
/// Return set of the non-active sessions
std::vector<int64_t> getDeadSessions();
@ -119,6 +121,17 @@ public:
int getServerID() const { return server_id; }
/// A Raft term paired with the index of the last committed log.
/// getNodeInfo() fills it from the current state of this server.
/// Equality is defaulted (member-wise) and std::hash is specialized for it at the end of this file,
/// so it can be used as a key of unordered containers.
struct NodeInfo
{
/// Raft term.
uint64_t term;
/// Index of the last committed log.
uint64_t last_committed_index;
/// Member-wise comparison of both fields.
bool operator==(const NodeInfo &) const = default;
};
RaftResult getLeaderInfo();
NodeInfo getNodeInfo();
/// Get configuration diff between current configuration in RAFT and in XML file
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config);
@ -126,10 +139,23 @@ public:
/// Synchronously check for update results with retries.
void applyConfigurationUpdate(const ConfigUpdateAction & task);
/// Wait configuration update for action. Used by followers.
/// Return true if update was successfully received.
bool waitConfigurationUpdate(const ConfigUpdateAction & task);
};
}
namespace std
{

/// Hash for NodeInfo: feeds the term and then the last committed index into SipHash
/// and returns the resulting 64-bit value.
template <>
struct hash<DB::KeeperServer::NodeInfo>
{
    size_t operator()(const DB::KeeperServer::NodeInfo & info) const
    {
        const auto & [term, last_committed_index] = info;

        SipHash hasher;
        hasher.update(term);
        hasher.update(last_committed_index);
        return hasher.get64();
    }
};

}

View File

@ -44,7 +44,9 @@ KeeperStateMachine::KeeperStateMachine(
const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_,
const std::string & superdigest_)
const std::string & superdigest_,
CommitCallback commit_callback_,
ApplySnapshotCallback apply_snapshot_callback_)
: coordination_settings(coordination_settings_)
, snapshot_manager(
snapshots_path_,
@ -58,6 +60,8 @@ KeeperStateMachine::KeeperStateMachine(
, last_committed_idx(0)
, log(&Poco::Logger::get("KeeperStateMachine"))
, superdigest(superdigest_)
, commit_callback(std::move(commit_callback_))
, apply_snapshot_callback(std::move(apply_snapshot_callback_))
, keeper_context(keeper_context_)
{
}
@ -223,11 +227,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
return true;
}
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit_ext(const ext_op_params & params)
{
auto request_for_session = parseRequest(data);
auto request_for_session = parseRequest(*params.data);
if (!request_for_session.zxid)
request_for_session.zxid = log_idx;
request_for_session.zxid = params.log_idx;
/// Special processing of session_id request
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
@ -272,8 +276,9 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true);
}
last_committed_idx = params.log_idx;
commit_callback(request_for_session, params.log_term, params.log_idx);
ProfileEvents::increment(ProfileEvents::KeeperCommits);
last_committed_idx = log_idx;
return nullptr;
}
@ -306,6 +311,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys);
last_committed_idx = s.get_last_log_idx();
apply_snapshot_callback(s.get_last_log_term(), s.get_last_log_idx());
return true;
}
@ -320,6 +326,10 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr
void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
{
auto request_for_session = parseRequest(data);
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return;
// If we received a log from an older node, use the log_idx as the zxid
// log_idx will always be larger or equal to the zxid so we can safely do this
// (log_idx is increased for all logs, while zxid is only increased for requests)

View File

@ -20,13 +20,18 @@ using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
class KeeperStateMachine : public nuraft::state_machine
{
public:
using CommitCallback = std::function<void(const KeeperStorage::RequestForSession &, uint64_t, uint64_t)>;
using ApplySnapshotCallback = std::function<void(uint64_t, uint64_t)>;
KeeperStateMachine(
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_,
const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_,
const std::string & superdigest_ = "");
const std::string & superdigest_ = "",
CommitCallback commit_callback_ = [](const KeeperStorage::RequestForSession &, uint64_t, uint64_t){},
ApplySnapshotCallback apply_snapshot_callback_ = [](uint64_t, uint64_t){});
/// Read state from the latest snapshot
void init();
@ -37,7 +42,7 @@ public:
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT
nuraft::ptr<nuraft::buffer> commit_ext(const ext_op_params & params) override; /// NOLINT
/// Save new cluster config to our snapshot (copy of the config stored in StateManager)
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT
@ -145,6 +150,11 @@ private:
/// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest;
/// call when a request is committed
const CommitCallback commit_callback;
/// call when snapshot is applied
const ApplySnapshotCallback apply_snapshot_callback;
KeeperContextPtr keeper_context;
};

View File

@ -1330,8 +1330,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
changelog.append(entry);
changelog.end_of_append_batch(0, 0);
state_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
state_machine->commit(i, changelog.entry_at(i)->get_buf());
auto entry_buf = changelog.entry_at(i)->get_buf_ptr();
state_machine->pre_commit(i, *entry_buf);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry_buf});
bool snapshot_created = false;
if (i % settings->snapshot_distance == 0)
{
@ -1375,8 +1376,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i)
{
restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
restore_machine->commit(i, changelog.entry_at(i)->get_buf());
auto entry = changelog.entry_at(i)->get_buf_ptr();
restore_machine->pre_commit(i, *entry);
restore_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry});
}
auto & source_storage = state_machine->getStorage();
@ -1477,18 +1479,18 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove)
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
request_c->path = "/hello";
request_c->is_ephemeral = true;
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c);
state_machine->pre_commit(1, entry_c->get_buf());
state_machine->commit(1, entry_c->get_buf());
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c)->get_buf_ptr();
state_machine->pre_commit(1, *entry_c);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{1, entry_c});
const auto & storage = state_machine->getStorage();
EXPECT_EQ(storage.ephemerals.size(), 1);
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
request_d->path = "/hello";
/// Delete from other session
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d);
state_machine->pre_commit(2, entry_d->get_buf());
state_machine->commit(2, entry_d->get_buf());
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d)->get_buf_ptr();
state_machine->pre_commit(2, *entry_d);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{2, entry_d});
EXPECT_EQ(storage.ephemerals.size(), 0);
}

View File

@ -481,7 +481,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
@ -527,7 +527,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
\
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \

View File

@ -14,7 +14,7 @@
namespace std // NOLINT(cert-dcl58-cpp)
{
using namespace experimental::coroutines_v1;
using namespace experimental::coroutines_v1; // NOLINT(cert-dcl58-cpp)
}
#if __has_warning("-Wdeprecated-experimental-coroutine")

View File

@ -143,9 +143,11 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(size_t offset) const
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const
{
auto path = cache->getPathInLocalCache(cache_key, offset, is_persistent);
/// Use is_persistent flag from in-memory state of the filesegment,
/// because it is consistent with what is written on disk.
auto path = file_segment.getPathInLocalCache();
ReadSettings local_read_settings{settings};
/// Do not allow to use asynchronous version of LocalFSReadMethod.
@ -206,7 +208,7 @@ CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment
return remote_file_reader;
auto remote_fs_segment_reader = file_segment.extractRemoteFileReader();
if (remote_fs_segment_reader && file_offset_of_buffer_end == remote_file_reader->getFileOffsetOfBufferEnd())
if (remote_fs_segment_reader && file_offset_of_buffer_end == remote_fs_segment_reader->getFileOffsetOfBufferEnd())
remote_file_reader = remote_fs_segment_reader;
else
remote_file_reader = implementation_buffer_creator();
@ -237,8 +239,6 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
{
auto range = file_segment->range();
auto download_state = file_segment->state();
LOG_TEST(log, "getReadBufferForFileSegment: {}", file_segment->getInfoForLog());
@ -247,7 +247,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
if (download_state == FileSegment::State::DOWNLOADED)
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
else
{
@ -280,7 +280,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end
read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
download_state = file_segment->wait();
@ -289,7 +289,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
case FileSegment::State::DOWNLOADED:
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
case FileSegment::State::EMPTY:
case FileSegment::State::PARTIALLY_DOWNLOADED:
@ -305,7 +305,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end
read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
auto downloader_id = file_segment->getOrSetDownloader();
@ -323,7 +323,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
read_type = ReadType::CACHED;
file_segment->resetDownloader();
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end)
@ -339,7 +339,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog());
chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset());
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset();
chassert(bytes_to_predownload < range.size());
chassert(bytes_to_predownload < file_segment->range().size());
}
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
@ -354,7 +354,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left);
return getCacheReadBuffer(*file_segment);
}
else
{

View File

@ -68,7 +68,7 @@ private:
ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
ImplementationBufferPtr getCacheReadBuffer(size_t offset) const;
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
std::optional<size_t> getLastNonDownloadedOffset() const;

View File

@ -13,7 +13,6 @@ namespace DB
namespace ErrorCodes
{
extern const int UNKNOWN_FORMAT;
extern const int LOGICAL_ERROR;
}
void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
@ -131,9 +130,6 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata(
void DiskObjectStorageMetadata::addObject(const String & path, size_t size)
{
if (!object_storage_root_path.empty() && path.starts_with(object_storage_root_path))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected relative path");
total_size += size;
storage_objects.emplace_back(path, size);
}

View File

@ -530,6 +530,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd)
return getFormatFromFileName(file_path, false);
return "";
#else
(void)fd;
return "";
#endif
}

View File

@ -1,18 +1,18 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <base/range.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalModelsLoader.h>
#include <Columns/ColumnString.h>
#include <string>
#include <memory>
#include <DataTypes/DataTypeNullable.h>
#include <BridgeHelper/CatBoostLibraryBridgeHelper.h>
#include <BridgeHelper/IBridgeHelper.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Interpreters/Context_fwd.h>
@ -21,66 +21,80 @@ namespace DB
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN;
}
class ExternalModelsLoader;
/// Evaluate external model.
/// First argument - model name, the others - model arguments.
/// * for CatBoost model - float features first, then categorical
/// Result - Float64.
class FunctionModelEvaluate final : public IFunction
/// Evaluate CatBoost model.
/// - Arguments: float features first, then categorical features.
/// - Result: Float64.
class FunctionCatBoostEvaluate final : public IFunction, WithContext
{
private:
mutable std::unique_ptr<CatBoostLibraryBridgeHelper> bridge_helper;
public:
static constexpr auto name = "modelEvaluate";
static constexpr auto name = "catboostEvaluate";
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionModelEvaluate>(context->getExternalModelsLoader());
}
explicit FunctionModelEvaluate(const ExternalModelsLoader & models_loader_)
: models_loader(models_loader_) {}
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionCatBoostEvaluate>(context_); }
explicit FunctionCatBoostEvaluate(ContextPtr context_) : WithContext(context_) {}
String getName() const override { return name; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool isDeterministic() const override { return false; }
bool useDefaultImplementationForNulls() const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
void initBridge(const ColumnConst * name_col) const
{
String library_path = getContext()->getConfigRef().getString("catboost_lib_path");
if (!std::filesystem::exists(library_path))
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load library {}: file doesn't exist", library_path);
String model_path = name_col->getValue<String>();
if (!std::filesystem::exists(model_path))
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load model {}: file doesn't exist", model_path);
bridge_helper = std::make_unique<CatBoostLibraryBridgeHelper>(getContext(), model_path, library_path);
}
/// Derives the result type from the tree count reported by the library bridge:
/// a plain Float64 when the count is 1, otherwise a Tuple with `tree_count` Float64 elements.
/// Requires `bridge_helper` to be initialized (see initBridge).
DataTypePtr getReturnTypeFromLibraryBridge() const
{
    const size_t tree_count = bridge_helper->getTreeCount();
    auto float64_type = std::make_shared<DataTypeFloat64>();

    if (tree_count != 1)
        return std::make_shared<DataTypeTuple>(DataTypes(tree_count, float64_type));

    return float64_type;
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() < 2)
throw Exception("Function " + getName() + " expects at least 2 arguments",
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION);
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 2 arguments", getName());
if (!isString(arguments[0].type))
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
+ ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}, expected a string.", arguments[0].type->getName(), getName());
const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
if (!name_col)
throw Exception("First argument of function " + getName() + " must be a constant string",
ErrorCodes::ILLEGAL_COLUMN);
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
initBridge(name_col);
auto type = getReturnTypeFromLibraryBridge();
bool has_nullable = false;
for (size_t i = 1; i < arguments.size(); ++i)
has_nullable = has_nullable || arguments[i].type->isNullable();
auto model = models_loader.getModel(name_col->getValue<String>());
auto type = model->getReturnType();
if (has_nullable)
{
if (const auto * tuple = typeid_cast<const DataTypeTuple *>(type.get()))
@ -98,31 +112,25 @@ public:
return type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
{
const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
if (!name_col)
throw Exception("First argument of function " + getName() + " must be a constant string",
ErrorCodes::ILLEGAL_COLUMN);
auto model = models_loader.getModel(name_col->getValue<String>());
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
ColumnRawPtrs column_ptrs;
Columns materialized_columns;
ColumnPtr null_map;
column_ptrs.reserve(arguments.size());
for (auto arg : collections::range(1, arguments.size()))
ColumnsWithTypeAndName feature_arguments(arguments.begin() + 1, arguments.end());
for (auto & arg : feature_arguments)
{
const auto & column = arguments[arg].column;
column_ptrs.push_back(column.get());
if (auto full_column = column->convertToFullColumnIfConst())
if (auto full_column = arg.column->convertToFullColumnIfConst())
{
materialized_columns.push_back(full_column);
column_ptrs.back() = full_column.get();
arg.column = full_column;
}
if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(*column_ptrs.back()))
if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(&*arg.column))
{
if (!null_map)
null_map = col_nullable->getNullMapColumnPtr();
@ -140,11 +148,12 @@ public:
null_map = std::move(mut_null_map);
}
column_ptrs.back() = &col_nullable->getNestedColumn();
arg.column = col_nullable->getNestedColumn().getPtr();
arg.type = static_cast<const DataTypeNullable &>(*arg.type).getNestedType();
}
}
auto res = model->evaluate(column_ptrs);
auto res = bridge_helper->evaluate(feature_arguments);
if (null_map)
{
@ -162,15 +171,12 @@ public:
return res;
}
private:
const ExternalModelsLoader & models_loader;
};
REGISTER_FUNCTION(ExternalModels)
REGISTER_FUNCTION(CatBoostEvaluate)
{
factory.registerFunction<FunctionModelEvaluate>();
factory.registerFunction<FunctionCatBoostEvaluate>();
}
}

View File

@ -233,7 +233,7 @@ void ReadBufferFromFileDescriptor::rewind()
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds)
bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) const
{
fd_set fds;
FD_ZERO(&fds);

View File

@ -66,7 +66,7 @@ public:
private:
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool poll(size_t timeout_microseconds);
bool poll(size_t timeout_microseconds) const;
};

View File

@ -1,525 +0,0 @@
#include "CatBoostModel.h"
#include <Common/FieldVisitorConvertToNumber.h>
#include <mutex>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/typeid_cast.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/PODArray.h>
#include <Common/SharedLibrary.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int CANNOT_LOAD_CATBOOST_MODEL;
extern const int CANNOT_APPLY_CATBOOST_MODEL;
}
/// CatBoost wrapper interface functions.
/// A plain table of C entry points that get resolved from the CatBoost shared library.
/// Every pointer starts out as nullptr, so an entry that was never resolved (the optional ones are
/// tested with `if (api->X)` by callers) is a well-defined null instead of an indeterminate value.
class CatBoostWrapperAPI
{
public:
    /// Opaque model handle; only ever used through a pointer.
    using ModelCalcerHandle = void;

    ModelCalcerHandle * (* ModelCalcerCreate)() = nullptr; // NOLINT

    void (* ModelCalcerDelete)(ModelCalcerHandle * calcer) = nullptr; // NOLINT

    const char * (* GetErrorString)() = nullptr; // NOLINT

    bool (* LoadFullModelFromFile)(ModelCalcerHandle * calcer, const char * filename) = nullptr; // NOLINT

    bool (* CalcModelPredictionFlat)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
                                     const float ** floatFeatures, size_t floatFeaturesSize,
                                     double * result, size_t resultSize) = nullptr;

    bool (* CalcModelPrediction)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
                                 const float ** floatFeatures, size_t floatFeaturesSize,
                                 const char *** catFeatures, size_t catFeaturesSize,
                                 double * result, size_t resultSize) = nullptr;

    bool (* CalcModelPredictionWithHashedCatFeatures)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
                                                      const float ** floatFeatures, size_t floatFeaturesSize,
                                                      const int ** catFeatures, size_t catFeaturesSize,
                                                      double * result, size_t resultSize) = nullptr;

    int (* GetStringCatFeatureHash)(const char * data, size_t size) = nullptr; // NOLINT

    int (* GetIntegerCatFeatureHash)(uint64_t val) = nullptr; // NOLINT

    size_t (* GetFloatFeaturesCount)(ModelCalcerHandle* calcer) = nullptr; // NOLINT

    size_t (* GetCatFeaturesCount)(ModelCalcerHandle* calcer) = nullptr; // NOLINT

    size_t (* GetTreeCount)(ModelCalcerHandle* modelHandle) = nullptr; // NOLINT

    size_t (* GetDimensionsCount)(ModelCalcerHandle* modelHandle) = nullptr; // NOLINT

    bool (* CheckModelMetadataHasKey)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize) = nullptr; // NOLINT

    size_t (*GetModelInfoValueSize)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize) = nullptr; // NOLINT

    const char* (*GetModelInfoValue)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize) = nullptr; // NOLINT
};
/// Owns one model calcer handle: created through the wrapper API in the constructor and
/// released with ModelCalcerDelete in the destructor.
/// Copying is disabled because a copy would hold the same raw handle and delete it a second time.
class CatBoostModelHolder
{
private:
    CatBoostWrapperAPI::ModelCalcerHandle * handle;
    const CatBoostWrapperAPI * api;

public:
    /// `api_` must stay valid for the lifetime of the holder; it is used again in the destructor.
    explicit CatBoostModelHolder(const CatBoostWrapperAPI * api_) : api(api_) { handle = api->ModelCalcerCreate(); }
    ~CatBoostModelHolder() { api->ModelCalcerDelete(handle); }

    CatBoostModelHolder(const CatBoostModelHolder &) = delete;
    CatBoostModelHolder & operator=(const CatBoostModelHolder &) = delete;

    /// Raw handle as returned by ModelCalcerCreate (not checked for null here).
    CatBoostWrapperAPI::ModelCalcerHandle * get() { return handle; }
};
/// Keeps the CatBoost wrapper shared library loaded and exposes its entry points as a CatBoostWrapperAPI.
class CatBoostLibHolder
{
public:
    /// Opens the library at `lib_path_` and resolves all API symbols right away.
    explicit CatBoostLibHolder(std::string lib_path_)
        : lib_path(std::move(lib_path_))
        , lib(lib_path)
    {
        initAPI();
    }

    const CatBoostWrapperAPI & getAPI() const { return api; }
    const std::string & getCurrentPath() const { return lib_path; }

private:
    CatBoostWrapperAPI api;
    std::string lib_path;
    SharedLibrary lib;

    /// Resolves a symbol through SharedLibrary::get (presumably fails loudly when the symbol is absent).
    template <typename Fn>
    void bindRequired(Fn & target, const std::string & symbol)
    {
        target = lib.get<Fn>(symbol);
    }

    /// Resolves a symbol through SharedLibrary::tryGet; callers test the resulting pointer before use.
    template <typename Fn>
    void bindOptional(Fn & target, const std::string & symbol)
    {
        target = lib.tryGet<Fn>(symbol);
    }

    /// Fills every slot of `api`. The resolution order is kept as it was.
    void initAPI()
    {
        bindRequired(api.ModelCalcerCreate, "ModelCalcerCreate");
        bindRequired(api.ModelCalcerDelete, "ModelCalcerDelete");
        bindRequired(api.GetErrorString, "GetErrorString");
        bindRequired(api.LoadFullModelFromFile, "LoadFullModelFromFile");
        bindRequired(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
        bindRequired(api.CalcModelPrediction, "CalcModelPrediction");
        bindRequired(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
        bindRequired(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
        bindRequired(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
        bindRequired(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
        bindRequired(api.GetCatFeaturesCount, "GetCatFeaturesCount");

        bindOptional(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
        bindOptional(api.GetModelInfoValueSize, "GetModelInfoValueSize");
        bindOptional(api.GetModelInfoValue, "GetModelInfoValue");
        bindOptional(api.GetTreeCount, "GetTreeCount");
        bindOptional(api.GetDimensionsCount, "GetDimensionsCount");
    }
};
/// Returns the process-wide library holder for `lib_path`.
/// Only the most recently requested library is cached: asking for a different path replaces the
/// cached holder (earlier holders stay alive for as long as someone still owns them).
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
{
    static std::shared_ptr<CatBoostLibHolder> cached_holder;
    static std::mutex cache_mutex;

    std::lock_guard guard(cache_mutex);

    const bool needs_reload = !cached_holder || cached_holder->getCurrentPath() != lib_path;
    if (needs_reload)
    {
        cached_holder = std::make_shared<CatBoostLibHolder>(lib_path);
    }

    return cached_holder;
}
class CatBoostModelImpl
{
public:
/// Creates a calcer handle through `api_`, loads the model file at `model_path` into it and caches
/// the model's feature counts and output dimension.
/// Throws CANNOT_LOAD_CATBOOST_MODEL if the handle cannot be created or the model cannot be loaded.
CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_)
{
    handle = std::make_unique<CatBoostModelHolder>(api);

    /// std::make_unique never yields a null pointer, so testing `handle` itself can never detect a
    /// failed ModelCalcerCreate. The raw calcer handle is what has to be checked.
    if (!handle->get())
    {
        throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
                        "Cannot create CatBoost model: {}",
                        api->GetErrorString());
    }

    if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str()))
    {
        throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
                        "Cannot load CatBoost model: {}",
                        api->GetErrorString());
    }

    float_features_count = api->GetFloatFeaturesCount(handle->get());
    cat_features_count = api->GetCatFeaturesCount(handle->get());

    /// GetDimensionsCount is an optional symbol; without it the model is treated as single-output.
    tree_count = 1;
    if (api->GetDimensionsCount)
        tree_count = api->GetDimensionsCount(handle->get());
}
/// Checks `columns` against the model's feature layout, applies the model and shapes the result.
/// The first float_features_count columns must be numeric; the following cat_features_count columns
/// must each be numeric, ColumnString or ColumnFixedString (BAD_ARGUMENTS otherwise).
/// Returns a Float64 column when tree_count == 1, otherwise a tuple of tree_count Float64 columns.
ColumnPtr evaluate(const ColumnRawPtrs & columns) const
{
    if (columns.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");

    const size_t features_total = float_features_count + cat_features_count;
    if (columns.size() != features_total)
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
                        "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
                        columns.size(),
                        float_features_count,
                        cat_features_count);

    /// Single pass over all features: float features come first, categorical ones after them.
    bool cat_features_are_strings = true;
    for (size_t i = 0; i < features_total; ++i)
    {
        const auto * column = columns[i];

        if (i < float_features_count)
        {
            if (!column->isNumeric())
            {
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
            }
            continue;
        }

        if (column->isNumeric())
        {
            /// One numeric categorical column is enough to switch to the hashed evaluation path.
            cat_features_are_strings = false;
            continue;
        }

        if (!typeid_cast<const ColumnString *>(column) && !typeid_cast<const ColumnFixedString *>(column))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
        }
    }

    auto flat_result = evalImpl(columns, cat_features_are_strings);
    if (tree_count == 1)
        return flat_result;

    /// Multiple trees case. The flat result is row-major (tree_count values per row);
    /// split it into one column per tree.
    const size_t rows = columns.front()->size();
    const Float64 * flat_values = flat_result->getData().data();

    MutableColumns per_tree_columns(tree_count);
    for (size_t tree = 0; tree < tree_count; ++tree)
    {
        auto tree_column = ColumnFloat64::create(rows);
        Float64 * tree_values = tree_column->getData().data();

        for (size_t row = 0; row < rows; ++row)
            tree_values[row] = flat_values[row * tree_count + tree];

        per_tree_columns[tree] = std::move(tree_column);
    }

    return ColumnTuple::create(std::move(per_tree_columns));
}
/// Number of float features, as reported by GetFloatFeaturesCount when the model was loaded.
size_t getFloatFeaturesCount() const { return float_features_count; }
/// Number of categorical features, as reported by GetCatFeaturesCount when the model was loaded.
size_t getCatFeaturesCount() const { return cat_features_count; }
/// Number of values produced per row: GetDimensionsCount if the library exports it, otherwise 1.
size_t getTreeCount() const { return tree_count; }
private:
/// Owning wrapper around the calcer handle the model is loaded into.
std::unique_ptr<CatBoostModelHolder> handle;
/// Non-owning pointer to the wrapper function table; must outlive this object.
const CatBoostWrapperAPI * api;
/// The three counters below are filled in by the constructor after the model is loaded.
size_t float_features_count;
size_t cat_features_count;
size_t tree_count;
/// Buffer should be allocated with features_count * column->size() elements.
/// Place column elements in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
template <typename T>
void placeColumnAsNumber(const IColumn * column, T * buffer, size_t features_count) const
{
size_t size = column->size();
FieldVisitorConvertToNumber<T> visitor;
for (size_t i = 0; i < size; ++i)
{
/// TODO: Replace with column visitor.
Field field;
column->get(i, field);
*buffer = applyVisitor(visitor, field);
buffer += features_count;
}
}
/// Stores a pointer to each (zero-terminated) string of `column` into a strided buffer.
/// Buffer should be allocated with features_count * column->size() elements.
/// Row r lands at buffer[r * features_count]. The pointers refer to the column's own data,
/// so the column has to stay alive while the buffer is in use.
static void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count)
{
    const size_t rows = column.size();
    for (size_t row = 0; row < rows; ++row)
        buffer[row * features_count] = column.getDataAtWithTerminatingZero(row).data;
}
/// Copies every value of a fixed-string column into zero-terminated storage and stores a pointer
/// to each copy into a strided buffer (row r lands at buffer[r * features_count]).
/// Buffer should be allocated with features_count * column->size() elements.
/// Returns the PODArray that owns the copies (ColumnFixedString doesn't store a terminating zero);
/// the caller must keep it alive for as long as the pointers in `buffer` are used.
static PODArray<char> placeFixedStringColumn(
    const ColumnFixedString & column, const char ** buffer, size_t features_count)
{
    const size_t rows = column.size();
    const size_t fixed_size = column.getN();

    /// One extra byte per row for the terminating zero.
    PODArray<char> storage(rows * (fixed_size + 1));
    char * cursor = storage.data();

    for (size_t row = 0; row < rows; ++row)
    {
        const auto value = column.getDataAt(row);

        memcpy(cursor, value.data, value.size);
        cursor[value.size] = 0;

        buffer[row * features_count] = cursor;
        cursor += value.size + 1;
    }

    return storage;
}
/// Packs `size` feature columns, starting at columns[offset], into one row-major ColumnVector<T>
/// and fills `buffer` with a pointer to the start of every row. Buffer should contain column->size() values.
/// Non-numeric columns are skipped, so their slots in the packed data are left unset.
/// Returns the column that owns the packed data (nullptr when size == 0); keep it alive while `buffer` is used.
template <typename T>
ColumnPtr placeNumericColumns(const ColumnRawPtrs & columns,
                              size_t offset, size_t size, const T** buffer) const
{
    if (size == 0)
        return nullptr;

    const size_t rows = columns[offset]->size();
    auto packed = ColumnVector<T>::create(size * rows);
    T * values = packed->getData().data();

    for (size_t feature = 0; feature < size; ++feature)
    {
        if (const auto * column = columns[offset + feature]; column->isNumeric())
            placeColumnAsNumber(column, values + feature, size);
    }

    for (size_t row = 0; row < rows; ++row)
        buffer[row] = values + row * size;

    return packed;
}
/// Place columns into buffer, returns data which was used for fixed string columns.
/// Buffer should contains column->size() values, each value contains size strings.
static std::vector<PODArray<char>> placeStringColumns(
const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer)
{
if (size == 0)
return {};
std::vector<PODArray<char>> data;
for (size_t i = 0; i < size; ++i)
{
const auto * column = columns[offset + i];
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
placeStringColumn(*column_string, buffer + i, size);
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size));
else
throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR);
}
return data;
}
/// Hashes every string of `column` with GetStringCatFeatureHash and stores the hash at
/// position `ps` of the corresponding row in `buffer` (one row pointer per column row).
template <typename Column>
void calcStringHashes(const Column * column, size_t ps, const int ** buffer) const
{
    const size_t rows = column->size();
    for (size_t row = 0; row < rows; ++row)
    {
        const auto value = column->getDataAt(row);

        /// The rows are handed over as `const int *`, but this function is what fills them in.
        int * row_features = const_cast<int *>(buffer[row]);
        row_features[ps] = api->GetStringCatFeatureHash(value.data, value.size);
    }
}
/// Replaces, in every row, the raw integer value at position `ps` with its GetIntegerCatFeatureHash.
/// On entry the slot at `ps` has to hold the unhashed value.
void calcIntHashes(size_t column_size, size_t ps, const int ** buffer) const
{
    for (size_t row = 0; row < column_size; ++row)
    {
        /// The rows are handed over as `const int *`, but they are rewritten in place here.
        int * row_features = const_cast<int *>(buffer[row]);
        row_features[ps] = api->GetIntegerCatFeatureHash(row_features[ps]);
    }
}
/// Turns the categorical feature values in `buffer` into CatBoost hashes.
/// buffer contains column->size() rows and `size` columns; feature i is columns[offset + i].
/// For int cat features the hash is calculated in place from the value already in the buffer.
/// For string cat features (ColumnString / ColumnFixedString) the hash is calculated from the column rows.
void calcHashes(const ColumnRawPtrs & columns, size_t offset, size_t size, const int ** buffer) const
{
    if (size == 0)
        return;

    size_t column_size = columns[offset]->size();

    for (size_t i = 0; i < size; ++i)
    {
        const auto * column = columns[offset + i];
        if (const auto * column_string = typeid_cast<const ColumnString *>(column))
            calcStringHashes(column_string, i, buffer);
        else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
            calcStringHashes(column_fixed_string, i, buffer);
        else
            calcIntHashes(column_size, i, buffer);
    }
}
/// Builds the per-row index over a flat pointer buffer:
/// buffer[column_size * cat_features_count] -> char * => cat_features[column_size][cat_features_count] -> char *
/// i.e. cat_features[row] points at the first of the cat_features_count entries belonging to that row.
void fillCatFeaturesBuffer(const char *** cat_features, const char ** buffer,
                           size_t column_size) const
{
    for (size_t row = 0; row < column_size; ++row)
        cat_features[row] = buffer + row * cat_features_count;
}
/// Convert values to row-oriented format and call evaluation function from CatBoost wrapper api.
///  * CalcModelPredictionFlat if no cat features
///  * CalcModelPrediction if all cat features are strings
///  * CalcModelPredictionWithHashedCatFeatures if has int cat features.
/// Returns a flat Float64 column with tree_count values per input row.
/// Throws CANNOT_APPLY_CATBOOST_MODEL when the wrapper reports a failure.
ColumnFloat64::MutablePtr evalImpl(
    const ColumnRawPtrs & columns,
    bool cat_features_are_strings) const
{
    const size_t rows = columns.front()->size();
    const size_t result_size = rows * tree_count;

    auto predictions = ColumnFloat64::create(result_size);
    auto * predictions_buf = predictions->getData().data();

    if (rows == 0)
        return predictions;

    /// Common failure path: the wrapper's own error text is appended to a fixed prefix.
    const auto throw_apply_error = [this]
    {
        std::string message = "Error occurred while applying CatBoost model: ";
        throw Exception(message + api->GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
    };

    /// Prepare float features.
    /// All float data is stored in a single column (float_storage); float_rows is a list of row pointers into it.
    PODArray<const float *> float_rows(rows);
    auto float_storage = placeNumericColumns<float>(columns, 0, float_features_count, float_rows.data());

    if (cat_features_count == 0)
    {
        const bool applied = api->CalcModelPredictionFlat(
            handle->get(), rows,
            float_rows.data(), float_features_count,
            predictions_buf, result_size);

        if (!applied)
            throw_apply_error();

        return predictions;
    }

    /// Prepare cat features.
    if (cat_features_are_strings)
    {
        /// string_ptrs stores pointers to ColumnString data or to fixed_strings_storage.
        PODArray<const char *> string_ptrs(cat_features_count * rows);
        PODArray<const char **> string_rows(rows);
        fillCatFeaturesBuffer(string_rows.data(), string_ptrs.data(), rows);

        /// Fixed strings are stored without termination zero, so they are copied into fixed_strings_storage,
        /// which has to stay alive until the prediction call returns.
        auto fixed_strings_storage = placeStringColumns(
            columns, float_features_count, cat_features_count, string_ptrs.data());

        const bool applied = api->CalcModelPrediction(
            handle->get(), rows,
            float_rows.data(), float_features_count,
            string_rows.data(), cat_features_count,
            predictions_buf, result_size);

        if (!applied)
            throw_apply_error();
    }
    else
    {
        /// hashed_storage owns the per-row integer values; calcHashes rewrites them into hashes in place.
        PODArray<const int *> hashed_rows(rows);
        auto hashed_storage = placeNumericColumns<int>(
            columns, float_features_count, cat_features_count, hashed_rows.data());
        calcHashes(columns, float_features_count, cat_features_count, hashed_rows.data());

        const bool applied = api->CalcModelPredictionWithHashedCatFeatures(
            handle->get(), rows,
            float_rows.data(), float_features_count,
            hashed_rows.data(), cat_features_count,
            predictions_buf, result_size);

        if (!applied)
            throw_apply_error();
    }

    return predictions;
}
};
/// Stores the model description, obtains the shared library holder for `lib_path_` and loads the
/// model file through it.
/// The initializers depend on the member declaration order (lib_path before api_provider,
/// api_provider before api, api before model).
CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_,
                             const ExternalLoadableLifetime & lifetime_)
    : name(std::move(name_))
    , model_path(std::move(model_path_))
    , lib_path(std::move(lib_path_))
    , lifetime(lifetime_)
    , api_provider(getCatBoostWrapperHolder(lib_path))
    , api(&api_provider->getAPI())
    , model(std::make_unique<CatBoostModelImpl>(api, model_path))
{
}
/// Defined here rather than in the header because the header only forward-declares
/// CatBoostModelImpl, which the std::unique_ptr member must see as a complete type to destroy it.
CatBoostModel::~CatBoostModel() = default;
/// Forwards to the loaded model implementation (no null check on `model`).
size_t CatBoostModel::getFloatFeaturesCount() const
{
return model->getFloatFeaturesCount();
}
/// Forwards to the loaded model implementation (no null check on `model`).
size_t CatBoostModel::getCatFeaturesCount() const
{
return model->getCatFeaturesCount();
}
/// Forwards to the loaded model implementation (no null check on `model`).
size_t CatBoostModel::getTreeCount() const
{
return model->getTreeCount();
}
/// Result type of evaluate(): Float64 when the model has a tree count of 1,
/// otherwise a tuple with one Float64 element per tree.
DataTypePtr CatBoostModel::getReturnType() const
{
    const size_t outputs = getTreeCount();
    auto float64_type = std::make_shared<DataTypeFloat64>();

    if (outputs != 1)
    {
        DataTypes element_types(outputs, float64_type);
        return std::make_shared<DataTypeTuple>(element_types);
    }

    return float64_type;
}
/// Applies the loaded model to `columns`; LOGICAL_ERROR if no model implementation is present.
ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const
{
    if (model)
    {
        return model->evaluate(columns);
    }

    throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR);
}
}

View File

@ -1,73 +0,0 @@
#pragma once
#include <Interpreters/IExternalLoadable.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnsNumber.h>
namespace DB
{
class CatBoostLibHolder;
class CatBoostWrapperAPI;
class CatBoostModelImpl;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
/// General ML model evaluator interface.
class IMLModel : public IExternalLoadable
{
public:
IMLModel() = default;
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
virtual std::string getTypeName() const = 0;
virtual DataTypePtr getReturnType() const = 0;
virtual ~IMLModel() override = default;
};
/// IMLModel implementation backed by the CatBoost evaluation library.
/// The library at `lib_path` is loaded dynamically and the model file at `model_path` is loaded
/// through it; both happen in the constructor.
class CatBoostModel : public IMLModel
{
public:
    CatBoostModel(std::string name, std::string model_path,
                  std::string lib_path, const ExternalLoadableLifetime & lifetime);

    ~CatBoostModel() override;

    /// Applies the model to the feature columns (float features first, then categorical ones).
    ColumnPtr evaluate(const ColumnRawPtrs & columns) const override;
    std::string getTypeName() const override { return "catboost"; }

    size_t getFloatFeaturesCount() const;
    size_t getCatFeaturesCount() const;
    size_t getTreeCount() const;
    DataTypePtr getReturnType() const override;

    /// IExternalLoadable interface.

    const ExternalLoadableLifetime & getLifetime() const override { return lifetime; }

    std::string getLoadableName() const override { return name; }

    bool supportUpdates() const override { return true; }

    /// Always reports the model as modified.
    bool isModified() const override { return true; }

    /// A clone is a freshly constructed model, i.e. the model file is loaded again.
    std::shared_ptr<const IExternalLoadable> clone() const override
    {
        return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
    }

private:
    const std::string name;
    std::string model_path;
    std::string lib_path;
    ExternalLoadableLifetime lifetime;

    /// Keeps the shared library loaded for as long as this model exists.
    std::shared_ptr<CatBoostLibHolder> api_provider;
    /// Function table owned by api_provider; set by the constructor.
    const CatBoostWrapperAPI * api = nullptr;

    std::unique_ptr<CatBoostModelImpl> model;
};
}

View File

@ -52,7 +52,6 @@
#include <Interpreters/EmbeddedDictionaries.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Interpreters/ExternalModelsLoader.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/InterserverCredentials.h>
@ -153,7 +152,6 @@ struct ContextSharedPart : boost::noncopyable
mutable std::mutex embedded_dictionaries_mutex;
mutable std::mutex external_dictionaries_mutex;
mutable std::mutex external_user_defined_executable_functions_mutex;
mutable std::mutex external_models_mutex;
/// Separate mutex for storage policies. During server startup we may
/// initialize some important storages (system logs with MergeTree engine)
/// under context lock.
@ -191,9 +189,7 @@ struct ContextSharedPart : boost::noncopyable
mutable std::unique_ptr<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::unique_ptr<ExternalDictionariesLoader> external_dictionaries_loader;
mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader;
mutable std::unique_ptr<ExternalModelsLoader> external_models_loader;
ExternalLoaderXMLConfigRepository * external_models_config_repository = nullptr;
scope_guard models_repository_guard;
ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr;
@ -359,8 +355,6 @@ struct ContextSharedPart : boost::noncopyable
external_dictionaries_loader->enablePeriodicUpdates(false);
if (external_user_defined_executable_functions_loader)
external_user_defined_executable_functions_loader->enablePeriodicUpdates(false);
if (external_models_loader)
external_models_loader->enablePeriodicUpdates(false);
Session::shutdownNamedSessions();
@ -391,7 +385,6 @@ struct ContextSharedPart : boost::noncopyable
std::unique_ptr<EmbeddedDictionaries> delete_embedded_dictionaries;
std::unique_ptr<ExternalDictionariesLoader> delete_external_dictionaries_loader;
std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> delete_external_user_defined_executable_functions_loader;
std::unique_ptr<ExternalModelsLoader> delete_external_models_loader;
std::unique_ptr<BackgroundSchedulePool> delete_buffer_flush_schedule_pool;
std::unique_ptr<BackgroundSchedulePool> delete_schedule_pool;
std::unique_ptr<BackgroundSchedulePool> delete_distributed_schedule_pool;
@ -430,7 +423,6 @@ struct ContextSharedPart : boost::noncopyable
delete_embedded_dictionaries = std::move(embedded_dictionaries);
delete_external_dictionaries_loader = std::move(external_dictionaries_loader);
delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader);
delete_external_models_loader = std::move(external_models_loader);
delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool);
delete_schedule_pool = std::move(schedule_pool);
delete_distributed_schedule_pool = std::move(distributed_schedule_pool);
@ -458,7 +450,6 @@ struct ContextSharedPart : boost::noncopyable
delete_embedded_dictionaries.reset();
delete_external_dictionaries_loader.reset();
delete_external_user_defined_executable_functions_loader.reset();
delete_external_models_loader.reset();
delete_ddl_worker.reset();
delete_buffer_flush_schedule_pool.reset();
delete_schedule_pool.reset();
@ -1476,48 +1467,6 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx
return *shared->external_user_defined_executable_functions_loader;
}
/// Const overload: casts away constness of the context and forwards to the non-const overload below.
const ExternalModelsLoader & Context::getExternalModelsLoader() const
{
return const_cast<Context *>(this)->getExternalModelsLoader();
}
/// Returns the shared models loader, taking shared->external_models_mutex around the call to
/// getExternalModelsLoaderUnlocked().
ExternalModelsLoader & Context::getExternalModelsLoader()
{
std::lock_guard lock(shared->external_models_mutex);
return getExternalModelsLoaderUnlocked();
}
/// Returns the shared models loader, creating it on first use.
/// Takes no lock itself; both callers in this file hold shared->external_models_mutex.
ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked()
{
    auto & loader = shared->external_models_loader;
    if (loader == nullptr)
    {
        loader = std::make_unique<ExternalModelsLoader>(getGlobalContext());
    }
    return *loader;
}
/// Registers the model config files matched by the `models_config` patterns of `config` with the
/// models loader. On the first call a config repository is created and attached to the loader;
/// on later calls the existing repository gets the new patterns and its config is reloaded.
void Context::loadOrReloadModels(const Poco::Util::AbstractConfiguration & config)
{
    auto configured_patterns = getMultipleValuesFromConfig(config, "", "models_config");
    std::unordered_set<std::string> patterns(configured_patterns.begin(), configured_patterns.end());

    std::lock_guard guard(shared->external_models_mutex);

    auto & loader = getExternalModelsLoaderUnlocked();

    if (auto * existing_repository = shared->external_models_config_repository)
    {
        existing_repository->updatePatterns(patterns);
        loader.reloadConfig(existing_repository->getName());
        return;
    }

    auto app_path = getPath();
    auto config_path = getConfigRef().getString("config-file", "config.xml");
    auto new_repository = std::make_unique<ExternalLoaderXMLConfigRepository>(app_path, config_path, patterns);

    /// Remember the raw pointer before ownership moves to the loader; the guard detaches it again.
    shared->external_models_config_repository = new_repository.get();
    shared->models_repository_guard = loader.addConfigRepository(std::move(new_repository));
}
EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_error) const
{
std::lock_guard lock(shared->embedded_dictionaries_mutex);

View File

@ -53,7 +53,6 @@ class AccessRightsElements;
enum class RowPolicyFilterType;
class EmbeddedDictionaries;
class ExternalDictionariesLoader;
class ExternalModelsLoader;
class ExternalUserDefinedExecutableFunctionsLoader;
class InterserverCredentials;
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
@ -645,19 +644,15 @@ public:
const EmbeddedDictionaries & getEmbeddedDictionaries() const;
const ExternalDictionariesLoader & getExternalDictionariesLoader() const;
const ExternalModelsLoader & getExternalModelsLoader() const;
const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const;
EmbeddedDictionaries & getEmbeddedDictionaries();
ExternalDictionariesLoader & getExternalDictionariesLoader();
ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked();
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader();
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked();
ExternalModelsLoader & getExternalModelsLoader();
ExternalModelsLoader & getExternalModelsLoaderUnlocked();
void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const;
void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config);
void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config);
void loadOrReloadModels(const Poco::Util::AbstractConfiguration & config);
#if USE_NLP
SynonymsExtensions & getSynonymsExtensions() const;

View File

@ -61,18 +61,23 @@ public:
return host_fqdn_id;
}
/// Returns a copy of the `queue_dir` member.
std::string getQueueDir() const
{
return queue_dir;
}
void startup();
virtual void shutdown();
bool isCurrentlyActive() const { return initialized && !stop_flag; }
protected:
/// Returns cached ZooKeeper session (possibly expired).
ZooKeeperPtr tryGetZooKeeper() const;
/// If necessary, creates a new session and caches it.
ZooKeeperPtr getAndSetZooKeeper();
protected:
/// Iterates through queue tasks in ZooKeeper, runs execution of new tasks
void scheduleTasks(bool reinitialized);

View File

@ -1,41 +0,0 @@
#include <Interpreters/ExternalModelsLoader.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_CONFIG_PARAMETER;
}
/// Sets up the loader under the type name "external model" with its own logger, applies the
/// config settings {"model", "name", {}, {}} and turns periodic updates on.
ExternalModelsLoader::ExternalModelsLoader(ContextPtr context_)
: ExternalLoader("external model", &Poco::Logger::get("ExternalModelsLoader")), WithContext(context_)
{
setConfigSettings({"model", "name", {}, {}});
enablePeriodicUpdates(true);
}
/// Creates the model described under `config_prefix`.
/// Only the type "catboost" is supported: the model file comes from `<prefix>.path` and the library
/// from the server config key `catboost_dynamic_library_path`.
/// Any other type is rejected with INVALID_CONFIG_PARAMETER.
std::shared_ptr<const IExternalLoadable> ExternalModelsLoader::create(
    const std::string & name, const Poco::Util::AbstractConfiguration & config,
    const std::string & config_prefix, const std::string & /* repository_name */) const
{
    const String type = config.getString(config_prefix + ".type");
    ExternalLoadableLifetime lifetime(config, config_prefix + ".lifetime");

    /// TODO: add models factory.
    if (type != "catboost")
    {
        throw Exception("Unknown model type: " + type, ErrorCodes::INVALID_CONFIG_PARAMETER);
    }

    return std::make_unique<CatBoostModel>(
        name,
        config.getString(config_prefix + ".path"),
        getContext()->getConfigRef().getString("catboost_dynamic_library_path"),
        lifetime);
}
}

View File

@ -1,40 +0,0 @@
#pragma once
#include <Interpreters/CatBoostModel.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/ExternalLoader.h>
#include <Common/logger_useful.h>
#include <memory>
namespace DB
{
/// External loader specialised for user-defined ML models.
class ExternalModelsLoader : public ExternalLoader, WithContext
{
public:
    using ModelPtr = std::shared_ptr<const IMLModel>;

    /// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds.
    explicit ExternalModelsLoader(ContextPtr context_);

    /// Loads (if necessary) the model registered under `model_name` and returns it as an IMLModel.
    ModelPtr getModel(const std::string & model_name) const
    {
        auto loadable = load(model_name);
        return std::static_pointer_cast<const IMLModel>(loadable);
    }

    /// Loads or reloads the model registered under `model_name`.
    void reloadModel(const std::string & model_name) const
    {
        loadOrReload(model_name);
    }

protected:
    /// Factory hook called by ExternalLoader to build a model from its configuration subtree.
    LoadablePtr create(
        const std::string & name,
        const Poco::Util::AbstractConfiguration & config,
        const std::string & config_prefix,
        const std::string & repository_name) const override;

    friend class StorageSystemModels;
};
}

Some files were not shown because too many files have changed in this diff Show More