mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 18:42:26 +00:00
Merge branch 'master' into enable_projections_and_zero_copy
This commit is contained in:
commit
b4d2d217f4
@ -22,6 +22,8 @@ Checks: '*,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-unchecked-optional-access,
|
||||
-bugprone-assignment-in-if-condition,
|
||||
|
||||
-cert-dcl16-c,
|
||||
-cert-err58-cpp,
|
||||
@ -103,6 +105,7 @@ Checks: '*,
|
||||
|
||||
-misc-no-recursion,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-const-correctness,
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
@ -114,6 +117,7 @@ Checks: '*,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-override,
|
||||
-modernize-use-trailing-return-type,
|
||||
-modernize-macro-to-enum,
|
||||
|
||||
-performance-inefficient-string-concatenation,
|
||||
-performance-no-int-to-ptr,
|
||||
@ -135,6 +139,7 @@ Checks: '*,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-uppercase-literal-suffix,
|
||||
-readability-use-anyofallof,
|
||||
-readability-simplify-boolean-expr,
|
||||
|
||||
-zirkon-*,
|
||||
'
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -58,6 +58,10 @@ cmake_install.cmake
|
||||
CTestTestfile.cmake
|
||||
*.a
|
||||
*.o
|
||||
*.so
|
||||
*.dll
|
||||
*.lib
|
||||
*.dylib
|
||||
cmake-build-*
|
||||
|
||||
# Python cache
|
||||
|
@ -3,7 +3,7 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
|
||||
|
||||
if (ENABLE_CLANG_TIDY)
|
||||
|
||||
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
|
||||
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
|
||||
|
||||
if (CLANG_TIDY_PATH)
|
||||
message(STATUS
|
||||
|
@ -45,6 +45,7 @@ if (CMAKE_CROSSCOMPILING)
|
||||
endif ()
|
||||
|
||||
if (USE_MUSL)
|
||||
# use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP'
|
||||
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
|
||||
set (ENABLE_ODBC OFF CACHE INTERNAL "")
|
||||
set (ENABLE_GRPC OFF CACHE INTERNAL "")
|
||||
|
2
contrib/NuRaft
vendored
2
contrib/NuRaft
vendored
@ -1 +1 @@
|
||||
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc
|
||||
Subproject commit e15858f8ad0ce8aba85cf74e3763874c76bf927c
|
@ -1,35 +1,95 @@
|
||||
# Choose to build static or shared library for c-ares.
|
||||
if (USE_STATIC_LIBRARIES)
|
||||
set(CARES_STATIC ON CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
|
||||
else ()
|
||||
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED ON CACHE BOOL "" FORCE)
|
||||
endif ()
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/c-ares")
|
||||
|
||||
# Disable looking for libnsl on a platforms that has gethostbyname in glibc
|
||||
#
|
||||
# c-ares searching for gethostbyname in the libnsl library, however in the
|
||||
# version that shipped with gRPC it doing it wrong [1], since it uses
|
||||
# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in
|
||||
# another dependent library. The upstream already contains correct macro [2],
|
||||
# but it is not included in gRPC (even upstream gRPC, not the one that is
|
||||
# shipped with clickhousee).
|
||||
#
|
||||
# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
|
||||
# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
|
||||
#
|
||||
# And because if you by some reason have libnsl [3] installed, clickhouse will
|
||||
# reject to start w/o it. While this is completelly different library.
|
||||
#
|
||||
# [3]: https://packages.debian.org/bullseye/libnsl2
|
||||
if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
|
||||
set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
|
||||
# Generated from contrib/c-ares/src/lib/Makefile.inc
|
||||
SET(SRCS
|
||||
"${LIBRARY_DIR}/src/lib/ares__addrinfo2hostent.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__addrinfo_localhost.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__close_sockets.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__get_hostent.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__parse_into_addrinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__readaddrinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__sortaddrinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__read_line.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares__timeval.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_android.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_cancel.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_data.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_destroy.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_expand_name.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_expand_string.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_fds.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_free_hostent.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_free_string.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_freeaddrinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_getaddrinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_getenv.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_gethostbyaddr.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_gethostbyname.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_getnameinfo.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_getsock.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_init.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_library_init.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_llist.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_mkquery.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_create_query.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_nowarn.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_options.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_a_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_aaaa_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_caa_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_mx_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_naptr_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_ns_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_ptr_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_soa_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_srv_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_txt_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_parse_uri_reply.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_platform.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_process.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_query.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_search.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_send.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_strdup.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_strerror.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_strsplit.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_timeout.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_version.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_writev.c"
|
||||
"${LIBRARY_DIR}/src/lib/bitncmp.c"
|
||||
"${LIBRARY_DIR}/src/lib/inet_net_pton.c"
|
||||
"${LIBRARY_DIR}/src/lib/inet_ntop.c"
|
||||
"${LIBRARY_DIR}/src/lib/windows_port.c"
|
||||
)
|
||||
|
||||
if (USE_STATIC_LIBRARIES)
|
||||
add_library(_c-ares STATIC ${SRCS})
|
||||
target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB)
|
||||
else()
|
||||
add_library(_c-ares SHARED ${SRCS})
|
||||
target_compile_definitions(_c-ares PUBLIC CARES_BUILDING_LIBRARY)
|
||||
endif()
|
||||
|
||||
# Force use of c-ares inet_net_pton instead of libresolv one
|
||||
set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE)
|
||||
target_compile_definitions(_c-ares PRIVATE HAVE_CONFIG_H=1)
|
||||
|
||||
add_subdirectory("../c-ares/" "../c-ares/")
|
||||
target_include_directories(_c-ares SYSTEM PUBLIC
|
||||
"${LIBRARY_DIR}/src/lib"
|
||||
"${LIBRARY_DIR}/include"
|
||||
)
|
||||
|
||||
add_library(ch_contrib::c-ares ALIAS c-ares)
|
||||
# Platform-specific include directories. The original build system does a lot of checks to eventually generate two header files with defines:
|
||||
# ares_build.h and ares_config.h. To update, run the original CMake build in c-ares for each platform and copy the headers into the
|
||||
# platform-specific folder.
|
||||
# For the platform-specific compile definitions, see c-ares top-level CMakeLists.txt.
|
||||
if (OS_LINUX)
|
||||
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/linux")
|
||||
target_compile_definitions(_c-ares PRIVATE -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=600)
|
||||
elseif (OS_DARWIN)
|
||||
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/darwin")
|
||||
target_compile_definitions(_c-ares PRIVATE -D_DARWIN_C_SOURCE)
|
||||
elseif (OS_FREEBSD)
|
||||
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/freebsd")
|
||||
endif()
|
||||
|
||||
add_library(ch_contrib::c-ares ALIAS _c-ares)
|
||||
|
43
contrib/c-ares-cmake/darwin/ares_build.h
Normal file
43
contrib/c-ares-cmake/darwin/ares_build.h
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef __CARES_BUILD_H
|
||||
#define __CARES_BUILD_H
|
||||
|
||||
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
|
||||
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
|
||||
|
||||
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
|
||||
* files. We need to include some dependent headers that may be system specific
|
||||
* for C-Ares */
|
||||
#define CARES_HAVE_SYS_TYPES_H
|
||||
#define CARES_HAVE_SYS_SOCKET_H
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
/* #undef CARES_HAVE_WS2TCPIP_H */
|
||||
/* #undef CARES_HAVE_WINSOCK2_H */
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
#define CARES_HAVE_ARPA_NAMESER_H
|
||||
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
#ifdef CARES_HAVE_SYS_TYPES_H
|
||||
# include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_SYS_SOCKET_H
|
||||
# include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINSOCK2_H
|
||||
# include <winsock2.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WS2TCPIP_H
|
||||
# include <ws2tcpip.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINDOWS_H
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
|
||||
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
|
||||
|
||||
#endif /* __CARES_BUILD_H */
|
432
contrib/c-ares-cmake/darwin/ares_config.h
Normal file
432
contrib/c-ares-cmake/darwin/ares_config.h
Normal file
@ -0,0 +1,432 @@
|
||||
/* Generated from ares_config.h.cmake */
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
#undef AC_APPLE_UNIVERSAL_BUILD
|
||||
|
||||
/* define this if ares is built for a big endian system */
|
||||
#undef ARES_BIG_ENDIAN
|
||||
|
||||
/* when building as static part of libcurl */
|
||||
#undef BUILDING_LIBCURL
|
||||
|
||||
/* Defined for build that exposes internal static functions for testing. */
|
||||
#undef CARES_EXPOSE_STATICS
|
||||
|
||||
/* Defined for build with symbol hiding. */
|
||||
#undef CARES_SYMBOL_HIDING
|
||||
|
||||
/* Definition to make a library symbol externally visible. */
|
||||
#undef CARES_SYMBOL_SCOPE_EXTERN
|
||||
|
||||
/* Use resolver library to configure cares */
|
||||
/* #undef CARES_USE_LIBRESOLV */
|
||||
|
||||
/* if a /etc/inet dir is being used */
|
||||
#undef ETC_INET
|
||||
|
||||
/* Define to the type of arg 2 for gethostname. */
|
||||
#define GETHOSTNAME_TYPE_ARG2 size_t
|
||||
|
||||
/* Define to the type qualifier of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_QUAL_ARG1
|
||||
|
||||
/* Define to the type of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
|
||||
|
||||
/* Define to the type of arg 2 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG2 socklen_t
|
||||
|
||||
/* Define to the type of args 4 and 6 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG46 socklen_t
|
||||
|
||||
/* Define to the type of arg 7 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG7 int
|
||||
|
||||
/* Specifies the number of arguments to getservbyport_r */
|
||||
#define GETSERVBYPORT_R_ARGS
|
||||
|
||||
/* Specifies the number of arguments to getservbyname_r */
|
||||
#define GETSERVBYNAME_R_ARGS
|
||||
|
||||
/* Define to 1 if you have AF_INET6. */
|
||||
#define HAVE_AF_INET6
|
||||
|
||||
/* Define to 1 if you have the <arpa/inet.h> header file. */
|
||||
#define HAVE_ARPA_INET_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_H
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
#define HAVE_ASSERT_H
|
||||
|
||||
/* Define to 1 if you have the `bitncmp' function. */
|
||||
/* #undef HAVE_BITNCMP */
|
||||
|
||||
/* Define to 1 if bool is an available type. */
|
||||
#define HAVE_BOOL_T
|
||||
|
||||
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
|
||||
#define HAVE_CLOCK_GETTIME_MONOTONIC
|
||||
|
||||
/* Define to 1 if you have the closesocket function. */
|
||||
/* #undef HAVE_CLOSESOCKET */
|
||||
|
||||
/* Define to 1 if you have the CloseSocket camel case function. */
|
||||
/* #undef HAVE_CLOSESOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have the connect function. */
|
||||
#define HAVE_CONNECT
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
/* #undef HAVE_CXX11 */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#define HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <errno.h> header file. */
|
||||
#define HAVE_ERRNO_H
|
||||
|
||||
/* Define to 1 if you have the fcntl function. */
|
||||
#define HAVE_FCNTL
|
||||
|
||||
/* Define to 1 if you have the <fcntl.h> header file. */
|
||||
#define HAVE_FCNTL_H
|
||||
|
||||
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
|
||||
#define HAVE_FCNTL_O_NONBLOCK
|
||||
|
||||
/* Define to 1 if you have the freeaddrinfo function. */
|
||||
#define HAVE_FREEADDRINFO
|
||||
|
||||
/* Define to 1 if you have a working getaddrinfo function. */
|
||||
#define HAVE_GETADDRINFO
|
||||
|
||||
/* Define to 1 if the getaddrinfo function is threadsafe. */
|
||||
#define HAVE_GETADDRINFO_THREADSAFE
|
||||
|
||||
/* Define to 1 if you have the getenv function. */
|
||||
#define HAVE_GETENV
|
||||
|
||||
/* Define to 1 if you have the gethostbyaddr function. */
|
||||
#define HAVE_GETHOSTBYADDR
|
||||
|
||||
/* Define to 1 if you have the gethostbyname function. */
|
||||
#define HAVE_GETHOSTBYNAME
|
||||
|
||||
/* Define to 1 if you have the gethostname function. */
|
||||
#define HAVE_GETHOSTNAME
|
||||
|
||||
/* Define to 1 if you have the getnameinfo function. */
|
||||
#define HAVE_GETNAMEINFO
|
||||
|
||||
/* Define to 1 if you have the getservbyport_r function. */
|
||||
/* #undef HAVE_GETSERVBYPORT_R */
|
||||
|
||||
/* Define to 1 if you have the getservbyname_r function. */
|
||||
/* #undef HAVE_GETSERVBYNAME_R */
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY
|
||||
|
||||
/* Define to 1 if you have the `if_indextoname' function. */
|
||||
#define HAVE_IF_INDEXTONAME
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
|
||||
/* #undef HAVE_INET_NET_PTON */
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
|
||||
#define HAVE_INET_NTOP
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
|
||||
#define HAVE_INET_PTON
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the ioctl function. */
|
||||
#define HAVE_IOCTL
|
||||
|
||||
/* Define to 1 if you have the ioctlsocket function. */
|
||||
/* #undef HAVE_IOCTLSOCKET */
|
||||
|
||||
/* Define to 1 if you have the IoctlSocket camel case function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
|
||||
*/
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctl FIONBIO function. */
|
||||
#define HAVE_IOCTL_FIONBIO
|
||||
|
||||
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
|
||||
#define HAVE_IOCTL_SIOCGIFADDR
|
||||
|
||||
/* Define to 1 if you have the `resolve' library (-lresolve). */
|
||||
/* #undef HAVE_LIBRESOLV */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#define HAVE_LIMITS_H
|
||||
|
||||
/* if your compiler supports LL */
|
||||
#define HAVE_LL
|
||||
|
||||
/* Define to 1 if the compiler supports the 'long long' data type. */
|
||||
#define HAVE_LONGLONG
|
||||
|
||||
/* Define to 1 if you have the malloc.h header file. */
|
||||
/* #undef HAVE_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you have the memory.h header file. */
|
||||
#define HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
|
||||
/* #undef HAVE_MSG_NOSIGNAL */
|
||||
|
||||
/* Define to 1 if you have the <netdb.h> header file. */
|
||||
#define HAVE_NETDB_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/in.h> header file. */
|
||||
#define HAVE_NETINET_IN_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/tcp.h> header file. */
|
||||
#define HAVE_NETINET_TCP_H
|
||||
|
||||
/* Define to 1 if you have the <net/if.h> header file. */
|
||||
#define HAVE_NET_IF_H
|
||||
|
||||
/* Define to 1 if you have PF_INET6. */
|
||||
#define HAVE_PF_INET6
|
||||
|
||||
/* Define to 1 if you have the recv function. */
|
||||
#define HAVE_RECV
|
||||
|
||||
/* Define to 1 if you have the recvfrom function. */
|
||||
#define HAVE_RECVFROM
|
||||
|
||||
/* Define to 1 if you have the send function. */
|
||||
#define HAVE_SEND
|
||||
|
||||
/* Define to 1 if you have the setsockopt function. */
|
||||
#define HAVE_SETSOCKOPT
|
||||
|
||||
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
|
||||
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
|
||||
|
||||
/* Define to 1 if you have the <signal.h> header file. */
|
||||
#define HAVE_SIGNAL_H
|
||||
|
||||
/* Define to 1 if sig_atomic_t is an available typedef. */
|
||||
#define HAVE_SIG_ATOMIC_T
|
||||
|
||||
/* Define to 1 if sig_atomic_t is already defined as volatile. */
|
||||
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
|
||||
|
||||
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
|
||||
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
|
||||
|
||||
/* Define to 1 if you have the socket function. */
|
||||
#define HAVE_SOCKET
|
||||
|
||||
/* Define to 1 if you have the <socket.h> header file. */
|
||||
/* #undef HAVE_SOCKET_H */
|
||||
|
||||
/* Define to 1 if you have the <stdbool.h> header file. */
|
||||
#define HAVE_STDBOOL_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the strcasecmp function. */
|
||||
#define HAVE_STRCASECMP
|
||||
|
||||
/* Define to 1 if you have the strcmpi function. */
|
||||
/* #undef HAVE_STRCMPI */
|
||||
|
||||
/* Define to 1 if you have the strdup function. */
|
||||
#define HAVE_STRDUP
|
||||
|
||||
/* Define to 1 if you have the stricmp function. */
|
||||
/* #undef HAVE_STRICMP */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the strncasecmp function. */
|
||||
#define HAVE_STRNCASECMP
|
||||
|
||||
/* Define to 1 if you have the strncmpi function. */
|
||||
/* #undef HAVE_STRNCMPI */
|
||||
|
||||
/* Define to 1 if you have the strnicmp function. */
|
||||
/* #undef HAVE_STRNICMP */
|
||||
|
||||
/* Define to 1 if you have the <stropts.h> header file. */
|
||||
/* #undef HAVE_STROPTS_H */
|
||||
|
||||
/* Define to 1 if you have struct addrinfo. */
|
||||
#define HAVE_STRUCT_ADDRINFO
|
||||
|
||||
/* Define to 1 if you have struct in6_addr. */
|
||||
#define HAVE_STRUCT_IN6_ADDR
|
||||
|
||||
/* Define to 1 if you have struct sockaddr_in6. */
|
||||
#define HAVE_STRUCT_SOCKADDR_IN6
|
||||
|
||||
/* if struct sockaddr_storage is defined */
|
||||
#define HAVE_STRUCT_SOCKADDR_STORAGE
|
||||
|
||||
/* Define to 1 if you have the timeval struct. */
|
||||
#define HAVE_STRUCT_TIMEVAL
|
||||
|
||||
/* Define to 1 if you have the <sys/ioctl.h> header file. */
|
||||
#define HAVE_SYS_IOCTL_H
|
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */
|
||||
#define HAVE_SYS_PARAM_H
|
||||
|
||||
/* Define to 1 if you have the <sys/select.h> header file. */
|
||||
#define HAVE_SYS_SELECT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/socket.h> header file. */
|
||||
#define HAVE_SYS_SOCKET_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#define HAVE_SYS_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/uio.h> header file. */
|
||||
#define HAVE_SYS_UIO_H
|
||||
|
||||
/* Define to 1 if you have the <time.h> header file. */
|
||||
#define HAVE_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the windows.h header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the winsock2.h header file. */
|
||||
/* #undef HAVE_WINSOCK2_H */
|
||||
|
||||
/* Define to 1 if you have the winsock.h header file. */
|
||||
/* #undef HAVE_WINSOCK_H */
|
||||
|
||||
/* Define to 1 if you have the writev function. */
|
||||
#define HAVE_WRITEV
|
||||
|
||||
/* Define to 1 if you have the ws2tcpip.h header file. */
|
||||
/* #undef HAVE_WS2TCPIP_H */
|
||||
|
||||
/* Define to 1 if you have the __system_property_get function */
|
||||
#define HAVE___SYSTEM_PROPERTY_GET
|
||||
|
||||
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
|
||||
/* #undef NEED_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you need the memory.h header file even with stdlib.h */
|
||||
/* #undef NEED_MEMORY_H */
|
||||
|
||||
/* a suitable file/device to read random data from */
|
||||
#define CARES_RANDOM_FILE "/dev/urandom"
|
||||
|
||||
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_QUAL_ARG5
|
||||
|
||||
/* Define to the type of arg 1 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type pointed by arg 2 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG2 void *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG2_IS_VOID 0
|
||||
|
||||
/* Define to the type of arg 3 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG4 int
|
||||
|
||||
/* Define to the type pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG5 struct sockaddr *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG5_IS_VOID 0
|
||||
|
||||
/* Define to the type pointed by arg 6 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG6 socklen_t *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG6_IS_VOID 0
|
||||
|
||||
/* Define to the function return type for recvfrom. */
|
||||
#define RECVFROM_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to the type of arg 1 for recv. */
|
||||
#define RECV_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for recv. */
|
||||
#define RECV_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for recv. */
|
||||
#define RECV_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recv. */
|
||||
#define RECV_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for recv. */
|
||||
#define RECV_TYPE_RETV ssize_t
|
||||
|
||||
/* Define as the return type of signal handlers (`int' or `void'). */
|
||||
#define RETSIGTYPE
|
||||
|
||||
/* Define to the type qualifier of arg 2 for send. */
|
||||
#define SEND_QUAL_ARG2
|
||||
|
||||
/* Define to the type of arg 1 for send. */
|
||||
#define SEND_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for send. */
|
||||
#define SEND_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for send. */
|
||||
#define SEND_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for send. */
|
||||
#define SEND_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for send. */
|
||||
#define SEND_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
|
||||
#define TIME_WITH_SYS_TIME
|
||||
|
||||
/* Define to disable non-blocking sockets. */
|
||||
#undef USE_BLOCKING_SOCKETS
|
||||
|
||||
/* Define to avoid automatic inclusion of winsock.h */
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
|
||||
/* Type to use in place of in_addr_t when system does not provide it. */
|
||||
#undef in_addr_t
|
||||
|
43
contrib/c-ares-cmake/freebsd/ares_build.h
Normal file
43
contrib/c-ares-cmake/freebsd/ares_build.h
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef __CARES_BUILD_H
|
||||
#define __CARES_BUILD_H
|
||||
|
||||
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
|
||||
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
|
||||
|
||||
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
|
||||
* files. We need to include some dependent headers that may be system specific
|
||||
* for C-Ares */
|
||||
#define CARES_HAVE_SYS_TYPES_H
|
||||
#define CARES_HAVE_SYS_SOCKET_H
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
/* #undef CARES_HAVE_WS2TCPIP_H */
|
||||
/* #undef CARES_HAVE_WINSOCK2_H */
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
#define CARES_HAVE_ARPA_NAMESER_H
|
||||
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
#ifdef CARES_HAVE_SYS_TYPES_H
|
||||
# include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_SYS_SOCKET_H
|
||||
# include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINSOCK2_H
|
||||
# include <winsock2.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WS2TCPIP_H
|
||||
# include <ws2tcpip.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINDOWS_H
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
|
||||
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
|
||||
|
||||
#endif /* __CARES_BUILD_H */
|
432
contrib/c-ares-cmake/freebsd/ares_config.h
Normal file
432
contrib/c-ares-cmake/freebsd/ares_config.h
Normal file
@ -0,0 +1,432 @@
|
||||
/* Generated from ares_config.h.cmake */
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
#undef AC_APPLE_UNIVERSAL_BUILD
|
||||
|
||||
/* define this if ares is built for a big endian system */
|
||||
#undef ARES_BIG_ENDIAN
|
||||
|
||||
/* when building as static part of libcurl */
|
||||
#undef BUILDING_LIBCURL
|
||||
|
||||
/* Defined for build that exposes internal static functions for testing. */
|
||||
#undef CARES_EXPOSE_STATICS
|
||||
|
||||
/* Defined for build with symbol hiding. */
|
||||
#undef CARES_SYMBOL_HIDING
|
||||
|
||||
/* Definition to make a library symbol externally visible. */
|
||||
#undef CARES_SYMBOL_SCOPE_EXTERN
|
||||
|
||||
/* Use resolver library to configure cares */
|
||||
/* #undef CARES_USE_LIBRESOLV */
|
||||
|
||||
/* if a /etc/inet dir is being used */
|
||||
#undef ETC_INET
|
||||
|
||||
/* Define to the type of arg 2 for gethostname. */
|
||||
#define GETHOSTNAME_TYPE_ARG2 size_t
|
||||
|
||||
/* Define to the type qualifier of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_QUAL_ARG1
|
||||
|
||||
/* Define to the type of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
|
||||
|
||||
/* Define to the type of arg 2 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG2 socklen_t
|
||||
|
||||
/* Define to the type of args 4 and 6 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG46 socklen_t
|
||||
|
||||
/* Define to the type of arg 7 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG7 int
|
||||
|
||||
/* Specifies the number of arguments to getservbyport_r */
|
||||
#define GETSERVBYPORT_R_ARGS 6
|
||||
|
||||
/* Specifies the number of arguments to getservbyname_r */
|
||||
#define GETSERVBYNAME_R_ARGS 6
|
||||
|
||||
/* Define to 1 if you have AF_INET6. */
|
||||
#define HAVE_AF_INET6
|
||||
|
||||
/* Define to 1 if you have the <arpa/inet.h> header file. */
|
||||
#define HAVE_ARPA_INET_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_H
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
#define HAVE_ASSERT_H
|
||||
|
||||
/* Define to 1 if you have the `bitncmp' function. */
|
||||
/* #undef HAVE_BITNCMP */
|
||||
|
||||
/* Define to 1 if bool is an available type. */
|
||||
#define HAVE_BOOL_T
|
||||
|
||||
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
|
||||
#define HAVE_CLOCK_GETTIME_MONOTONIC
|
||||
|
||||
/* Define to 1 if you have the closesocket function. */
|
||||
/* #undef HAVE_CLOSESOCKET */
|
||||
|
||||
/* Define to 1 if you have the CloseSocket camel case function. */
|
||||
/* #undef HAVE_CLOSESOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have the connect function. */
|
||||
#define HAVE_CONNECT
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
/* #undef HAVE_CXX11 */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#define HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <errno.h> header file. */
|
||||
#define HAVE_ERRNO_H
|
||||
|
||||
/* Define to 1 if you have the fcntl function. */
|
||||
#define HAVE_FCNTL
|
||||
|
||||
/* Define to 1 if you have the <fcntl.h> header file. */
|
||||
#define HAVE_FCNTL_H
|
||||
|
||||
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
|
||||
#define HAVE_FCNTL_O_NONBLOCK
|
||||
|
||||
/* Define to 1 if you have the freeaddrinfo function. */
|
||||
#define HAVE_FREEADDRINFO
|
||||
|
||||
/* Define to 1 if you have a working getaddrinfo function. */
|
||||
#define HAVE_GETADDRINFO
|
||||
|
||||
/* Define to 1 if the getaddrinfo function is threadsafe. */
|
||||
#define HAVE_GETADDRINFO_THREADSAFE
|
||||
|
||||
/* Define to 1 if you have the getenv function. */
|
||||
#define HAVE_GETENV
|
||||
|
||||
/* Define to 1 if you have the gethostbyaddr function. */
|
||||
#define HAVE_GETHOSTBYADDR
|
||||
|
||||
/* Define to 1 if you have the gethostbyname function. */
|
||||
#define HAVE_GETHOSTBYNAME
|
||||
|
||||
/* Define to 1 if you have the gethostname function. */
|
||||
#define HAVE_GETHOSTNAME
|
||||
|
||||
/* Define to 1 if you have the getnameinfo function. */
|
||||
#define HAVE_GETNAMEINFO
|
||||
|
||||
/* Define to 1 if you have the getservbyport_r function. */
|
||||
#define HAVE_GETSERVBYPORT_R
|
||||
|
||||
/* Define to 1 if you have the getservbyname_r function. */
|
||||
#define HAVE_GETSERVBYNAME_R
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY
|
||||
|
||||
/* Define to 1 if you have the `if_indextoname' function. */
|
||||
#define HAVE_IF_INDEXTONAME
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
|
||||
/* #undef HAVE_INET_NET_PTON */
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
|
||||
#define HAVE_INET_NTOP
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
|
||||
#define HAVE_INET_PTON
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the ioctl function. */
|
||||
#define HAVE_IOCTL
|
||||
|
||||
/* Define to 1 if you have the ioctlsocket function. */
|
||||
/* #undef HAVE_IOCTLSOCKET */
|
||||
|
||||
/* Define to 1 if you have the IoctlSocket camel case function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
|
||||
*/
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctl FIONBIO function. */
|
||||
#define HAVE_IOCTL_FIONBIO
|
||||
|
||||
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
|
||||
#define HAVE_IOCTL_SIOCGIFADDR
|
||||
|
||||
/* Define to 1 if you have the `resolve' library (-lresolve). */
|
||||
/* #undef HAVE_LIBRESOLV */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#define HAVE_LIMITS_H
|
||||
|
||||
/* if your compiler supports LL */
|
||||
#define HAVE_LL
|
||||
|
||||
/* Define to 1 if the compiler supports the 'long long' data type. */
|
||||
#define HAVE_LONGLONG
|
||||
|
||||
/* Define to 1 if you have the malloc.h header file. */
|
||||
/* #undef HAVE_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you have the memory.h header file. */
|
||||
#define HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
|
||||
#define HAVE_MSG_NOSIGNAL
|
||||
|
||||
/* Define to 1 if you have the <netdb.h> header file. */
|
||||
#define HAVE_NETDB_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/in.h> header file. */
|
||||
#define HAVE_NETINET_IN_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/tcp.h> header file. */
|
||||
#define HAVE_NETINET_TCP_H
|
||||
|
||||
/* Define to 1 if you have the <net/if.h> header file. */
|
||||
#define HAVE_NET_IF_H
|
||||
|
||||
/* Define to 1 if you have PF_INET6. */
|
||||
#define HAVE_PF_INET6
|
||||
|
||||
/* Define to 1 if you have the recv function. */
|
||||
#define HAVE_RECV
|
||||
|
||||
/* Define to 1 if you have the recvfrom function. */
|
||||
#define HAVE_RECVFROM
|
||||
|
||||
/* Define to 1 if you have the send function. */
|
||||
#define HAVE_SEND
|
||||
|
||||
/* Define to 1 if you have the setsockopt function. */
|
||||
#define HAVE_SETSOCKOPT
|
||||
|
||||
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
|
||||
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
|
||||
|
||||
/* Define to 1 if you have the <signal.h> header file. */
|
||||
#define HAVE_SIGNAL_H
|
||||
|
||||
/* Define to 1 if sig_atomic_t is an available typedef. */
|
||||
#define HAVE_SIG_ATOMIC_T
|
||||
|
||||
/* Define to 1 if sig_atomic_t is already defined as volatile. */
|
||||
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
|
||||
|
||||
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
|
||||
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
|
||||
|
||||
/* Define to 1 if you have the socket function. */
|
||||
#define HAVE_SOCKET
|
||||
|
||||
/* Define to 1 if you have the <socket.h> header file. */
|
||||
/* #undef HAVE_SOCKET_H */
|
||||
|
||||
/* Define to 1 if you have the <stdbool.h> header file. */
|
||||
#define HAVE_STDBOOL_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the strcasecmp function. */
|
||||
#define HAVE_STRCASECMP
|
||||
|
||||
/* Define to 1 if you have the strcmpi function. */
|
||||
/* #undef HAVE_STRCMPI */
|
||||
|
||||
/* Define to 1 if you have the strdup function. */
|
||||
#define HAVE_STRDUP
|
||||
|
||||
/* Define to 1 if you have the stricmp function. */
|
||||
/* #undef HAVE_STRICMP */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the strncasecmp function. */
|
||||
#define HAVE_STRNCASECMP
|
||||
|
||||
/* Define to 1 if you have the strncmpi function. */
|
||||
/* #undef HAVE_STRNCMPI */
|
||||
|
||||
/* Define to 1 if you have the strnicmp function. */
|
||||
/* #undef HAVE_STRNICMP */
|
||||
|
||||
/* Define to 1 if you have the <stropts.h> header file. */
|
||||
/* #undef HAVE_STROPTS_H */
|
||||
|
||||
/* Define to 1 if you have struct addrinfo. */
|
||||
#define HAVE_STRUCT_ADDRINFO
|
||||
|
||||
/* Define to 1 if you have struct in6_addr. */
|
||||
#define HAVE_STRUCT_IN6_ADDR
|
||||
|
||||
/* Define to 1 if you have struct sockaddr_in6. */
|
||||
#define HAVE_STRUCT_SOCKADDR_IN6
|
||||
|
||||
/* if struct sockaddr_storage is defined */
|
||||
#define HAVE_STRUCT_SOCKADDR_STORAGE
|
||||
|
||||
/* Define to 1 if you have the timeval struct. */
|
||||
#define HAVE_STRUCT_TIMEVAL
|
||||
|
||||
/* Define to 1 if you have the <sys/ioctl.h> header file. */
|
||||
#define HAVE_SYS_IOCTL_H
|
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */
|
||||
#define HAVE_SYS_PARAM_H
|
||||
|
||||
/* Define to 1 if you have the <sys/select.h> header file. */
|
||||
#define HAVE_SYS_SELECT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/socket.h> header file. */
|
||||
#define HAVE_SYS_SOCKET_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#define HAVE_SYS_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/uio.h> header file. */
|
||||
#define HAVE_SYS_UIO_H
|
||||
|
||||
/* Define to 1 if you have the <time.h> header file. */
|
||||
#define HAVE_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the windows.h header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the winsock2.h header file. */
|
||||
/* #undef HAVE_WINSOCK2_H */
|
||||
|
||||
/* Define to 1 if you have the winsock.h header file. */
|
||||
/* #undef HAVE_WINSOCK_H */
|
||||
|
||||
/* Define to 1 if you have the writev function. */
|
||||
#define HAVE_WRITEV
|
||||
|
||||
/* Define to 1 if you have the ws2tcpip.h header file. */
|
||||
/* #undef HAVE_WS2TCPIP_H */
|
||||
|
||||
/* Define to 1 if you have the __system_property_get function */
|
||||
#define HAVE___SYSTEM_PROPERTY_GET
|
||||
|
||||
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
|
||||
/* #undef NEED_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you need the memory.h header file even with stdlib.h */
|
||||
/* #undef NEED_MEMORY_H */
|
||||
|
||||
/* a suitable file/device to read random data from */
|
||||
#define CARES_RANDOM_FILE "/dev/urandom"
|
||||
|
||||
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_QUAL_ARG5
|
||||
|
||||
/* Define to the type of arg 1 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type pointed by arg 2 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG2 void *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG2_IS_VOID 0
|
||||
|
||||
/* Define to the type of arg 3 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG4 int
|
||||
|
||||
/* Define to the type pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG5 struct sockaddr *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG5_IS_VOID 0
|
||||
|
||||
/* Define to the type pointed by arg 6 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG6 socklen_t *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG6_IS_VOID 0
|
||||
|
||||
/* Define to the function return type for recvfrom. */
|
||||
#define RECVFROM_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to the type of arg 1 for recv. */
|
||||
#define RECV_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for recv. */
|
||||
#define RECV_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for recv. */
|
||||
#define RECV_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recv. */
|
||||
#define RECV_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for recv. */
|
||||
#define RECV_TYPE_RETV ssize_t
|
||||
|
||||
/* Define as the return type of signal handlers (`int' or `void'). */
|
||||
#define RETSIGTYPE
|
||||
|
||||
/* Define to the type qualifier of arg 2 for send. */
|
||||
#define SEND_QUAL_ARG2
|
||||
|
||||
/* Define to the type of arg 1 for send. */
|
||||
#define SEND_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for send. */
|
||||
#define SEND_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for send. */
|
||||
#define SEND_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for send. */
|
||||
#define SEND_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for send. */
|
||||
#define SEND_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
|
||||
#define TIME_WITH_SYS_TIME
|
||||
|
||||
/* Define to disable non-blocking sockets. */
|
||||
#undef USE_BLOCKING_SOCKETS
|
||||
|
||||
/* Define to avoid automatic inclusion of winsock.h */
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
|
||||
/* Type to use in place of in_addr_t when system does not provide it. */
|
||||
#undef in_addr_t
|
||||
|
43
contrib/c-ares-cmake/linux/ares_build.h
Normal file
43
contrib/c-ares-cmake/linux/ares_build.h
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef __CARES_BUILD_H
|
||||
#define __CARES_BUILD_H
|
||||
|
||||
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
|
||||
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
|
||||
|
||||
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
|
||||
* files. We need to include some dependent headers that may be system specific
|
||||
* for C-Ares */
|
||||
#define CARES_HAVE_SYS_TYPES_H
|
||||
#define CARES_HAVE_SYS_SOCKET_H
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
/* #undef CARES_HAVE_WS2TCPIP_H */
|
||||
/* #undef CARES_HAVE_WINSOCK2_H */
|
||||
/* #undef CARES_HAVE_WINDOWS_H */
|
||||
#define CARES_HAVE_ARPA_NAMESER_H
|
||||
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
#ifdef CARES_HAVE_SYS_TYPES_H
|
||||
# include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_SYS_SOCKET_H
|
||||
# include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINSOCK2_H
|
||||
# include <winsock2.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WS2TCPIP_H
|
||||
# include <ws2tcpip.h>
|
||||
#endif
|
||||
|
||||
#ifdef CARES_HAVE_WINDOWS_H
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
|
||||
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
|
||||
|
||||
#endif /* __CARES_BUILD_H */
|
432
contrib/c-ares-cmake/linux/ares_config.h
Normal file
432
contrib/c-ares-cmake/linux/ares_config.h
Normal file
@ -0,0 +1,432 @@
|
||||
/* Generated from ares_config.h.cmake */
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
#undef AC_APPLE_UNIVERSAL_BUILD
|
||||
|
||||
/* define this if ares is built for a big endian system */
|
||||
#undef ARES_BIG_ENDIAN
|
||||
|
||||
/* when building as static part of libcurl */
|
||||
#undef BUILDING_LIBCURL
|
||||
|
||||
/* Defined for build that exposes internal static functions for testing. */
|
||||
#undef CARES_EXPOSE_STATICS
|
||||
|
||||
/* Defined for build with symbol hiding. */
|
||||
#undef CARES_SYMBOL_HIDING
|
||||
|
||||
/* Definition to make a library symbol externally visible. */
|
||||
#undef CARES_SYMBOL_SCOPE_EXTERN
|
||||
|
||||
/* Use resolver library to configure cares */
|
||||
/* #undef CARES_USE_LIBRESOLV */
|
||||
|
||||
/* if a /etc/inet dir is being used */
|
||||
#undef ETC_INET
|
||||
|
||||
/* Define to the type of arg 2 for gethostname. */
|
||||
#define GETHOSTNAME_TYPE_ARG2 size_t
|
||||
|
||||
/* Define to the type qualifier of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_QUAL_ARG1
|
||||
|
||||
/* Define to the type of arg 1 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
|
||||
|
||||
/* Define to the type of arg 2 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG2 socklen_t
|
||||
|
||||
/* Define to the type of args 4 and 6 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG46 socklen_t
|
||||
|
||||
/* Define to the type of arg 7 for getnameinfo. */
|
||||
#define GETNAMEINFO_TYPE_ARG7 int
|
||||
|
||||
/* Specifies the number of arguments to getservbyport_r */
|
||||
#define GETSERVBYPORT_R_ARGS 6
|
||||
|
||||
/* Specifies the number of arguments to getservbyname_r */
|
||||
#define GETSERVBYNAME_R_ARGS 6
|
||||
|
||||
/* Define to 1 if you have AF_INET6. */
|
||||
#define HAVE_AF_INET6
|
||||
|
||||
/* Define to 1 if you have the <arpa/inet.h> header file. */
|
||||
#define HAVE_ARPA_INET_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_COMPAT_H
|
||||
|
||||
/* Define to 1 if you have the <arpa/nameser.h> header file. */
|
||||
#define HAVE_ARPA_NAMESER_H
|
||||
|
||||
/* Define to 1 if you have the <assert.h> header file. */
|
||||
#define HAVE_ASSERT_H
|
||||
|
||||
/* Define to 1 if you have the `bitncmp' function. */
|
||||
/* #undef HAVE_BITNCMP */
|
||||
|
||||
/* Define to 1 if bool is an available type. */
|
||||
#define HAVE_BOOL_T
|
||||
|
||||
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
|
||||
#define HAVE_CLOCK_GETTIME_MONOTONIC
|
||||
|
||||
/* Define to 1 if you have the closesocket function. */
|
||||
/* #undef HAVE_CLOSESOCKET */
|
||||
|
||||
/* Define to 1 if you have the CloseSocket camel case function. */
|
||||
/* #undef HAVE_CLOSESOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have the connect function. */
|
||||
#define HAVE_CONNECT
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
/* #undef HAVE_CXX11 */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#define HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <errno.h> header file. */
|
||||
#define HAVE_ERRNO_H
|
||||
|
||||
/* Define to 1 if you have the fcntl function. */
|
||||
#define HAVE_FCNTL
|
||||
|
||||
/* Define to 1 if you have the <fcntl.h> header file. */
|
||||
#define HAVE_FCNTL_H
|
||||
|
||||
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
|
||||
#define HAVE_FCNTL_O_NONBLOCK
|
||||
|
||||
/* Define to 1 if you have the freeaddrinfo function. */
|
||||
#define HAVE_FREEADDRINFO
|
||||
|
||||
/* Define to 1 if you have a working getaddrinfo function. */
|
||||
#define HAVE_GETADDRINFO
|
||||
|
||||
/* Define to 1 if the getaddrinfo function is threadsafe. */
|
||||
/* #undef HAVE_GETADDRINFO_THREADSAFE */
|
||||
|
||||
/* Define to 1 if you have the getenv function. */
|
||||
#define HAVE_GETENV
|
||||
|
||||
/* Define to 1 if you have the gethostbyaddr function. */
|
||||
#define HAVE_GETHOSTBYADDR
|
||||
|
||||
/* Define to 1 if you have the gethostbyname function. */
|
||||
#define HAVE_GETHOSTBYNAME
|
||||
|
||||
/* Define to 1 if you have the gethostname function. */
|
||||
#define HAVE_GETHOSTNAME
|
||||
|
||||
/* Define to 1 if you have the getnameinfo function. */
|
||||
#define HAVE_GETNAMEINFO
|
||||
|
||||
/* Define to 1 if you have the getservbyport_r function. */
|
||||
#define HAVE_GETSERVBYPORT_R
|
||||
|
||||
/* Define to 1 if you have the getservbyname_r function. */
|
||||
#define HAVE_GETSERVBYNAME_R
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY
|
||||
|
||||
/* Define to 1 if you have the `if_indextoname' function. */
|
||||
#define HAVE_IF_INDEXTONAME
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
|
||||
/* #undef HAVE_INET_NET_PTON */
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
|
||||
#define HAVE_INET_NTOP
|
||||
|
||||
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
|
||||
#define HAVE_INET_PTON
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the ioctl function. */
|
||||
#define HAVE_IOCTL
|
||||
|
||||
/* Define to 1 if you have the ioctlsocket function. */
|
||||
/* #undef HAVE_IOCTLSOCKET */
|
||||
|
||||
/* Define to 1 if you have the IoctlSocket camel case function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL */
|
||||
|
||||
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
|
||||
*/
|
||||
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
|
||||
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
|
||||
|
||||
/* Define to 1 if you have a working ioctl FIONBIO function. */
|
||||
#define HAVE_IOCTL_FIONBIO
|
||||
|
||||
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
|
||||
#define HAVE_IOCTL_SIOCGIFADDR
|
||||
|
||||
/* Define to 1 if you have the `resolve' library (-lresolve). */
|
||||
/* #undef HAVE_LIBRESOLV */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#define HAVE_LIMITS_H
|
||||
|
||||
/* if your compiler supports LL */
|
||||
#define HAVE_LL
|
||||
|
||||
/* Define to 1 if the compiler supports the 'long long' data type. */
|
||||
#define HAVE_LONGLONG
|
||||
|
||||
/* Define to 1 if you have the malloc.h header file. */
|
||||
#define HAVE_MALLOC_H
|
||||
|
||||
/* Define to 1 if you have the memory.h header file. */
|
||||
#define HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
|
||||
#define HAVE_MSG_NOSIGNAL
|
||||
|
||||
/* Define to 1 if you have the <netdb.h> header file. */
|
||||
#define HAVE_NETDB_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/in.h> header file. */
|
||||
#define HAVE_NETINET_IN_H
|
||||
|
||||
/* Define to 1 if you have the <netinet/tcp.h> header file. */
|
||||
#define HAVE_NETINET_TCP_H
|
||||
|
||||
/* Define to 1 if you have the <net/if.h> header file. */
|
||||
#define HAVE_NET_IF_H
|
||||
|
||||
/* Define to 1 if you have PF_INET6. */
|
||||
#define HAVE_PF_INET6
|
||||
|
||||
/* Define to 1 if you have the recv function. */
|
||||
#define HAVE_RECV
|
||||
|
||||
/* Define to 1 if you have the recvfrom function. */
|
||||
#define HAVE_RECVFROM
|
||||
|
||||
/* Define to 1 if you have the send function. */
|
||||
#define HAVE_SEND
|
||||
|
||||
/* Define to 1 if you have the setsockopt function. */
|
||||
#define HAVE_SETSOCKOPT
|
||||
|
||||
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
|
||||
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
|
||||
|
||||
/* Define to 1 if you have the <signal.h> header file. */
|
||||
#define HAVE_SIGNAL_H
|
||||
|
||||
/* Define to 1 if sig_atomic_t is an available typedef. */
|
||||
#define HAVE_SIG_ATOMIC_T
|
||||
|
||||
/* Define to 1 if sig_atomic_t is already defined as volatile. */
|
||||
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
|
||||
|
||||
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
|
||||
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
|
||||
|
||||
/* Define to 1 if you have the socket function. */
|
||||
#define HAVE_SOCKET
|
||||
|
||||
/* Define to 1 if you have the <socket.h> header file. */
|
||||
/* #undef HAVE_SOCKET_H */
|
||||
|
||||
/* Define to 1 if you have the <stdbool.h> header file. */
|
||||
#define HAVE_STDBOOL_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the strcasecmp function. */
|
||||
#define HAVE_STRCASECMP
|
||||
|
||||
/* Define to 1 if you have the strcmpi function. */
|
||||
/* #undef HAVE_STRCMPI */
|
||||
|
||||
/* Define to 1 if you have the strdup function. */
|
||||
#define HAVE_STRDUP
|
||||
|
||||
/* Define to 1 if you have the stricmp function. */
|
||||
/* #undef HAVE_STRICMP */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the strncasecmp function. */
|
||||
#define HAVE_STRNCASECMP
|
||||
|
||||
/* Define to 1 if you have the strncmpi function. */
|
||||
/* #undef HAVE_STRNCMPI */
|
||||
|
||||
/* Define to 1 if you have the strnicmp function. */
|
||||
/* #undef HAVE_STRNICMP */
|
||||
|
||||
/* Define to 1 if you have the <stropts.h> header file. */
|
||||
#define HAVE_STROPTS_H
|
||||
|
||||
/* Define to 1 if you have struct addrinfo. */
|
||||
#define HAVE_STRUCT_ADDRINFO
|
||||
|
||||
/* Define to 1 if you have struct in6_addr. */
|
||||
#define HAVE_STRUCT_IN6_ADDR
|
||||
|
||||
/* Define to 1 if you have struct sockaddr_in6. */
|
||||
#define HAVE_STRUCT_SOCKADDR_IN6
|
||||
|
||||
/* if struct sockaddr_storage is defined */
|
||||
#define HAVE_STRUCT_SOCKADDR_STORAGE
|
||||
|
||||
/* Define to 1 if you have the timeval struct. */
|
||||
#define HAVE_STRUCT_TIMEVAL
|
||||
|
||||
/* Define to 1 if you have the <sys/ioctl.h> header file. */
|
||||
#define HAVE_SYS_IOCTL_H
|
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */
|
||||
#define HAVE_SYS_PARAM_H
|
||||
|
||||
/* Define to 1 if you have the <sys/select.h> header file. */
|
||||
#define HAVE_SYS_SELECT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/socket.h> header file. */
|
||||
#define HAVE_SYS_SOCKET_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#define HAVE_SYS_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/uio.h> header file. */
|
||||
#define HAVE_SYS_UIO_H
|
||||
|
||||
/* Define to 1 if you have the <time.h> header file. */
|
||||
#define HAVE_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the windows.h header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the winsock2.h header file. */
|
||||
/* #undef HAVE_WINSOCK2_H */
|
||||
|
||||
/* Define to 1 if you have the winsock.h header file. */
|
||||
/* #undef HAVE_WINSOCK_H */
|
||||
|
||||
/* Define to 1 if you have the writev function. */
|
||||
#define HAVE_WRITEV
|
||||
|
||||
/* Define to 1 if you have the ws2tcpip.h header file. */
|
||||
/* #undef HAVE_WS2TCPIP_H */
|
||||
|
||||
/* Define to 1 if you have the __system_property_get function */
|
||||
#define HAVE___SYSTEM_PROPERTY_GET
|
||||
|
||||
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
|
||||
/* #undef NEED_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you need the memory.h header file even with stdlib.h */
|
||||
/* #undef NEED_MEMORY_H */
|
||||
|
||||
/* a suitable file/device to read random data from */
|
||||
#define CARES_RANDOM_FILE "/dev/urandom"
|
||||
|
||||
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_QUAL_ARG5
|
||||
|
||||
/* Define to the type of arg 1 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type pointed by arg 2 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG2 void *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG2_IS_VOID 0
|
||||
|
||||
/* Define to the type of arg 3 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG4 int
|
||||
|
||||
/* Define to the type pointed by arg 5 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG5 struct sockaddr *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG5_IS_VOID 0
|
||||
|
||||
/* Define to the type pointed by arg 6 for recvfrom. */
|
||||
#define RECVFROM_TYPE_ARG6 socklen_t *
|
||||
|
||||
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
|
||||
#define RECVFROM_TYPE_ARG6_IS_VOID 0
|
||||
|
||||
/* Define to the function return type for recvfrom. */
|
||||
#define RECVFROM_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to the type of arg 1 for recv. */
|
||||
#define RECV_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for recv. */
|
||||
#define RECV_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for recv. */
|
||||
#define RECV_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for recv. */
|
||||
#define RECV_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for recv. */
|
||||
#define RECV_TYPE_RETV ssize_t
|
||||
|
||||
/* Define as the return type of signal handlers (`int' or `void'). */
|
||||
#define RETSIGTYPE
|
||||
|
||||
/* Define to the type qualifier of arg 2 for send. */
|
||||
#define SEND_QUAL_ARG2
|
||||
|
||||
/* Define to the type of arg 1 for send. */
|
||||
#define SEND_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 for send. */
|
||||
#define SEND_TYPE_ARG2 void *
|
||||
|
||||
/* Define to the type of arg 3 for send. */
|
||||
#define SEND_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the type of arg 4 for send. */
|
||||
#define SEND_TYPE_ARG4 int
|
||||
|
||||
/* Define to the function return type for send. */
|
||||
#define SEND_TYPE_RETV ssize_t
|
||||
|
||||
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
|
||||
#define TIME_WITH_SYS_TIME
|
||||
|
||||
/* Define to disable non-blocking sockets. */
|
||||
#undef USE_BLOCKING_SOCKETS
|
||||
|
||||
/* Define to avoid automatic inclusion of winsock.h */
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
|
||||
/* Type to use in place of in_addr_t when system does not provide it. */
|
||||
#undef in_addr_t
|
||||
|
@ -415,7 +415,7 @@
|
||||
/*
|
||||
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
|
||||
*/
|
||||
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
|
||||
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
|
||||
|
||||
/* Performs additional safety checks when defined. */
|
||||
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
|
||||
|
@ -440,7 +440,9 @@
|
||||
#define HAVE_STRERROR 1
|
||||
|
||||
/* Define to 1 if you have the `strerror_r' function. */
|
||||
#ifndef USE_MUSL
|
||||
#define HAVE_STRERROR_R 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
2
contrib/libcpuid
vendored
2
contrib/libcpuid
vendored
@ -1 +1 @@
|
||||
Subproject commit 8db3b8d2d32d22437f063ce692a1b9bb15e42d18
|
||||
Subproject commit 503083acb77edf9fbce22a05826307dff2ce96e6
|
@ -63,6 +63,13 @@ target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
|
||||
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
|
||||
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
|
||||
|
||||
# NOTE: this is a dirty hack to avoid and instead pg_config.h should be shipped
|
||||
# for different OS'es like for jemalloc, not one generic for all OS'es like
|
||||
# now.
|
||||
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
|
||||
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
|
||||
endif()
|
||||
|
||||
target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
|
||||
|
||||
add_library(ch_contrib::libpq ALIAS _libpq)
|
||||
|
2
contrib/librdkafka
vendored
2
contrib/librdkafka
vendored
@ -1 +1 @@
|
||||
Subproject commit ff32b4e9eeafd0b276f010ee969179e4e9e6d0b2
|
||||
Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f
|
2
contrib/llvm
vendored
2
contrib/llvm
vendored
@ -1 +1 @@
|
||||
Subproject commit 20607e61728e97c969e536644c3c0c1bb1a50672
|
||||
Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc
|
2
contrib/sentry-native
vendored
2
contrib/sentry-native
vendored
@ -1 +1 @@
|
||||
Subproject commit f431047ac8da13179c488018dddf1c0d0771a997
|
||||
Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b
|
2
contrib/vectorscan
vendored
2
contrib/vectorscan
vendored
@ -1 +1 @@
|
||||
Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d
|
||||
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2
|
@ -304,7 +304,7 @@ target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
|
||||
# Please regenerate these files if you update vectorscan.
|
||||
|
||||
if (ARCH_AMD64)
|
||||
target_include_directories (_vectorscan PRIVATE x86_64)
|
||||
target_include_directories (_vectorscan PRIVATE amd64)
|
||||
endif ()
|
||||
|
||||
if (ARCH_AARCH64)
|
||||
|
@ -67,24 +67,5 @@ ENV GOCACHE=/workdir/
|
||||
RUN mkdir /workdir && chmod 777 /workdir
|
||||
WORKDIR /workdir
|
||||
|
||||
# NOTE: thread sanitizer is broken in clang-14, we have to build it with clang-15
|
||||
# https://github.com/ClickHouse/ClickHouse/pull/39450
|
||||
# https://github.com/google/sanitizers/issues/1540
|
||||
# https://github.com/google/sanitizers/issues/1552
|
||||
|
||||
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
|
||||
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-15 main" >> \
|
||||
/etc/apt/sources.list.d/clang.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install \
|
||||
clang-15 \
|
||||
llvm-15 \
|
||||
clang-tidy-15 \
|
||||
--yes --no-install-recommends \
|
||||
&& apt-get clean
|
||||
|
||||
# for external_symbolizer_path
|
||||
RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
|
||||
|
||||
COPY build.sh /
|
||||
CMD ["bash", "-c", "/build.sh 2>&1"]
|
||||
|
@ -339,17 +339,16 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--compiler",
|
||||
choices=(
|
||||
"clang-15", # For TSAN builds, see #39450
|
||||
"clang-14",
|
||||
"clang-14-darwin",
|
||||
"clang-14-darwin-aarch64",
|
||||
"clang-14-aarch64",
|
||||
"clang-14-ppc64le",
|
||||
"clang-14-amd64sse2",
|
||||
"clang-14-freebsd",
|
||||
"clang-15",
|
||||
"clang-15-darwin",
|
||||
"clang-15-darwin-aarch64",
|
||||
"clang-15-aarch64",
|
||||
"clang-15-ppc64le",
|
||||
"clang-15-amd64sse2",
|
||||
"clang-15-freebsd",
|
||||
"gcc-11",
|
||||
),
|
||||
default="clang-14",
|
||||
default="clang-15",
|
||||
help="a compiler to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -16,11 +16,10 @@ RUN apt-get update \
|
||||
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
|
||||
# TSAN will flush shadow memory when reaching this limit.
|
||||
# It may cause false-negatives, but it's better than OOM.
|
||||
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment; \
|
||||
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
|
||||
echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \
|
||||
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
|
||||
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
|
||||
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment
|
||||
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
|
||||
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
|
||||
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
|
||||
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
|
||||
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080'
|
||||
|
@ -8,16 +8,41 @@ FROM clickhouse/binary-builder:$FROM_TAG
|
||||
ARG apt_archive="http://archive.ubuntu.com"
|
||||
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
|
||||
|
||||
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 libllvm14 libclang-14-dev libmlir-14-dev
|
||||
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
|
||||
|
||||
# libclang-15-dev does not contain proper symlink:
|
||||
#
|
||||
# This is what cmake will search for:
|
||||
#
|
||||
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
|
||||
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
|
||||
#
|
||||
# This is what exists:
|
||||
#
|
||||
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
|
||||
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
|
||||
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
|
||||
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
|
||||
#
|
||||
ARG TARGETARCH
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) rarch=x86_64 ;; \
|
||||
arm64) rarch=aarch64 ;; \
|
||||
*) exit 1 ;; \
|
||||
esac \
|
||||
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
|
||||
|
||||
# repo versions doesn't work correctly with C++17
|
||||
# also we push reports to s3, so we add index.html to subfolder urls
|
||||
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
|
||||
# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022
|
||||
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser --branch llvm-14 \
|
||||
#
|
||||
# FIXME: update location of a repo
|
||||
RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \
|
||||
&& cd woboq_codebrowser \
|
||||
&& cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 \
|
||||
&& make -j \
|
||||
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
|
||||
&& ninja \
|
||||
&& cd .. \
|
||||
&& rm -rf woboq_codebrowser
|
||||
|
||||
@ -32,7 +57,7 @@ ENV SHA=nosha
|
||||
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
|
||||
|
||||
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
|
||||
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-14 -DCMAKE_C_COMPILER=/usr/bin/clang-14 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
|
||||
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
|
||||
mkdir -p $HTML_RESULT_DIRECTORY && \
|
||||
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
|
||||
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
|
||||
|
@ -19,7 +19,7 @@ stage=${stage:-}
|
||||
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
echo "$script_dir"
|
||||
repo_dir=ch
|
||||
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"}
|
||||
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
|
||||
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
|
||||
|
||||
function clone
|
||||
|
@ -2,7 +2,7 @@
|
||||
set -euo pipefail
|
||||
|
||||
|
||||
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
|
||||
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
|
||||
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
|
||||
|
||||
|
||||
|
@ -61,7 +61,7 @@ function configure
|
||||
cp -rv right/config left ||:
|
||||
|
||||
# Start a temporary server to rename the tables
|
||||
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
|
||||
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
|
||||
echo all killed
|
||||
|
||||
set -m # Spawn temporary in its own process groups
|
||||
@ -88,7 +88,7 @@ function configure
|
||||
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
|
||||
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
|
||||
|
||||
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
|
||||
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
|
||||
echo all killed
|
||||
|
||||
# Make copies of the original db for both servers. Use hardlinks instead
|
||||
@ -106,7 +106,7 @@ function configure
|
||||
|
||||
function restart
|
||||
{
|
||||
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
|
||||
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
|
||||
echo all killed
|
||||
|
||||
# Change the jemalloc settings here.
|
||||
@ -1400,7 +1400,7 @@ case "$stage" in
|
||||
while env kill -- -$watchdog_pid ; do sleep 1; done
|
||||
|
||||
# Stop the servers to free memory for the subsequent query analysis.
|
||||
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
|
||||
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
|
||||
echo Servers stopped.
|
||||
;&
|
||||
"analyze_queries")
|
||||
|
@ -5,7 +5,7 @@ FROM ubuntu:20.04
|
||||
ARG apt_archive="http://archive.ubuntu.com"
|
||||
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
|
||||
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install \
|
||||
@ -56,6 +56,8 @@ RUN apt-get update \
|
||||
|
||||
# This symlink required by gcc to find lld compiler
|
||||
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
||||
# for external_symbolizer_path
|
||||
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
|
||||
|
||||
ARG CCACHE_VERSION=4.6.1
|
||||
RUN mkdir /tmp/ccache \
|
||||
|
@ -134,6 +134,13 @@ Example of configuration for versions later or equal to 22.8:
|
||||
<max_size>10000000</max_size>
|
||||
</cache>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>cache</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
@ -151,6 +158,13 @@ Example of configuration for versions earlier than 22.8:
|
||||
<data_cache_size>10000000</data_cache_size>
|
||||
</s3>
|
||||
</disks>
|
||||
<policies>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>s3</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
<policies>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
|
@ -2,10 +2,9 @@
|
||||
slug: /en/operations/troubleshooting
|
||||
sidebar_position: 46
|
||||
sidebar_label: Troubleshooting
|
||||
title: Troubleshooting
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
- [Installation](#troubleshooting-installation-errors)
|
||||
- [Connecting to the server](#troubleshooting-accepts-no-connections)
|
||||
- [Query processing](#troubleshooting-does-not-process-queries)
|
||||
|
@ -1227,6 +1227,8 @@ Result:
|
||||
|
||||
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
|
||||
|
||||
Alias: `fromUnixTimestamp`.
|
||||
|
||||
**Example:**
|
||||
|
||||
Query:
|
||||
|
@ -1823,6 +1823,36 @@ Result:
|
||||
Evaluate external model.
|
||||
Accepts a model name and model arguments. Returns Float64.
|
||||
|
||||
## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
|
||||
|
||||
Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learing.
|
||||
Accepts a path to a catboost model and model arguments (features). Returns Float64.
|
||||
|
||||
``` sql
|
||||
SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction
|
||||
FROM data_table
|
||||
```
|
||||
|
||||
**Prerequisites**
|
||||
|
||||
1. Build the catboost evaluation library
|
||||
|
||||
Before evaluating catboost models, the `libcatboostmodel.<so|dylib>` library must be made available. See [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html) how to compile it.
|
||||
|
||||
Next, specify the path to `libcatboostmodel.<so|dylib>` in the clickhouse configuration:
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
...
|
||||
<catboost_lib_path>/path/to/libcatboostmodel.so</catboost_lib_path>
|
||||
...
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
2. Train a catboost model using libcatboost
|
||||
|
||||
See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set.
|
||||
|
||||
## throwIf(x\[, message\[, error_code\]\])
|
||||
|
||||
Throw an exception if the argument is non zero.
|
||||
|
@ -30,7 +30,12 @@ SELECT name, status FROM system.dictionaries;
|
||||
|
||||
## RELOAD MODELS
|
||||
|
||||
Reloads all [CatBoost](../../guides/developer/apply-catboost-model.md) models if the configuration was updated without restarting the server.
|
||||
:::note
|
||||
This statement and `SYSTEM RELOAD MODEL` merely unload catboost models from the clickhouse-library-bridge. The function `catboostEvaluate()`
|
||||
loads a model upon first access if it is not loaded yet.
|
||||
:::
|
||||
|
||||
Unloads all CatBoost models.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -40,12 +45,12 @@ SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
|
||||
|
||||
## RELOAD MODEL
|
||||
|
||||
Completely reloads a CatBoost model `model_name` if the configuration was updated without restarting the server.
|
||||
Unloads a CatBoost model at `model_path`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_name>
|
||||
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_path>
|
||||
```
|
||||
|
||||
## RELOAD FUNCTIONS
|
||||
|
@ -13,7 +13,7 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
file(path, format, structure)
|
||||
file(path [,format] [,structure])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
@ -11,7 +11,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
|
||||
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -10,7 +10,7 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
|
||||
s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -13,7 +13,7 @@ sidebar_label: url
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
url(URL, format, structure)
|
||||
url(URL [,format] [,structure])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
@ -155,7 +155,6 @@ getting_started/index.md getting-started/index.md
|
||||
getting_started/install.md getting-started/install.md
|
||||
getting_started/playground.md getting-started/playground.md
|
||||
getting_started/tutorial.md getting-started/tutorial.md
|
||||
guides/apply_catboost_model.md guides/apply-catboost-model.md
|
||||
images/column_oriented.gif images/column-oriented.gif
|
||||
images/row_oriented.gif images/row-oriented.gif
|
||||
interfaces/http_interface.md interfaces/http.md
|
||||
|
@ -1,241 +0,0 @@
|
||||
---
|
||||
slug: /ru/guides/apply-catboost-model
|
||||
sidebar_position: 41
|
||||
sidebar_label: "Применение модели CatBoost в ClickHouse"
|
||||
---
|
||||
|
||||
# Применение модели CatBoost в ClickHouse {#applying-catboost-model-in-clickhouse}
|
||||
|
||||
[CatBoost](https://catboost.ai) — открытая программная библиотека разработанная компанией [Яндекс](https://yandex.ru/company/) для машинного обучения, которая использует схему градиентного бустинга.
|
||||
|
||||
С помощью этой инструкции вы научитесь применять предобученные модели в ClickHouse: в результате вы запустите вывод модели из SQL.
|
||||
|
||||
Чтобы применить модель CatBoost в ClickHouse:
|
||||
|
||||
1. [Создайте таблицу](#create-table).
|
||||
2. [Вставьте данные в таблицу](#insert-data-to-table).
|
||||
3. [Интегрируйте CatBoost в ClickHouse](#integrate-catboost-into-clickhouse) (Опциональный шаг).
|
||||
4. [Запустите вывод модели из SQL](#run-model-inference).
|
||||
|
||||
Подробнее об обучении моделей в CatBoost, см. [Обучение и применение моделей](https://catboost.ai/docs/features/training.html#training).
|
||||
|
||||
Вы можете перегрузить модели CatBoost, если их конфигурация была обновлена, без перезагрузки сервера. Для этого используйте системные запросы [RELOAD MODEL](../sql-reference/statements/system.md#query_language-system-reload-model) и [RELOAD MODELS](../sql-reference/statements/system.md#query_language-system-reload-models).
|
||||
|
||||
## Перед началом работы {#prerequisites}
|
||||
|
||||
Если у вас еще нет [Docker](https://docs.docker.com/install/), установите его.
|
||||
|
||||
:::note "Примечание"
|
||||
[Docker](https://www.docker.com) – это программная платформа для создания контейнеров, которые изолируют установку CatBoost и ClickHouse от остальной части системы.
|
||||
:::
|
||||
Перед применением модели CatBoost:
|
||||
|
||||
**1.** Скачайте [Docker-образ](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) из реестра:
|
||||
|
||||
``` bash
|
||||
$ docker pull yandex/tutorial-catboost-clickhouse
|
||||
```
|
||||
|
||||
Данный Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse: код, среду выполнения, библиотеки, переменные окружения и файлы конфигурации.
|
||||
|
||||
**2.** Проверьте, что Docker-образ успешно скачался:
|
||||
|
||||
``` bash
|
||||
$ docker image ls
|
||||
REPOSITORY TAG IMAGE ID CREATED SIZE
|
||||
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
|
||||
```
|
||||
|
||||
**3.** Запустите Docker-контейнер основанный на данном образе:
|
||||
|
||||
``` bash
|
||||
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
|
||||
```
|
||||
|
||||
## 1. Создайте таблицу {#create-table}
|
||||
|
||||
Чтобы создать таблицу для обучающей выборки:
|
||||
|
||||
**1.** Запустите клиент ClickHouse:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client
|
||||
```
|
||||
|
||||
:::note "Примечание"
|
||||
Сервер ClickHouse уже запущен внутри Docker-контейнера.
|
||||
:::
|
||||
**2.** Создайте таблицу в ClickHouse с помощью следующей команды:
|
||||
|
||||
``` sql
|
||||
:) CREATE TABLE amazon_train
|
||||
(
|
||||
date Date MATERIALIZED today(),
|
||||
ACTION UInt8,
|
||||
RESOURCE UInt32,
|
||||
MGR_ID UInt32,
|
||||
ROLE_ROLLUP_1 UInt32,
|
||||
ROLE_ROLLUP_2 UInt32,
|
||||
ROLE_DEPTNAME UInt32,
|
||||
ROLE_TITLE UInt32,
|
||||
ROLE_FAMILY_DESC UInt32,
|
||||
ROLE_FAMILY UInt32,
|
||||
ROLE_CODE UInt32
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY date
|
||||
```
|
||||
|
||||
**3.** Выйдите из клиента ClickHouse:
|
||||
|
||||
``` sql
|
||||
:) exit
|
||||
```
|
||||
|
||||
## 2. Вставьте данные в таблицу {#insert-data-to-table}
|
||||
|
||||
Чтобы вставить данные:
|
||||
|
||||
**1.** Выполните следующую команду:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
|
||||
```
|
||||
|
||||
**2.** Запустите клиент ClickHouse:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client
|
||||
```
|
||||
|
||||
**3.** Проверьте, что данные успешно загрузились:
|
||||
|
||||
``` sql
|
||||
:) SELECT count() FROM amazon_train
|
||||
|
||||
SELECT count()
|
||||
FROM amazon_train
|
||||
|
||||
+-count()-+
|
||||
| 65538 |
|
||||
+---------+
|
||||
```
|
||||
|
||||
## 3. Интегрируйте CatBoost в ClickHouse {#integrate-catboost-into-clickhouse}
|
||||
|
||||
:::note "Примечание"
|
||||
**Опциональный шаг.** Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse.
|
||||
:::
|
||||
Чтобы интегрировать CatBoost в ClickHouse:
|
||||
|
||||
**1.** Создайте библиотеку для оценки модели.
|
||||
|
||||
Наиболее быстрый способ оценить модель CatBoost — это скомпилировать библиотеку `libcatboostmodel.<so|dll|dylib>`. Подробнее о том, как скомпилировать библиотеку, читайте в [документации CatBoost](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
|
||||
|
||||
**2.** Создайте в любом месте новую директорию с произвольным названием, например `data` и поместите в нее созданную библиотеку. Docker-образ уже содержит библиотеку `data/libcatboostmodel.so`.
|
||||
|
||||
**3.** Создайте в любом месте новую директорию для конфигурации модели с произвольным названием, например `models`.
|
||||
|
||||
**4.** Создайте файл конфигурации модели с произвольным названием, например `models/amazon_model.xml`.
|
||||
|
||||
**5.** Опишите конфигурацию модели:
|
||||
|
||||
``` xml
|
||||
<models>
|
||||
<model>
|
||||
<!-- Тип модели. В настоящий момент ClickHouse предоставляет только модель catboost. -->
|
||||
<type>catboost</type>
|
||||
<!-- Имя модели. -->
|
||||
<name>amazon</name>
|
||||
<!-- Путь к обученной модели. -->
|
||||
<path>/home/catboost/tutorial/catboost_model.bin</path>
|
||||
<!-- Интервал обновления. -->
|
||||
<lifetime>0</lifetime>
|
||||
</model>
|
||||
</models>
|
||||
```
|
||||
|
||||
**6.** Добавьте в конфигурацию ClickHouse путь к CatBoost и конфигурации модели:
|
||||
|
||||
``` xml
|
||||
<!-- Файл etc/clickhouse-server/config.d/models_config.xml. -->
|
||||
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
:::note "Примечание"
|
||||
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
|
||||
:::
|
||||
## 4. Запустите вывод модели из SQL {#run-model-inference}
|
||||
|
||||
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
|
||||
|
||||
Проверьте, что модель работает:
|
||||
|
||||
``` sql
|
||||
:) SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) > 0 AS prediction,
|
||||
ACTION AS target
|
||||
FROM amazon_train
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
:::note "Примечание"
|
||||
Функция [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами.
|
||||
:::
|
||||
Спрогнозируйте вероятность:
|
||||
|
||||
``` sql
|
||||
:) SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) AS prediction,
|
||||
1. / (1 + exp(-prediction)) AS probability,
|
||||
ACTION AS target
|
||||
FROM amazon_train
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
:::note "Примечание"
|
||||
Подробнее про функцию [exp()](../sql-reference/functions/math-functions.md).
|
||||
:::
|
||||
Посчитайте логистическую функцию потерь (LogLoss) на всей выборке:
|
||||
|
||||
``` sql
|
||||
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) AS prediction,
|
||||
1. / (1. + exp(-prediction)) AS prob,
|
||||
ACTION AS tg
|
||||
FROM amazon_train
|
||||
)
|
||||
```
|
||||
|
||||
:::note "Примечание"
|
||||
Подробнее про функции [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg), [log()](../sql-reference/functions/math-functions.md).
|
||||
:::
|
@ -7,5 +7,3 @@ sidebar_label: "Руководства"
|
||||
# Руководства {#rukovodstva}
|
||||
|
||||
Подробные пошаговые инструкции, которые помогут вам решать различные задачи с помощью ClickHouse.
|
||||
|
||||
- [Применение модели CatBoost в ClickHouse](apply-catboost-model.md)
|
||||
|
@ -29,7 +29,12 @@ SELECT name, status FROM system.dictionaries;
|
||||
|
||||
## RELOAD MODELS {#query_language-system-reload-models}
|
||||
|
||||
Перегружает все модели [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse), если их конфигурация была обновлена, без перезагрузки сервера.
|
||||
:::note
|
||||
Это утверждение и `SYSTEM RELOAD MODEL` просто выгружают модели catboost из clickhouse-library-bridge. Функция `catboostEvaluate()`
|
||||
загружает модель при первом обращении, если она еще не загружена.
|
||||
:::
|
||||
|
||||
Разгрузите все модели CatBoost.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
@ -39,12 +44,12 @@ SYSTEM RELOAD MODELS
|
||||
|
||||
## RELOAD MODEL {#query_language-system-reload-model}
|
||||
|
||||
Полностью перегружает модель [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse) `model_name`, если ее конфигурация была обновлена, без перезагрузки сервера.
|
||||
Выгружает модель CatBoost по адресу `модель_путь`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
SYSTEM RELOAD MODEL <model_name>
|
||||
SYSTEM RELOAD MODEL <model_path>
|
||||
```
|
||||
|
||||
## RELOAD FUNCTIONS {#query_language-system-reload-functions}
|
||||
|
@ -13,7 +13,7 @@ sidebar_label: file
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
file(path, format, structure)
|
||||
file(path [,format] [,structure])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
@ -11,7 +11,7 @@ sidebar_label: s3
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
|
||||
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
**Aргументы**
|
||||
|
@ -11,7 +11,7 @@ sidebar_label: s3Cluster
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
|
||||
s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure])
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
@ -13,7 +13,7 @@ sidebar_label: url
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
url(URL, format, structure)
|
||||
url(URL [,format] [,structure])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
@ -1,244 +0,0 @@
|
||||
---
|
||||
slug: /zh/guides/apply-catboost-model
|
||||
sidebar_position: 41
|
||||
sidebar_label: "\u5E94\u7528CatBoost\u6A21\u578B"
|
||||
---
|
||||
|
||||
# 在ClickHouse中应用Catboost模型 {#applying-catboost-model-in-clickhouse}
|
||||
|
||||
[CatBoost](https://catboost.ai) 是一个由[Yandex](https://yandex.com/company/)开发的开源免费机器学习库。
|
||||
|
||||
|
||||
通过本篇文档,您将学会如何用SQL语句调用已经存放在Clickhouse中的预训练模型来预测数据。
|
||||
|
||||
|
||||
为了在ClickHouse中应用CatBoost模型,需要进行如下步骤:
|
||||
|
||||
1. [创建数据表](#create-table).
|
||||
2. [将数据插入到表中](#insert-data-to-table).
|
||||
3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可跳过)。
|
||||
4. [从SQL运行模型推断](#run-model-inference).
|
||||
|
||||
有关训练CatBoost模型的详细信息,请参阅 [训练和模型应用](https://catboost.ai/docs/features/training.html#training).
|
||||
|
||||
您可以通过[RELOAD MODEL](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-model)与[RELOAD MODELS](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-models)语句来重载CatBoost模型。
|
||||
|
||||
## 先决条件 {#prerequisites}
|
||||
|
||||
请先安装 [Docker](https://docs.docker.com/install/)。
|
||||
|
||||
!!! note "注"
|
||||
[Docker](https://www.docker.com) 是一个软件平台,用户可以用Docker来创建独立于已有系统并集成了CatBoost和ClickHouse的容器。
|
||||
|
||||
在应用CatBoost模型之前:
|
||||
|
||||
**1.** 从容器仓库拉取示例docker镜像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) :
|
||||
|
||||
``` bash
|
||||
$ docker pull yandex/tutorial-catboost-clickhouse
|
||||
```
|
||||
|
||||
此示例Docker镜像包含运行CatBoost和ClickHouse所需的所有内容:代码、运行时、库、环境变量和配置文件。
|
||||
|
||||
**2.** 确保已成功拉取Docker镜像:
|
||||
|
||||
``` bash
|
||||
$ docker image ls
|
||||
REPOSITORY TAG IMAGE ID CREATED SIZE
|
||||
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
|
||||
```
|
||||
|
||||
**3.** 基于此镜像启动一个Docker容器:
|
||||
|
||||
``` bash
|
||||
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
|
||||
```
|
||||
|
||||
## 1. 创建数据表 {#create-table}
|
||||
|
||||
为训练样本创建ClickHouse表:
|
||||
|
||||
**1.** 在交互模式下启动ClickHouse控制台客户端:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client
|
||||
```
|
||||
|
||||
!!! note "注"
|
||||
ClickHouse服务器已经在Docker容器内运行。
|
||||
|
||||
**2.** 使用以下命令创建表:
|
||||
|
||||
``` sql
|
||||
:) CREATE TABLE amazon_train
|
||||
(
|
||||
date Date MATERIALIZED today(),
|
||||
ACTION UInt8,
|
||||
RESOURCE UInt32,
|
||||
MGR_ID UInt32,
|
||||
ROLE_ROLLUP_1 UInt32,
|
||||
ROLE_ROLLUP_2 UInt32,
|
||||
ROLE_DEPTNAME UInt32,
|
||||
ROLE_TITLE UInt32,
|
||||
ROLE_FAMILY_DESC UInt32,
|
||||
ROLE_FAMILY UInt32,
|
||||
ROLE_CODE UInt32
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY date
|
||||
```
|
||||
|
||||
**3.** 从ClickHouse控制台客户端退出:
|
||||
|
||||
``` sql
|
||||
:) exit
|
||||
```
|
||||
|
||||
## 2. 将数据插入到表中 {#insert-data-to-table}
|
||||
|
||||
插入数据:
|
||||
|
||||
**1.** 运行以下命令:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
|
||||
```
|
||||
|
||||
**2.** 在交互模式下启动ClickHouse控制台客户端:
|
||||
|
||||
``` bash
|
||||
$ clickhouse client
|
||||
```
|
||||
|
||||
**3.** 确保数据已上传:
|
||||
|
||||
``` sql
|
||||
:) SELECT count() FROM amazon_train
|
||||
|
||||
SELECT count()
|
||||
FROM amazon_train
|
||||
|
||||
+-count()-+
|
||||
| 65538 |
|
||||
+-------+
|
||||
```
|
||||
|
||||
## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse}
|
||||
|
||||
!!! note "注"
|
||||
**可跳过。** 示例Docker映像已经包含了运行CatBoost和ClickHouse所需的所有内容。
|
||||
|
||||
为了将CatBoost集成进ClickHouse,需要进行如下步骤:
|
||||
|
||||
**1.** 构建评估库。
|
||||
|
||||
评估CatBoost模型的最快方法是编译 `libcatboostmodel.<so|dll|dylib>` 库文件.
|
||||
|
||||
有关如何构建库文件的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
|
||||
|
||||
**2.** 创建一个新目录(位置与名称可随意指定), 如 `data` 并将创建的库文件放入其中。 示例Docker镜像已经包含了库 `data/libcatboostmodel.so`.
|
||||
|
||||
**3.** 创建一个新目录来放配置模型, 如 `models`.
|
||||
|
||||
**4.** 创建一个模型配置文件,如 `models/amazon_model.xml`.
|
||||
|
||||
**5.** 修改模型配置:
|
||||
|
||||
``` xml
|
||||
<models>
|
||||
<model>
|
||||
<!-- Model type. Now catboost only. -->
|
||||
<type>catboost</type>
|
||||
<!-- Model name. -->
|
||||
<name>amazon</name>
|
||||
<!-- Path to trained model. -->
|
||||
<path>/home/catboost/tutorial/catboost_model.bin</path>
|
||||
<!-- Update interval. -->
|
||||
<lifetime>0</lifetime>
|
||||
</model>
|
||||
</models>
|
||||
```
|
||||
|
||||
**6.** 将CatBoost库文件的路径和模型配置添加到ClickHouse配置:
|
||||
|
||||
``` xml
|
||||
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
|
||||
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
|
||||
<models_config>/home/catboost/models/*_model.xml</models_config>
|
||||
```
|
||||
|
||||
## 4. 使用SQL调用预测模型 {#run-model-inference}
|
||||
|
||||
为了测试模型是否正常,可以使用ClickHouse客户端 `$ clickhouse client`.
|
||||
|
||||
让我们确保模型能正常工作:
|
||||
|
||||
``` sql
|
||||
:) SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) > 0 AS prediction,
|
||||
ACTION AS target
|
||||
FROM amazon_train
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
!!! note "注"
|
||||
函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 会对多类别模型返回一个元组,其中包含每一类别的原始预测值。
|
||||
|
||||
执行预测:
|
||||
|
||||
``` sql
|
||||
:) SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) AS prediction,
|
||||
1. / (1 + exp(-prediction)) AS probability,
|
||||
ACTION AS target
|
||||
FROM amazon_train
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
!!! note "注"
|
||||
查看函数说明 [exp()](../sql-reference/functions/math-functions.md) 。
|
||||
|
||||
让我们计算样本的LogLoss:
|
||||
|
||||
``` sql
|
||||
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
modelEvaluate('amazon',
|
||||
RESOURCE,
|
||||
MGR_ID,
|
||||
ROLE_ROLLUP_1,
|
||||
ROLE_ROLLUP_2,
|
||||
ROLE_DEPTNAME,
|
||||
ROLE_TITLE,
|
||||
ROLE_FAMILY_DESC,
|
||||
ROLE_FAMILY,
|
||||
ROLE_CODE) AS prediction,
|
||||
1. / (1. + exp(-prediction)) AS prob,
|
||||
ACTION AS tg
|
||||
FROM amazon_train
|
||||
)
|
||||
```
|
||||
|
||||
!!! note "注"
|
||||
查看函数说明 [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) 和 [log()](../sql-reference/functions/math-functions.md) 。
|
||||
|
||||
[原始文章](https://clickhouse.com/docs/en/guides/apply_catboost_model/) <!--hide-->
|
@ -9,6 +9,5 @@ sidebar_label: ClickHouse指南
|
||||
列出了如何使用 Clickhouse 解决各种任务的详细说明:
|
||||
|
||||
- [关于简单集群设置的教程](../getting-started/tutorial.md)
|
||||
- [在ClickHouse中应用CatBoost模型](apply-catboost-model.md)
|
||||
|
||||
[原始文章](https://clickhouse.com/docs/en/guides/) <!--hide-->
|
||||
|
@ -54,7 +54,7 @@ else ()
|
||||
endif ()
|
||||
|
||||
if (NOT USE_MUSL)
|
||||
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source" ${ENABLE_CLICKHOUSE_ALL})
|
||||
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to external dynamically loaded libraries" ${ENABLE_CLICKHOUSE_ALL})
|
||||
endif ()
|
||||
|
||||
# https://presentations.clickhouse.com/matemarketing_2020/
|
||||
|
@ -446,8 +446,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
fs::path ulimits_file = ulimits_dir / fmt::format("{}.conf", user);
|
||||
fmt::print("Will set ulimits for {} user in {}.\n", user, ulimits_file.string());
|
||||
std::string ulimits_content = fmt::format(
|
||||
"{0}\tsoft\tnofile\t262144\n"
|
||||
"{0}\thard\tnofile\t262144\n", user);
|
||||
"{0}\tsoft\tnofile\t1048576\n"
|
||||
"{0}\thard\tnofile\t1048576\n", user);
|
||||
|
||||
fs::create_directories(ulimits_dir);
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
|
||||
|
||||
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
CatBoostLibraryHandler.cpp
|
||||
CatBoostLibraryHandlerFactory.cpp
|
||||
ExternalDictionaryLibraryAPI.cpp
|
||||
ExternalDictionaryLibraryHandler.cpp
|
||||
ExternalDictionaryLibraryHandlerFactory.cpp
|
||||
|
49
programs/library-bridge/CatBoostLibraryAPI.h
Normal file
49
programs/library-bridge/CatBoostLibraryAPI.h
Normal file
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
|
||||
// Function pointer typedefs and names of libcatboost.so functions used by ClickHouse
|
||||
struct CatBoostLibraryAPI
|
||||
{
|
||||
using ModelCalcerHandle = void;
|
||||
|
||||
using ModelCalcerCreateFunc = ModelCalcerHandle * (*)();
|
||||
static constexpr const char * ModelCalcerCreateName = "ModelCalcerCreate";
|
||||
|
||||
using ModelCalcerDeleteFunc = void (*)(ModelCalcerHandle *);
|
||||
static constexpr const char * ModelCalcerDeleteName = "ModelCalcerDelete";
|
||||
|
||||
using GetErrorStringFunc = const char * (*)();
|
||||
static constexpr const char * GetErrorStringName = "GetErrorString";
|
||||
|
||||
using LoadFullModelFromFileFunc = bool (*)(ModelCalcerHandle *, const char *);
|
||||
static constexpr const char * LoadFullModelFromFileName = "LoadFullModelFromFile";
|
||||
|
||||
using CalcModelPredictionFlatFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, double *, size_t);
|
||||
static constexpr const char * CalcModelPredictionFlatName = "CalcModelPredictionFlat";
|
||||
|
||||
using CalcModelPredictionFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const char ***, size_t, double *, size_t);
|
||||
static constexpr const char * CalcModelPredictionName = "CalcModelPrediction";
|
||||
|
||||
using CalcModelPredictionWithHashedCatFeaturesFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const int **, size_t, double *, size_t);
|
||||
static constexpr const char * CalcModelPredictionWithHashedCatFeaturesName = "CalcModelPredictionWithHashedCatFeatures";
|
||||
|
||||
using GetStringCatFeatureHashFunc = int (*)(const char *, size_t);
|
||||
static constexpr const char * GetStringCatFeatureHashName = "GetStringCatFeatureHash";
|
||||
|
||||
using GetIntegerCatFeatureHashFunc = int (*)(uint64_t);
|
||||
static constexpr const char * GetIntegerCatFeatureHashName = "GetIntegerCatFeatureHash";
|
||||
|
||||
using GetFloatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
|
||||
static constexpr const char * GetFloatFeaturesCountName = "GetFloatFeaturesCount";
|
||||
|
||||
using GetCatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
|
||||
static constexpr const char * GetCatFeaturesCountName = "GetCatFeaturesCount";
|
||||
|
||||
using GetTreeCountFunc = size_t (*)(ModelCalcerHandle *);
|
||||
static constexpr const char * GetTreeCountName = "GetTreeCount";
|
||||
|
||||
using GetDimensionsCountFunc = size_t (*)(ModelCalcerHandle *);
|
||||
static constexpr const char * GetDimensionsCountName = "GetDimensionsCount";
|
||||
};
|
389
programs/library-bridge/CatBoostLibraryHandler.cpp
Normal file
389
programs/library-bridge/CatBoostLibraryHandler.cpp
Normal file
@ -0,0 +1,389 @@
|
||||
#include "CatBoostLibraryHandler.h"
|
||||
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_APPLY_CATBOOST_MODEL;
|
||||
extern const int CANNOT_LOAD_CATBOOST_MODEL;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
CatBoostLibraryHandler::APIHolder::APIHolder(SharedLibrary & lib)
|
||||
{
|
||||
ModelCalcerCreate = lib.get<CatBoostLibraryAPI::ModelCalcerCreateFunc>(CatBoostLibraryAPI::ModelCalcerCreateName);
|
||||
ModelCalcerDelete = lib.get<CatBoostLibraryAPI::ModelCalcerDeleteFunc>(CatBoostLibraryAPI::ModelCalcerDeleteName);
|
||||
GetErrorString = lib.get<CatBoostLibraryAPI::GetErrorStringFunc>(CatBoostLibraryAPI::GetErrorStringName);
|
||||
LoadFullModelFromFile = lib.get<CatBoostLibraryAPI::LoadFullModelFromFileFunc>(CatBoostLibraryAPI::LoadFullModelFromFileName);
|
||||
CalcModelPredictionFlat = lib.get<CatBoostLibraryAPI::CalcModelPredictionFlatFunc>(CatBoostLibraryAPI::CalcModelPredictionFlatName);
|
||||
CalcModelPrediction = lib.get<CatBoostLibraryAPI::CalcModelPredictionFunc>(CatBoostLibraryAPI::CalcModelPredictionName);
|
||||
CalcModelPredictionWithHashedCatFeatures = lib.get<CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesFunc>(CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesName);
|
||||
GetStringCatFeatureHash = lib.get<CatBoostLibraryAPI::GetStringCatFeatureHashFunc>(CatBoostLibraryAPI::GetStringCatFeatureHashName);
|
||||
GetIntegerCatFeatureHash = lib.get<CatBoostLibraryAPI::GetIntegerCatFeatureHashFunc>(CatBoostLibraryAPI::GetIntegerCatFeatureHashName);
|
||||
GetFloatFeaturesCount = lib.get<CatBoostLibraryAPI::GetFloatFeaturesCountFunc>(CatBoostLibraryAPI::GetFloatFeaturesCountName);
|
||||
GetCatFeaturesCount = lib.get<CatBoostLibraryAPI::GetCatFeaturesCountFunc>(CatBoostLibraryAPI::GetCatFeaturesCountName);
|
||||
GetTreeCount = lib.tryGet<CatBoostLibraryAPI::GetTreeCountFunc>(CatBoostLibraryAPI::GetTreeCountName);
|
||||
GetDimensionsCount = lib.tryGet<CatBoostLibraryAPI::GetDimensionsCountFunc>(CatBoostLibraryAPI::GetDimensionsCountName);
|
||||
}
|
||||
|
||||
CatBoostLibraryHandler::CatBoostLibraryHandler(
|
||||
const std::string & library_path,
|
||||
const std::string & model_path)
|
||||
: loading_start_time(std::chrono::system_clock::now())
|
||||
, library(std::make_shared<SharedLibrary>(library_path))
|
||||
, api(*library)
|
||||
{
|
||||
model_calcer_handle = api.ModelCalcerCreate();
|
||||
|
||||
if (!api.LoadFullModelFromFile(model_calcer_handle, model_path.c_str()))
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
|
||||
"Cannot load CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
|
||||
float_features_count = api.GetFloatFeaturesCount(model_calcer_handle);
|
||||
cat_features_count = api.GetCatFeaturesCount(model_calcer_handle);
|
||||
|
||||
tree_count = 1;
|
||||
if (api.GetDimensionsCount)
|
||||
tree_count = api.GetDimensionsCount(model_calcer_handle);
|
||||
|
||||
loading_duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - loading_start_time);
|
||||
}
|
||||
|
||||
CatBoostLibraryHandler::~CatBoostLibraryHandler()
|
||||
{
|
||||
api.ModelCalcerDelete(model_calcer_handle);
|
||||
}
|
||||
|
||||
std::chrono::system_clock::time_point CatBoostLibraryHandler::getLoadingStartTime() const
|
||||
{
|
||||
return loading_start_time;
|
||||
}
|
||||
|
||||
std::chrono::milliseconds CatBoostLibraryHandler::getLoadingDuration() const
|
||||
{
|
||||
return loading_duration;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place column elements in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
template <typename T>
|
||||
void placeColumnAsNumber(const IColumn * column, T * buffer, size_t features_count)
|
||||
{
|
||||
size_t size = column->size();
|
||||
FieldVisitorConvertToNumber<T> visitor;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
/// TODO: Replace with column visitor.
|
||||
Field field;
|
||||
column->get(i, field);
|
||||
*buffer = applyVisitor(visitor, field);
|
||||
buffer += features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count)
|
||||
{
|
||||
size_t size = column.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
*buffer = const_cast<char *>(column.getDataAtWithTerminatingZero(i).data);
|
||||
buffer += features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
/// Returns PODArray which holds data (because ColumnFixedString doesn't store terminating zero).
|
||||
PODArray<char> placeFixedStringColumn(const ColumnFixedString & column, const char ** buffer, size_t features_count)
|
||||
{
|
||||
size_t size = column.size();
|
||||
size_t str_size = column.getN();
|
||||
PODArray<char> data(size * (str_size + 1));
|
||||
char * data_ptr = data.data();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto ref = column.getDataAt(i);
|
||||
memcpy(data_ptr, ref.data, ref.size);
|
||||
data_ptr[ref.size] = 0;
|
||||
*buffer = data_ptr;
|
||||
data_ptr += ref.size + 1;
|
||||
buffer += features_count;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Place columns into buffer, returns column which holds placed data. Buffer should contains column->size() values.
|
||||
template <typename T>
|
||||
ColumnPtr placeNumericColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const T** buffer)
|
||||
{
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
|
||||
size_t column_size = columns[offset]->size();
|
||||
auto data_column = ColumnVector<T>::create(size * column_size);
|
||||
T * data = data_column->getData().data();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (column->isNumeric())
|
||||
placeColumnAsNumber(column, data + i, size);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
*buffer = data;
|
||||
++buffer;
|
||||
data += size;
|
||||
}
|
||||
|
||||
return data_column;
|
||||
}
|
||||
|
||||
/// Place columns into buffer, returns data which was used for fixed string columns.
|
||||
/// Buffer should contains column->size() values, each value contains size strings.
|
||||
std::vector<PODArray<char>> placeStringColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer)
|
||||
{
|
||||
if (size == 0)
|
||||
return {};
|
||||
|
||||
std::vector<PODArray<char>> data;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
|
||||
placeStringColumn(*column_string, buffer + i, size);
|
||||
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
|
||||
data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size));
|
||||
else
|
||||
throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/// buffer[column_size * cat_features_count] -> char * => cat_features[column_size][cat_features_count] -> char *
|
||||
void fillCatFeaturesBuffer(
|
||||
const char *** cat_features, const char ** buffer,
|
||||
size_t column_size, size_t cat_features_count)
|
||||
{
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
*cat_features = buffer;
|
||||
++cat_features;
|
||||
buffer += cat_features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calc hash for string cat feature at ps positions.
|
||||
template <typename Column>
|
||||
void calcStringHashes(const Column * column, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
|
||||
{
|
||||
size_t column_size = column->size();
|
||||
for (size_t j = 0; j < column_size; ++j)
|
||||
{
|
||||
auto ref = column->getDataAt(j);
|
||||
const_cast<int *>(*buffer)[ps] = api.GetStringCatFeatureHash(ref.data, ref.size);
|
||||
++buffer;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calc hash for int cat feature at ps position. Buffer at positions ps should contains unhashed values.
|
||||
void calcIntHashes(size_t column_size, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
|
||||
{
|
||||
for (size_t j = 0; j < column_size; ++j)
|
||||
{
|
||||
const_cast<int *>(*buffer)[ps] = api.GetIntegerCatFeatureHash((*buffer)[ps]);
|
||||
++buffer;
|
||||
}
|
||||
}
|
||||
|
||||
/// buffer contains column->size() rows and size columns.
|
||||
/// For int cat features calc hash inplace.
|
||||
/// For string cat features calc hash from column rows.
|
||||
void calcHashes(const ColumnRawPtrs & columns, size_t offset, size_t size, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
size_t column_size = columns[offset]->size();
|
||||
|
||||
std::vector<PODArray<char>> data;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
|
||||
calcStringHashes(column_string, i, buffer, api);
|
||||
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
|
||||
calcStringHashes(column_fixed_string, i, buffer, api);
|
||||
else
|
||||
calcIntHashes(column_size, i, buffer, api);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Convert values to row-oriented format and call evaluation function from CatBoost wrapper api.
|
||||
/// * CalcModelPredictionFlat if no cat features
|
||||
/// * CalcModelPrediction if all cat features are strings
|
||||
/// * CalcModelPredictionWithHashedCatFeatures if has int cat features.
|
||||
ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
const ColumnRawPtrs & columns,
|
||||
bool cat_features_are_strings) const
|
||||
{
|
||||
std::string error_msg = "Error occurred while applying CatBoost model: ";
|
||||
size_t column_size = columns.front()->size();
|
||||
|
||||
auto result = ColumnFloat64::create(column_size * tree_count);
|
||||
auto * result_buf = result->getData().data();
|
||||
|
||||
if (!column_size)
|
||||
return result;
|
||||
|
||||
/// Prepare float features.
|
||||
PODArray<const float *> float_features(column_size);
|
||||
auto * float_features_buf = float_features.data();
|
||||
/// Store all float data into single column. float_features is a list of pointers to it.
|
||||
auto float_features_col = placeNumericColumns<float>(columns, 0, float_features_count, float_features_buf);
|
||||
|
||||
if (cat_features_count == 0)
|
||||
{
|
||||
if (!api.CalcModelPredictionFlat(model_calcer_handle, column_size,
|
||||
float_features_buf, float_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Prepare cat features.
|
||||
if (cat_features_are_strings)
|
||||
{
|
||||
/// cat_features_holder stores pointers to ColumnString data or fixed_strings_data.
|
||||
PODArray<const char *> cat_features_holder(cat_features_count * column_size);
|
||||
PODArray<const char **> cat_features(column_size);
|
||||
auto * cat_features_buf = cat_features.data();
|
||||
|
||||
fillCatFeaturesBuffer(cat_features_buf, cat_features_holder.data(), column_size, cat_features_count);
|
||||
/// Fixed strings are stored without termination zero, so have to copy data into fixed_strings_data.
|
||||
auto fixed_strings_data = placeStringColumns(columns, float_features_count,
|
||||
cat_features_count, cat_features_holder.data());
|
||||
|
||||
if (!api.CalcModelPrediction(model_calcer_handle, column_size,
|
||||
float_features_buf, float_features_count,
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PODArray<const int *> cat_features(column_size);
|
||||
auto * cat_features_buf = cat_features.data();
|
||||
auto cat_features_col = placeNumericColumns<int>(columns, float_features_count,
|
||||
cat_features_count, cat_features_buf);
|
||||
calcHashes(columns, float_features_count, cat_features_count, cat_features_buf, api);
|
||||
if (!api.CalcModelPredictionWithHashedCatFeatures(
|
||||
model_calcer_handle, column_size,
|
||||
float_features_buf, float_features_count,
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t CatBoostLibraryHandler::getTreeCount() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return tree_count;
|
||||
}
|
||||
|
||||
ColumnPtr CatBoostLibraryHandler::evaluate(const ColumnRawPtrs & columns) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");
|
||||
|
||||
if (columns.size() != float_features_count + cat_features_count)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
|
||||
columns.size(),
|
||||
float_features_count,
|
||||
cat_features_count);
|
||||
|
||||
for (size_t i = 0; i < float_features_count; ++i)
|
||||
{
|
||||
if (!columns[i]->isNumeric())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
|
||||
}
|
||||
}
|
||||
|
||||
bool cat_features_are_strings = true;
|
||||
for (size_t i = float_features_count; i < float_features_count + cat_features_count; ++i)
|
||||
{
|
||||
const auto * column = columns[i];
|
||||
if (column->isNumeric())
|
||||
{
|
||||
cat_features_are_strings = false;
|
||||
}
|
||||
else if (!(typeid_cast<const ColumnString *>(column)
|
||||
|| typeid_cast<const ColumnFixedString *>(column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
|
||||
}
|
||||
}
|
||||
|
||||
auto result = evalImpl(columns, cat_features_are_strings);
|
||||
|
||||
if (tree_count == 1)
|
||||
return result;
|
||||
|
||||
size_t column_size = columns.front()->size();
|
||||
auto * result_buf = result->getData().data();
|
||||
|
||||
/// Multiple trees case. Copy data to several columns.
|
||||
MutableColumns mutable_columns(tree_count);
|
||||
std::vector<Float64 *> column_ptrs(tree_count);
|
||||
for (size_t i = 0; i < tree_count; ++i)
|
||||
{
|
||||
auto col = ColumnFloat64::create(column_size);
|
||||
column_ptrs[i] = col->getData().data();
|
||||
mutable_columns[i] = std::move(col);
|
||||
}
|
||||
|
||||
Float64 * data = result_buf;
|
||||
for (size_t row = 0; row < column_size; ++row)
|
||||
{
|
||||
for (size_t i = 0; i < tree_count; ++i)
|
||||
{
|
||||
*column_ptrs[i] = *data;
|
||||
++column_ptrs[i];
|
||||
++data;
|
||||
}
|
||||
}
|
||||
|
||||
return ColumnTuple::create(std::move(mutable_columns));
|
||||
}
|
||||
|
||||
}
|
78
programs/library-bridge/CatBoostLibraryHandler.h
Normal file
78
programs/library-bridge/CatBoostLibraryHandler.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include "CatBoostLibraryAPI.h"
|
||||
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/SharedLibrary.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Abstracts access to the CatBoost shared library.
|
||||
class CatBoostLibraryHandler
|
||||
{
|
||||
public:
|
||||
/// Holds pointers to CatBoost library functions
|
||||
struct APIHolder
|
||||
{
|
||||
explicit APIHolder(SharedLibrary & lib);
|
||||
|
||||
// NOLINTBEGIN(readability-identifier-naming)
|
||||
CatBoostLibraryAPI::ModelCalcerCreateFunc ModelCalcerCreate;
|
||||
CatBoostLibraryAPI::ModelCalcerDeleteFunc ModelCalcerDelete;
|
||||
CatBoostLibraryAPI::GetErrorStringFunc GetErrorString;
|
||||
CatBoostLibraryAPI::LoadFullModelFromFileFunc LoadFullModelFromFile;
|
||||
CatBoostLibraryAPI::CalcModelPredictionFlatFunc CalcModelPredictionFlat;
|
||||
CatBoostLibraryAPI::CalcModelPredictionFunc CalcModelPrediction;
|
||||
CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesFunc CalcModelPredictionWithHashedCatFeatures;
|
||||
CatBoostLibraryAPI::GetStringCatFeatureHashFunc GetStringCatFeatureHash;
|
||||
CatBoostLibraryAPI::GetIntegerCatFeatureHashFunc GetIntegerCatFeatureHash;
|
||||
CatBoostLibraryAPI::GetFloatFeaturesCountFunc GetFloatFeaturesCount;
|
||||
CatBoostLibraryAPI::GetCatFeaturesCountFunc GetCatFeaturesCount;
|
||||
CatBoostLibraryAPI::GetTreeCountFunc GetTreeCount;
|
||||
CatBoostLibraryAPI::GetDimensionsCountFunc GetDimensionsCount;
|
||||
// NOLINTEND(readability-identifier-naming)
|
||||
};
|
||||
|
||||
CatBoostLibraryHandler(
|
||||
const String & library_path,
|
||||
const String & model_path);
|
||||
|
||||
~CatBoostLibraryHandler();
|
||||
|
||||
std::chrono::system_clock::time_point getLoadingStartTime() const;
|
||||
std::chrono::milliseconds getLoadingDuration() const;
|
||||
|
||||
size_t getTreeCount() const;
|
||||
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const;
|
||||
|
||||
private:
|
||||
std::chrono::system_clock::time_point loading_start_time;
|
||||
std::chrono::milliseconds loading_duration;
|
||||
|
||||
const SharedLibraryPtr library;
|
||||
const APIHolder api;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
|
||||
CatBoostLibraryAPI::ModelCalcerHandle * model_calcer_handle TSA_GUARDED_BY(mutex) TSA_PT_GUARDED_BY(mutex);
|
||||
|
||||
size_t float_features_count TSA_GUARDED_BY(mutex);
|
||||
size_t cat_features_count TSA_GUARDED_BY(mutex);
|
||||
size_t tree_count TSA_GUARDED_BY(mutex);
|
||||
|
||||
ColumnFloat64::MutablePtr evalImpl(const ColumnRawPtrs & columns, bool cat_features_are_strings) const TSA_REQUIRES(mutex);
|
||||
};
|
||||
|
||||
using CatBoostLibraryHandlerPtr = std::shared_ptr<CatBoostLibraryHandler>;
|
||||
|
||||
}
|
80
programs/library-bridge/CatBoostLibraryHandlerFactory.cpp
Normal file
80
programs/library-bridge/CatBoostLibraryHandlerFactory.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
#include "CatBoostLibraryHandlerFactory.h"
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance()
|
||||
{
|
||||
static CatBoostLibraryHandlerFactory instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory()
|
||||
: log(&Poco::Logger::get("CatBoostLibraryHandlerFactory"))
|
||||
{
|
||||
}
|
||||
|
||||
CatBoostLibraryHandlerPtr CatBoostLibraryHandlerFactory::tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto handler = library_handlers.find(model_path);
|
||||
bool found = (handler != library_handlers.end());
|
||||
|
||||
if (found)
|
||||
return handler->second;
|
||||
else
|
||||
{
|
||||
if (create_if_not_found)
|
||||
{
|
||||
auto new_handler = std::make_shared<CatBoostLibraryHandler>(library_path, model_path);
|
||||
library_handlers.emplace(model_path, new_handler);
|
||||
LOG_DEBUG(log, "Loaded catboost library handler for model path '{}'", model_path);
|
||||
return new_handler;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void CatBoostLibraryHandlerFactory::removeModel(const String & model_path)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
bool deleted = library_handlers.erase(model_path);
|
||||
if (!deleted)
|
||||
{
|
||||
LOG_DEBUG(log, "Cannot unload catboost library handler for model path '{}'", model_path);
|
||||
return;
|
||||
}
|
||||
LOG_DEBUG(log, "Unloaded catboost library handler for model path '{}'", model_path);
|
||||
}
|
||||
|
||||
void CatBoostLibraryHandlerFactory::removeAllModels()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
library_handlers.clear();
|
||||
LOG_DEBUG(log, "Unloaded all catboost library handlers");
|
||||
}
|
||||
|
||||
ExternalModelInfos CatBoostLibraryHandlerFactory::getModelInfos()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
ExternalModelInfos result;
|
||||
|
||||
for (const auto & handler : library_handlers)
|
||||
result.push_back({
|
||||
.model_path = handler.first,
|
||||
.model_type = "catboost",
|
||||
.loading_start_time = handler.second->getLoadingStartTime(),
|
||||
.loading_duration = handler.second->getLoadingDuration()
|
||||
});
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
}
|
37
programs/library-bridge/CatBoostLibraryHandlerFactory.h
Normal file
37
programs/library-bridge/CatBoostLibraryHandlerFactory.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include "CatBoostLibraryHandler.h"
|
||||
|
||||
#include <base/defines.h>
|
||||
#include <Common/ExternalModelInfo.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CatBoostLibraryHandlerFactory final : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
static CatBoostLibraryHandlerFactory & instance();
|
||||
|
||||
CatBoostLibraryHandlerFactory();
|
||||
|
||||
CatBoostLibraryHandlerPtr tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found);
|
||||
|
||||
void removeModel(const String & model_path);
|
||||
void removeAllModels();
|
||||
|
||||
ExternalModelInfos getModelInfos();
|
||||
|
||||
private:
|
||||
/// map: model path --> catboost library handler
|
||||
std::unordered_map<String, CatBoostLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
|
||||
std::mutex mutex;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
@ -50,6 +50,6 @@ private:
|
||||
void * lib_data;
|
||||
};
|
||||
|
||||
using SharedLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
|
||||
using ExternalDictionaryLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
|
||||
|
||||
}
|
||||
|
@ -1,37 +1,40 @@
|
||||
#include "ExternalDictionaryLibraryHandlerFactory.h"
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
SharedLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const std::string & dictionary_id)
|
||||
ExternalDictionaryLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const String & dictionary_id)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto library_handler = library_handlers.find(dictionary_id);
|
||||
|
||||
if (library_handler != library_handlers.end())
|
||||
return library_handler->second;
|
||||
|
||||
if (auto handler = library_handlers.find(dictionary_id); handler != library_handlers.end())
|
||||
return handler->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
void ExternalDictionaryLibraryHandlerFactory::create(
|
||||
const std::string & dictionary_id,
|
||||
const std::string & library_path,
|
||||
const std::vector<std::string> & library_settings,
|
||||
const String & dictionary_id,
|
||||
const String & library_path,
|
||||
const std::vector<String> & library_settings,
|
||||
const Block & sample_block,
|
||||
const std::vector<std::string> & attributes_names)
|
||||
const std::vector<String> & attributes_names)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (!library_handlers.contains(dictionary_id))
|
||||
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
|
||||
else
|
||||
|
||||
if (library_handlers.contains(dictionary_id))
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
|
||||
return;
|
||||
}
|
||||
|
||||
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
|
||||
}
|
||||
|
||||
|
||||
bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
|
||||
bool ExternalDictionaryLibraryHandlerFactory::clone(const String & from_dictionary_id, const String & to_dictionary_id)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto from_library_handler = library_handlers.find(from_dictionary_id);
|
||||
@ -45,7 +48,7 @@ bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dic
|
||||
}
|
||||
|
||||
|
||||
bool ExternalDictionaryLibraryHandlerFactory::remove(const std::string & dictionary_id)
|
||||
bool ExternalDictionaryLibraryHandlerFactory::remove(const String & dictionary_id)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
/// extDict_libDelete is called in destructor.
|
||||
|
@ -17,22 +17,22 @@ class ExternalDictionaryLibraryHandlerFactory final : private boost::noncopyable
|
||||
public:
|
||||
static ExternalDictionaryLibraryHandlerFactory & instance();
|
||||
|
||||
SharedLibraryHandlerPtr get(const std::string & dictionary_id);
|
||||
ExternalDictionaryLibraryHandlerPtr get(const String & dictionary_id);
|
||||
|
||||
void create(
|
||||
const std::string & dictionary_id,
|
||||
const std::string & library_path,
|
||||
const std::vector<std::string> & library_settings,
|
||||
const String & dictionary_id,
|
||||
const String & library_path,
|
||||
const std::vector<String> & library_settings,
|
||||
const Block & sample_block,
|
||||
const std::vector<std::string> & attributes_names);
|
||||
const std::vector<String> & attributes_names);
|
||||
|
||||
bool clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
|
||||
bool clone(const String & from_dictionary_id, const String & to_dictionary_id);
|
||||
|
||||
bool remove(const std::string & dictionary_id);
|
||||
bool remove(const String & dictionary_id);
|
||||
|
||||
private:
|
||||
/// map: dict_id -> sharedLibraryHandler
|
||||
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
|
||||
std::unordered_map<String, ExternalDictionaryLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
|
@ -27,12 +27,16 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
|
||||
{
|
||||
if (uri.getPath() == "/extdict_ping")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
else if (uri.getPath() == "/catboost_ping")
|
||||
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
}
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
if (uri.getPath() == "/extdict_request")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
else if (uri.getPath() == "/catboost_request")
|
||||
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
@ -1,24 +1,32 @@
|
||||
#include "LibraryBridgeHandlers.h"
|
||||
|
||||
#include "CatBoostLibraryHandler.h"
|
||||
#include "CatBoostLibraryHandlerFactory.h"
|
||||
#include "ExternalDictionaryLibraryHandler.h"
|
||||
#include "ExternalDictionaryLibraryHandlerFactory.h"
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/BridgeProtocolVersion.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/Net/HTMLForm.h>
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <Poco/Net/HTTPServerResponse.h>
|
||||
#include <Poco/Net/HTMLForm.h>
|
||||
#include <Poco/ThreadPool.h>
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <QueryPipeline/QueryPipeline.h>
|
||||
#include <Processors/Executors/CompletedPipelineExecutor.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
#include <QueryPipeline/QueryPipeline.h>
|
||||
#include <Server/HTTP/HTMLForm.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
|
||||
#include <Formats/NativeReader.h>
|
||||
#include <Formats/NativeWriter.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -31,7 +39,7 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
void processError(HTTPServerResponse & response, const std::string & message)
|
||||
void processError(HTTPServerResponse & response, const String & message)
|
||||
{
|
||||
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
|
||||
|
||||
@ -41,7 +49,7 @@ namespace
|
||||
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
|
||||
}
|
||||
|
||||
std::shared_ptr<Block> parseColumns(std::string && column_string)
|
||||
std::shared_ptr<Block> parseColumns(String && column_string)
|
||||
{
|
||||
auto sample_block = std::make_shared<Block>();
|
||||
auto names_and_types = NamesAndTypesList::parse(column_string);
|
||||
@ -59,10 +67,10 @@ namespace
|
||||
return ids;
|
||||
}
|
||||
|
||||
std::vector<std::string> parseNamesFromBinary(const std::string & names_string)
|
||||
std::vector<String> parseNamesFromBinary(const String & names_string)
|
||||
{
|
||||
ReadBufferFromString buf(names_string);
|
||||
std::vector<std::string> names;
|
||||
std::vector<String> names;
|
||||
readVectorBinary(names, buf);
|
||||
return names;
|
||||
}
|
||||
@ -79,13 +87,15 @@ static void writeData(Block data, OutputFormatPtr format)
|
||||
executor.execute();
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
@ -97,7 +107,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
version = 0; /// assumed version for too old servers which do not send a version
|
||||
else
|
||||
{
|
||||
String version_str = params.get("version");
|
||||
const String & version_str = params.get("version");
|
||||
if (!tryParse(version, version_str))
|
||||
{
|
||||
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
|
||||
@ -124,8 +134,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
std::string method = params.get("method");
|
||||
std::string dictionary_id = params.get("dictionary_id");
|
||||
const String & method = params.get("method");
|
||||
const String & dictionary_id = params.get("dictionary_id");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
@ -141,7 +151,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
std::string from_dictionary_id = params.get("from_dictionary_id");
|
||||
const String & from_dictionary_id = params.get("from_dictionary_id");
|
||||
bool cloned = false;
|
||||
cloned = ExternalDictionaryLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
|
||||
|
||||
@ -166,7 +176,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
std::string library_path = params.get("library_path");
|
||||
const String & library_path = params.get("library_path");
|
||||
|
||||
if (!params.has("library_settings"))
|
||||
{
|
||||
@ -174,10 +184,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & settings_string = params.get("library_settings");
|
||||
const String & settings_string = params.get("library_settings");
|
||||
|
||||
LOG_DEBUG(log, "Parsing library settings from binary string");
|
||||
std::vector<std::string> library_settings = parseNamesFromBinary(settings_string);
|
||||
std::vector<String> library_settings = parseNamesFromBinary(settings_string);
|
||||
|
||||
/// Needed for library dictionary
|
||||
if (!params.has("attributes_names"))
|
||||
@ -186,10 +196,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & attributes_string = params.get("attributes_names");
|
||||
const String & attributes_string = params.get("attributes_names");
|
||||
|
||||
LOG_DEBUG(log, "Parsing attributes names from binary string");
|
||||
std::vector<std::string> attributes_names = parseNamesFromBinary(attributes_string);
|
||||
std::vector<String> attributes_names = parseNamesFromBinary(attributes_string);
|
||||
|
||||
/// Needed to parse block from binary string format
|
||||
if (!params.has("sample_block"))
|
||||
@ -197,7 +207,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
processError(response, "No 'sample_block' in request URL");
|
||||
return;
|
||||
}
|
||||
std::string sample_block_string = params.get("sample_block");
|
||||
String sample_block_string = params.get("sample_block");
|
||||
|
||||
std::shared_ptr<Block> sample_block;
|
||||
try
|
||||
@ -297,7 +307,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
return;
|
||||
}
|
||||
|
||||
std::string requested_block_string = params.get("requested_block_sample");
|
||||
String requested_block_string = params.get("requested_block_sample");
|
||||
|
||||
std::shared_ptr<Block> requested_sample_block;
|
||||
try
|
||||
@ -332,7 +342,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(log, "Unknown library method: '{}'", method);
|
||||
processError(response, "Unknown library method '" + method + "'");
|
||||
LOG_ERROR(log, "Unknown library method: '{}'", method);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
@ -362,6 +373,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
@ -369,6 +381,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
{
|
||||
try
|
||||
@ -382,7 +395,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
return;
|
||||
}
|
||||
|
||||
std::string dictionary_id = params.get("dictionary_id");
|
||||
const String & dictionary_id = params.get("dictionary_id");
|
||||
|
||||
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
|
||||
|
||||
@ -399,4 +412,230 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
|
||||
size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
HTMLForm params(getContext()->getSettingsRef(), request);
|
||||
|
||||
size_t version;
|
||||
|
||||
if (!params.has("version"))
|
||||
version = 0; /// assumed version for too old servers which do not send a version
|
||||
else
|
||||
{
|
||||
const String & version_str = params.get("version");
|
||||
if (!tryParse(version, version_str))
|
||||
{
|
||||
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (version != LIBRARY_BRIDGE_PROTOCOL_VERSION)
|
||||
{
|
||||
/// backwards compatibility is considered unnecessary for now, just let the user know that the server and the bridge must be upgraded together
|
||||
processError(response, "Server and library-bridge have different versions: '" + std::to_string(version) + "' vs. '" + std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION) + "'");
|
||||
return;
|
||||
}
|
||||
if (!params.has("method"))
|
||||
{
|
||||
processError(response, "No 'method' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & method = params.get("method");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}'", method);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
|
||||
try
|
||||
{
|
||||
if (method == "catboost_list")
|
||||
{
|
||||
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
|
||||
|
||||
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
|
||||
|
||||
for (const auto & info : model_infos)
|
||||
{
|
||||
writeStringBinary(info.model_path, out);
|
||||
writeStringBinary(info.model_type, out);
|
||||
|
||||
UInt64 t = std::chrono::system_clock::to_time_t(info.loading_start_time);
|
||||
writeIntBinary(t, out);
|
||||
|
||||
t = info.loading_duration.count();
|
||||
writeIntBinary(t, out);
|
||||
|
||||
}
|
||||
}
|
||||
else if (method == "catboost_removeModel")
|
||||
{
|
||||
auto & read_buf = request.getStream();
|
||||
params.read(read_buf);
|
||||
|
||||
if (!params.has("model_path"))
|
||||
{
|
||||
processError(response, "No 'model_path' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & model_path = params.get("model_path");
|
||||
|
||||
CatBoostLibraryHandlerFactory::instance().removeModel(model_path);
|
||||
|
||||
String res = "1";
|
||||
writeStringBinary(res, out);
|
||||
}
|
||||
else if (method == "catboost_removeAllModels")
|
||||
{
|
||||
CatBoostLibraryHandlerFactory::instance().removeAllModels();
|
||||
|
||||
String res = "1";
|
||||
writeStringBinary(res, out);
|
||||
}
|
||||
else if (method == "catboost_GetTreeCount")
|
||||
{
|
||||
auto & read_buf = request.getStream();
|
||||
params.read(read_buf);
|
||||
|
||||
if (!params.has("library_path"))
|
||||
{
|
||||
processError(response, "No 'library_path' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & library_path = params.get("library_path");
|
||||
|
||||
if (!params.has("model_path"))
|
||||
{
|
||||
processError(response, "No 'model_path' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & model_path = params.get("model_path");
|
||||
|
||||
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, library_path, /*create_if_not_found*/ true);
|
||||
size_t tree_count = catboost_handler->getTreeCount();
|
||||
writeIntBinary(tree_count, out);
|
||||
}
|
||||
else if (method == "catboost_libEvaluate")
|
||||
{
|
||||
auto & read_buf = request.getStream();
|
||||
params.read(read_buf);
|
||||
|
||||
if (!params.has("model_path"))
|
||||
{
|
||||
processError(response, "No 'model_path' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & model_path = params.get("model_path");
|
||||
|
||||
if (!params.has("data"))
|
||||
{
|
||||
processError(response, "No 'data' in request URL");
|
||||
return;
|
||||
}
|
||||
|
||||
const String & data = params.get("data");
|
||||
|
||||
ReadBufferFromString string_read_buf(data);
|
||||
NativeReader deserializer(string_read_buf, /*server_revision*/ 0);
|
||||
Block block_read = deserializer.read();
|
||||
|
||||
Columns col_ptrs = block_read.getColumns();
|
||||
ColumnRawPtrs col_raw_ptrs;
|
||||
for (const auto & p : col_ptrs)
|
||||
col_raw_ptrs.push_back(&*p);
|
||||
|
||||
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, "DummyLibraryPath", /*create_if_not_found*/ false);
|
||||
|
||||
if (!catboost_handler)
|
||||
{
|
||||
processError(response, "CatBoost library is not loaded for model '" + model_path + "'. Please try again.");
|
||||
return;
|
||||
}
|
||||
|
||||
ColumnPtr res_col = catboost_handler->evaluate(col_raw_ptrs);
|
||||
|
||||
DataTypePtr res_col_type = std::make_shared<DataTypeFloat64>();
|
||||
String res_col_name = "res_col";
|
||||
|
||||
ColumnsWithTypeAndName res_cols_with_type_and_name = {{res_col, res_col_type, res_col_name}};
|
||||
|
||||
Block block_write(res_cols_with_type_and_name);
|
||||
NativeWriter serializer{out, /*client_revision*/ 0, block_write};
|
||||
serializer.write(block_write);
|
||||
}
|
||||
else
|
||||
{
|
||||
processError(response, "Unknown library method '" + method + "'");
|
||||
LOG_ERROR(log, "Unknown library method: '{}'", method);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
auto message = getCurrentExceptionMessage(true);
|
||||
LOG_ERROR(log, "Failed to process request. Error: {}", message);
|
||||
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call process_error, because of too soon response sending
|
||||
try
|
||||
{
|
||||
writeStringBinary(message, out);
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
{
|
||||
try
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
HTMLForm params(getContext()->getSettingsRef(), request);
|
||||
|
||||
String res = "1";
|
||||
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
LOG_TRACE(log, "Sending ping response: {}", res);
|
||||
response.sendBuffer(res.data(), res.size());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("PingHandler");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Server/HTTP/HTTPRequestHandler.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include "ExternalDictionaryLibraryHandler.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -26,11 +25,12 @@ public:
|
||||
private:
|
||||
static constexpr inline auto FORMAT = "RowBinary";
|
||||
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
|
||||
// Handler for checking if the external dictionary library is loaded (used for handshake)
|
||||
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
@ -43,4 +43,47 @@ private:
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
|
||||
/// Handler for requests to catboost library. The call protocol is as follows:
|
||||
/// (1) Send a "catboost_GetTreeCount" request from the server to the bridge. It contains a library path (e.g /home/user/libcatboost.so) and
|
||||
/// a model path (e.g. /home/user/model.bin). This loads the catboost library handler associated with the model path, then executes
|
||||
/// GetTreeCount() on the library handler and sends the result back to the server.
|
||||
/// (2) Send "catboost_Evaluate" from the server to the bridge. It contains a model path and the features to run the interference on. Step
|
||||
/// (2) is called multiple times (once per chunk) by the server.
|
||||
///
|
||||
/// We would ideally like to have steps (1) and (2) in one atomic handler but can't because the evaluation on the server side is divided
|
||||
/// into two dependent phases: FunctionCatBoostEvaluate::getReturnTypeImpl() and ::executeImpl(). So the model may in principle be unloaded
|
||||
/// from the library-bridge between steps (1) and (2). Step (2) checks if that is the case and fails gracefully. This is okay because that
|
||||
/// situation considered exceptional and rare.
|
||||
///
|
||||
/// An update of a model is performed by unloading it. The first call to "catboost_GetTreeCount" brings it into memory again.
|
||||
///
|
||||
/// Further handlers are provided for unloading a specific model, for unloading all models or for retrieving information about the loaded
|
||||
/// models for display in a system view.
|
||||
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
|
||||
// Handler for pinging the library-bridge for catboost access (used for handshake)
|
||||
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -51,7 +51,6 @@
|
||||
#include <Interpreters/DNSCacheUpdater.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
#include <Interpreters/ExternalModelsLoader.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Interpreters/loadMetadata.h>
|
||||
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
|
||||
@ -1158,7 +1157,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->setExternalAuthenticatorsConfig(*config);
|
||||
|
||||
global_context->loadOrReloadDictionaries(*config);
|
||||
global_context->loadOrReloadModels(*config);
|
||||
global_context->loadOrReloadUserDefinedExecutableFunctions(*config);
|
||||
|
||||
global_context->setRemoteHostFilter(*config);
|
||||
@ -1739,17 +1737,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
throw;
|
||||
}
|
||||
|
||||
/// try to load models immediately, throw on error and die
|
||||
try
|
||||
{
|
||||
global_context->loadOrReloadModels(config());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "Caught exception while loading dictionaries.");
|
||||
throw;
|
||||
}
|
||||
|
||||
/// try to load user defined executable functions, throw on error and die
|
||||
try
|
||||
{
|
||||
|
@ -278,6 +278,71 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final
|
||||
{
|
||||
std::unique_ptr<UInt8[]> final_null_flags = std::make_unique<UInt8[]>(row_end);
|
||||
const size_t filter_column_num = number_of_arguments - 1;
|
||||
|
||||
if (is_nullable[filter_column_num])
|
||||
{
|
||||
const ColumnNullable * nullable_column = assert_cast<const ColumnNullable *>(columns[filter_column_num]);
|
||||
const IColumn & filter_column = nullable_column->getNestedColumn();
|
||||
const UInt8 * filter_null_map = nullable_column->getNullMapColumn().getData().data();
|
||||
const UInt8 * filter_values = assert_cast<const ColumnUInt8 &>(filter_column).getData().data();
|
||||
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
final_null_flags[i] = (null_is_skipped && filter_null_map[i]) || !filter_values[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const IColumn * filter_column = columns[filter_column_num];
|
||||
const UInt8 * filter_values = assert_cast<const ColumnUInt8 *>(filter_column)->getData().data();
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
final_null_flags[i] = !filter_values[i];
|
||||
}
|
||||
|
||||
const IColumn * nested_columns[number_of_arguments];
|
||||
for (size_t arg = 0; arg < number_of_arguments; arg++)
|
||||
{
|
||||
if (is_nullable[arg])
|
||||
{
|
||||
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[arg]);
|
||||
if (null_is_skipped && (arg != filter_column_num))
|
||||
{
|
||||
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
|
||||
const UInt8 * col_null_map = nullmap_column.getData().data();
|
||||
for (size_t r = row_begin; r < row_end; r++)
|
||||
{
|
||||
final_null_flags[r] |= col_null_map[r];
|
||||
}
|
||||
}
|
||||
nested_columns[arg] = &nullable_col.getNestedColumn();
|
||||
}
|
||||
else
|
||||
nested_columns[arg] = columns[arg];
|
||||
}
|
||||
|
||||
bool at_least_one = false;
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (!final_null_flags[i])
|
||||
{
|
||||
at_least_one = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (at_least_one)
|
||||
{
|
||||
this->setFlag(place);
|
||||
this->nested_function->addBatchSinglePlaceNotNull(
|
||||
row_begin, row_end, this->nestedPlace(place), nested_columns, final_null_flags.get(), arena, -1);
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
|
||||
|
@ -414,6 +414,109 @@ public:
|
||||
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const final
|
||||
{
|
||||
/// We are going to merge all the flags into a single one to be able to call the nested batching functions
|
||||
std::vector<const UInt8 *> nullable_filters;
|
||||
const IColumn * nested_columns[number_of_arguments];
|
||||
|
||||
std::unique_ptr<UInt8[]> final_flags = nullptr;
|
||||
const UInt8 * final_flags_ptr = nullptr;
|
||||
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
final_flags = std::make_unique<UInt8[]>(row_end);
|
||||
final_flags_ptr = final_flags.get();
|
||||
|
||||
bool included_elements = 0;
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
final_flags[i] = !flags.data()[i];
|
||||
included_elements += !!flags.data()[i];
|
||||
}
|
||||
|
||||
if (included_elements == 0)
|
||||
return;
|
||||
if (included_elements != (row_end - row_begin))
|
||||
{
|
||||
nullable_filters.push_back(final_flags_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_arguments; ++i)
|
||||
{
|
||||
if (is_nullable[i])
|
||||
{
|
||||
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
|
||||
nested_columns[i] = &nullable_col.getNestedColumn();
|
||||
if constexpr (null_is_skipped)
|
||||
{
|
||||
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
|
||||
nullable_filters.push_back(nullmap_column.getData().data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nested_columns[i] = columns[i];
|
||||
}
|
||||
}
|
||||
|
||||
bool found_one = false;
|
||||
|
||||
chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
|
||||
if (nullable_filters.size() == 1)
|
||||
{
|
||||
/// We can avoid making copies of the only filter but we still need to check that there is data to be added
|
||||
final_flags_ptr = nullable_filters[0];
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (!final_flags_ptr[i])
|
||||
{
|
||||
found_one = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!final_flags)
|
||||
{
|
||||
final_flags = std::make_unique<UInt8[]>(row_end);
|
||||
final_flags_ptr = final_flags.get();
|
||||
}
|
||||
|
||||
const size_t filter_start = nullable_filters[0] == final_flags_ptr ? 1 : 0;
|
||||
for (size_t filter = filter_start; filter < nullable_filters.size(); filter++)
|
||||
{
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
final_flags[i] |= nullable_filters[filter][i];
|
||||
}
|
||||
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (!final_flags_ptr[i])
|
||||
{
|
||||
found_one = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_one)
|
||||
return; // Nothing to do and nothing to mark
|
||||
|
||||
this->setFlag(place);
|
||||
this->nested_function->addBatchSinglePlaceNotNull(
|
||||
row_begin, row_end, this->nestedPlace(place), nested_columns, final_flags_ptr, arena, -1);
|
||||
}
|
||||
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
||||
|
@ -424,14 +424,30 @@ public:
|
||||
|
||||
alloc(new_size_degree);
|
||||
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
if (m_size <= 1)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
if (x == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(x);
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
if (x == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(x);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto hs = std::make_unique<HashValue[]>(m_size);
|
||||
rb.readStrict(reinterpret_cast<char *>(hs.get()), m_size * sizeof(HashValue));
|
||||
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
{
|
||||
if (hs[i] == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(hs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -458,11 +474,24 @@ public:
|
||||
resize(new_size_degree);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
if (rhs_size <= 1)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
insertHash(x);
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
insertHash(x);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto hs = std::make_unique<HashValue[]>(rhs_size);
|
||||
rb.readStrict(reinterpret_cast<char *>(hs.get()), rhs_size * sizeof(HashValue));
|
||||
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
{
|
||||
insertHash(hs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
194
src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp
Normal file
194
src/BridgeHelper/CatBoostLibraryBridgeHelper.cpp
Normal file
@ -0,0 +1,194 @@
|
||||
#include "CatBoostLibraryBridgeHelper.h"
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Core/Block.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Formats/NativeReader.h>
|
||||
#include <Formats/NativeWriter.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
|
||||
#include <random>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
CatBoostLibraryBridgeHelper::CatBoostLibraryBridgeHelper(
|
||||
ContextPtr context_,
|
||||
std::optional<String> model_path_,
|
||||
std::optional<String> library_path_)
|
||||
: LibraryBridgeHelper(context_->getGlobalContext())
|
||||
, model_path(model_path_)
|
||||
, library_path(library_path_)
|
||||
{
|
||||
}
|
||||
|
||||
Poco::URI CatBoostLibraryBridgeHelper::getPingURI() const
|
||||
{
|
||||
auto uri = createBaseURI();
|
||||
uri.setPath(PING_HANDLER);
|
||||
return uri;
|
||||
}
|
||||
|
||||
Poco::URI CatBoostLibraryBridgeHelper::getMainURI() const
|
||||
{
|
||||
auto uri = createBaseURI();
|
||||
uri.setPath(MAIN_HANDLER);
|
||||
return uri;
|
||||
}
|
||||
|
||||
|
||||
Poco::URI CatBoostLibraryBridgeHelper::createRequestURI(const String & method) const
|
||||
{
|
||||
auto uri = getMainURI();
|
||||
uri.addQueryParameter("version", std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION));
|
||||
uri.addQueryParameter("method", method);
|
||||
return uri;
|
||||
}
|
||||
|
||||
bool CatBoostLibraryBridgeHelper::bridgeHandShake()
|
||||
{
|
||||
String result;
|
||||
try
|
||||
{
|
||||
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
|
||||
readString(result, buf);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (result != "1")
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check that bridge and server have the same version.", result);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
ExternalModelInfos CatBoostLibraryBridgeHelper::listModels()
|
||||
{
|
||||
startBridgeSync();
|
||||
|
||||
ReadWriteBufferFromHTTP buf(
|
||||
createRequestURI(CATBOOST_LIST_METHOD),
|
||||
Poco::Net::HTTPRequest::HTTP_POST,
|
||||
[](std::ostream &) {},
|
||||
http_timeouts, credentials);
|
||||
|
||||
ExternalModelInfos result;
|
||||
|
||||
UInt64 num_rows;
|
||||
readIntBinary(num_rows, buf);
|
||||
|
||||
for (UInt64 i = 0; i < num_rows; ++i)
|
||||
{
|
||||
ExternalModelInfo info;
|
||||
|
||||
readStringBinary(info.model_path, buf);
|
||||
readStringBinary(info.model_type, buf);
|
||||
|
||||
UInt64 t;
|
||||
readIntBinary(t, buf);
|
||||
info.loading_start_time = std::chrono::system_clock::from_time_t(t);
|
||||
|
||||
readIntBinary(t, buf);
|
||||
info.loading_duration = std::chrono::milliseconds(t);
|
||||
|
||||
result.push_back(info);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CatBoostLibraryBridgeHelper::removeModel()
|
||||
{
|
||||
startBridgeSync();
|
||||
|
||||
assert(model_path);
|
||||
|
||||
ReadWriteBufferFromHTTP buf(
|
||||
createRequestURI(CATBOOST_REMOVEMODEL_METHOD),
|
||||
Poco::Net::HTTPRequest::HTTP_POST,
|
||||
[this](std::ostream & os)
|
||||
{
|
||||
os << "model_path=" << escapeForFileName(*model_path);
|
||||
},
|
||||
http_timeouts, credentials);
|
||||
|
||||
String result;
|
||||
readStringBinary(result, buf);
|
||||
assert(result == "1");
|
||||
}
|
||||
|
||||
void CatBoostLibraryBridgeHelper::removeAllModels()
|
||||
{
|
||||
startBridgeSync();
|
||||
|
||||
ReadWriteBufferFromHTTP buf(
|
||||
createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD),
|
||||
Poco::Net::HTTPRequest::HTTP_POST,
|
||||
[](std::ostream &){},
|
||||
http_timeouts, credentials);
|
||||
|
||||
String result;
|
||||
readStringBinary(result, buf);
|
||||
assert(result == "1");
|
||||
}
|
||||
|
||||
size_t CatBoostLibraryBridgeHelper::getTreeCount()
|
||||
{
|
||||
startBridgeSync();
|
||||
|
||||
assert(model_path && library_path);
|
||||
|
||||
ReadWriteBufferFromHTTP buf(
|
||||
createRequestURI(CATBOOST_GETTREECOUNT_METHOD),
|
||||
Poco::Net::HTTPRequest::HTTP_POST,
|
||||
[this](std::ostream & os)
|
||||
{
|
||||
os << "library_path=" << escapeForFileName(*library_path) << "&";
|
||||
os << "model_path=" << escapeForFileName(*model_path);
|
||||
},
|
||||
http_timeouts, credentials);
|
||||
|
||||
size_t result;
|
||||
readIntBinary(result, buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & columns)
|
||||
{
|
||||
startBridgeSync();
|
||||
|
||||
WriteBufferFromOwnString string_write_buf;
|
||||
Block block(columns);
|
||||
NativeWriter serializer(string_write_buf, /*client_revision*/ 0, block);
|
||||
serializer.write(block);
|
||||
|
||||
assert(model_path);
|
||||
|
||||
ReadWriteBufferFromHTTP buf(
|
||||
createRequestURI(CATBOOST_LIB_EVALUATE_METHOD),
|
||||
Poco::Net::HTTPRequest::HTTP_POST,
|
||||
[this, serialized = string_write_buf.str()](std::ostream & os)
|
||||
{
|
||||
os << "model_path=" << escapeForFileName(*model_path) << "&";
|
||||
os << "data=" << escapeForFileName(serialized);
|
||||
},
|
||||
http_timeouts, credentials);
|
||||
|
||||
NativeReader deserializer(buf, /*server_revision*/ 0);
|
||||
Block block_read = deserializer.read();
|
||||
|
||||
return block_read.getColumns()[0];
|
||||
}
|
||||
|
||||
}
|
53
src/BridgeHelper/CatBoostLibraryBridgeHelper.h
Normal file
53
src/BridgeHelper/CatBoostLibraryBridgeHelper.h
Normal file
@ -0,0 +1,53 @@
|
||||
#pragma once
|
||||
|
||||
#include <BridgeHelper/LibraryBridgeHelper.h>
|
||||
#include <Common/ExternalModelInfo.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <optional>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CatBoostLibraryBridgeHelper : public LibraryBridgeHelper
|
||||
{
|
||||
public:
|
||||
static constexpr inline auto PING_HANDLER = "/catboost_ping";
|
||||
static constexpr inline auto MAIN_HANDLER = "/catboost_request";
|
||||
|
||||
explicit CatBoostLibraryBridgeHelper(
|
||||
ContextPtr context_,
|
||||
std::optional<String> model_path_ = std::nullopt,
|
||||
std::optional<String> library_path_ = std::nullopt);
|
||||
|
||||
ExternalModelInfos listModels();
|
||||
|
||||
void removeModel(); /// requires model_path
|
||||
void removeAllModels();
|
||||
|
||||
size_t getTreeCount(); /// requires model_path and library_path
|
||||
ColumnPtr evaluate(const ColumnsWithTypeAndName & columns); /// requires model_path
|
||||
|
||||
protected:
|
||||
Poco::URI getPingURI() const override;
|
||||
|
||||
Poco::URI getMainURI() const override;
|
||||
|
||||
bool bridgeHandShake() override;
|
||||
|
||||
private:
|
||||
static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list";
|
||||
static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel";
|
||||
static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels";
|
||||
static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount";
|
||||
static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate";
|
||||
|
||||
Poco::URI createRequestURI(const String & method) const;
|
||||
|
||||
const std::optional<String> model_path;
|
||||
const std::optional<String> library_path;
|
||||
};
|
||||
|
||||
}
|
@ -12,8 +12,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Common base class for XDBC and Library bridge helpers.
|
||||
/// Contains helper methods to check/start bridge sync.
|
||||
/// Base class for server-side bridge helpers, e.g. xdbc-bridge and library-bridge.
|
||||
/// Contains helper methods to check/start bridge sync
|
||||
class IBridgeHelper: protected WithContext
|
||||
{
|
||||
|
||||
|
@ -722,5 +722,3 @@ public:
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Key, typename Payload> constexpr size_t ArrayCache<Key, Payload>::min_chunk_size;
|
||||
|
@ -176,10 +176,10 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string
|
||||
|
||||
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
|
||||
{
|
||||
/// Under high memory pressure, any new allocation will definitelly lead
|
||||
/// to MEMORY_LIMIT_EXCEEDED exception.
|
||||
/// Under high memory pressure, new allocations throw a
|
||||
/// MEMORY_LIMIT_EXCEEDED exception.
|
||||
///
|
||||
/// And in this case the exception will not be logged, so let's block the
|
||||
/// In this case the exception will not be logged, so let's block the
|
||||
/// MemoryTracker until the exception will be logged.
|
||||
LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
|
||||
|
||||
@ -189,8 +189,8 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
|
||||
|
||||
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
|
||||
{
|
||||
/// Under high memory pressure, any new allocation will definitelly lead
|
||||
/// to MEMORY_LIMIT_EXCEEDED exception.
|
||||
/// Under high memory pressure, new allocations throw a
|
||||
/// MEMORY_LIMIT_EXCEEDED exception.
|
||||
///
|
||||
/// And in this case the exception will not be logged, so let's block the
|
||||
/// MemoryTracker until the exception will be logged.
|
||||
|
20
src/Common/ExternalModelInfo.h
Normal file
20
src/Common/ExternalModelInfo.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Details about external machine learning model, used by clickhouse-server and clickhouse-library-bridge
|
||||
struct ExternalModelInfo
|
||||
{
|
||||
String model_path;
|
||||
String model_type;
|
||||
std::chrono::system_clock::time_point loading_start_time; /// serialized as std::time_t
|
||||
std::chrono::milliseconds loading_duration; /// serialized as UInt64
|
||||
};
|
||||
|
||||
using ExternalModelInfos = std::vector<ExternalModelInfo>;
|
||||
|
||||
}
|
@ -135,7 +135,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
|
||||
struct kinfo_proc kp;
|
||||
size_t len = sizeof(struct kinfo_proc);
|
||||
|
||||
if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0))
|
||||
if (-1 == ::sysctl(mib, 4, &kp, &len, nullptr, 0))
|
||||
throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
if (sizeof(struct kinfo_proc) != len)
|
||||
|
@ -130,16 +130,15 @@ void SpanHolder::finish() noexcept
|
||||
try
|
||||
{
|
||||
auto log = current_thread_trace_context.span_log.lock();
|
||||
if (!log)
|
||||
|
||||
/// The log might be disabled, check it before use
|
||||
if (log)
|
||||
{
|
||||
// The log might be disabled.
|
||||
return;
|
||||
this->finish_time_us
|
||||
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
log->add(OpenTelemetrySpanLogElement(*this));
|
||||
}
|
||||
|
||||
this->finish_time_us
|
||||
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
log->add(OpenTelemetrySpanLogElement(*this));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -189,6 +189,9 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu
|
||||
|
||||
ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
|
||||
|
||||
if (ret->coordination_settings->quorum_reads)
|
||||
LOG_WARNING(&Poco::Logger::get("KeeperConfigurationAndSettings"), "Setting 'quorum_reads' is deprecated. Please use 'read_mode'");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,7 @@ struct Settings;
|
||||
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
|
||||
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, leadership_expiry, 0, "How often will leader node check if it still has majority. Set it lower or equal to election_timeout_lower_bound_ms to have linearizable reads.", 0) \
|
||||
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
|
||||
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
|
||||
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
|
||||
@ -38,11 +39,12 @@ struct Settings;
|
||||
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
|
||||
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
|
||||
M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
|
||||
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
|
||||
M(Bool, quorum_reads, false, "Deprecated - use read_mode. Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
|
||||
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
|
||||
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
|
||||
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
|
||||
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0)
|
||||
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
|
||||
M(String, read_mode, "nonlinear", "How should reads be processed. Valid values: 'nonlinear', 'fastlinear', 'quorum'. 'nonlinear' is the fastest option because there are no consistency requirements", 0)
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Coordination/KeeperDispatcher.h>
|
||||
#include <libnuraft/async.hxx>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <future>
|
||||
@ -6,6 +7,8 @@
|
||||
#include <Poco/Path.h>
|
||||
#include <Common/hex.h>
|
||||
#include <filesystem>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
|
||||
@ -30,22 +33,83 @@ namespace ErrorCodes
|
||||
|
||||
KeeperDispatcher::KeeperDispatcher()
|
||||
: responses_queue(std::numeric_limits<size_t>::max())
|
||||
, read_requests_queue(std::numeric_limits<size_t>::max())
|
||||
, finalize_requests_queue(std::numeric_limits<size_t>::max())
|
||||
, configuration_and_settings(std::make_shared<KeeperConfigurationAndSettings>())
|
||||
, log(&Poco::Logger::get("KeeperDispatcher"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/// ZooKeepers has 2 requirements:
|
||||
/// - writes need to be linearizable
|
||||
/// - all requests from single session need to be processed in the order of their arrival
|
||||
///
|
||||
/// Because of that, we cannot process read and write requests from SAME session at the same time.
|
||||
/// To be able to process read and write requests in parallel we need to make sure that only 1 type
|
||||
/// of request is being processed from a single session.
|
||||
/// Multiple types from different sessions can be processed at the same time.
|
||||
///
|
||||
/// We do some in-session housekeeping to make sure that the multithreaded request processing is correct.
|
||||
/// When a request is received from a client, we check if there are requests being processed from that same
|
||||
/// session, and if yes, of what type. If the types are the same, and there are no requests of different
|
||||
/// type inbetetween, we can instanly add it to active request queue. Otherwise, we need to wait until
|
||||
/// all requests of the other type are processed.
|
||||
///
|
||||
/// There are multiple threads used for processing the request, each of them communicating with a queue.
|
||||
/// Assumption: only one type of request is being processed from a same session at any point in time (read or write).
|
||||
///
|
||||
/// requestThread -> requests currently being processed
|
||||
/// readRequestThread -> thread for processing read requests
|
||||
/// finalizeRequestThread -> thread for finalizing requests:
|
||||
/// - in-session housekeeping, add requests to the active request queue if there are any
|
||||
///
|
||||
/// If reads are linearizable without quorum, a request can possibly wait for a certain log to be committed.
|
||||
/// In that case we add it to the waiting queue for that log.
|
||||
/// When that log is committed, the committing thread will send that read request to readRequestThread so it can be processed.
|
||||
///
|
||||
void KeeperDispatcher::requestThread()
|
||||
{
|
||||
setThreadName("KeeperReqT");
|
||||
|
||||
/// Result of requests batch from previous iteration
|
||||
RaftAppendResult prev_result = nullptr;
|
||||
/// Requests from previous iteration. We store them to be able
|
||||
/// to send errors to the client.
|
||||
KeeperStorage::RequestsForSessions prev_batch;
|
||||
RaftResult prev_result = nullptr;
|
||||
const auto previous_quorum_done = [&] { return !prev_result || prev_result->has_result() || prev_result->get_result_code() != nuraft::cmd_result_code::OK; };
|
||||
|
||||
const auto needs_quorum = [](const auto & coordination_settings, const auto & request)
|
||||
{
|
||||
return coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum" || !request.request->isReadRequest();
|
||||
};
|
||||
|
||||
KeeperStorage::RequestsForSessions quorum_requests;
|
||||
KeeperStorage::RequestsForSessions read_requests;
|
||||
|
||||
auto process_quorum_requests = [&, this]() mutable
|
||||
{
|
||||
/// Forcefully process all previous pending requests
|
||||
if (prev_result)
|
||||
forceWaitAndProcessResult(prev_result);
|
||||
|
||||
prev_result = server->putRequestBatch(quorum_requests);
|
||||
|
||||
if (prev_result)
|
||||
{
|
||||
prev_result->when_ready([&, requests_for_sessions = std::move(quorum_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> &) mutable
|
||||
{
|
||||
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
|
||||
else if (result.get_result_code() != nuraft::cmd_result_code::OK)
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
|
||||
});
|
||||
}
|
||||
|
||||
quorum_requests.clear();
|
||||
};
|
||||
|
||||
/// ZooKeeper requires that the requests inside a single session are processed in a strict order
|
||||
/// (we cannot process later requests before all the previous once are processed)
|
||||
/// By making sure that at this point we can either have just read requests or just write requests
|
||||
/// from a single session, we can process them independently
|
||||
while (!shutdown_called)
|
||||
{
|
||||
KeeperStorage::RequestForSession request;
|
||||
@ -54,94 +118,67 @@ void KeeperDispatcher::requestThread()
|
||||
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
|
||||
uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
|
||||
|
||||
/// The code below do a very simple thing: batch all write (quorum) requests into vector until
|
||||
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from
|
||||
/// the ability to process read requests without quorum (from local state). So when we are collecting
|
||||
/// requests into a batch we must check that the new request is not read request. Otherwise we have to
|
||||
/// process all already accumulated write requests, wait them synchronously and only after that process
|
||||
/// read request. So reads are some kind of "separator" for writes.
|
||||
try
|
||||
{
|
||||
if (requests_queue->tryPop(request, max_wait))
|
||||
if (active_requests_queue->tryPop(request, max_wait))
|
||||
{
|
||||
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
KeeperStorage::RequestsForSessions current_batch;
|
||||
if (needs_quorum(coordination_settings, request))
|
||||
quorum_requests.emplace_back(request);
|
||||
else
|
||||
read_requests.emplace_back(request);
|
||||
|
||||
bool has_read_request = false;
|
||||
|
||||
/// If new request is not read request or we must to process it through quorum.
|
||||
/// Otherwise we will process it locally.
|
||||
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
|
||||
/// Waiting until previous append will be successful, or batch is big enough
|
||||
/// has_result == false && get_result_code == OK means that our request still not processed.
|
||||
/// Sometimes NuRaft set errorcode without setting result, so we check both here.
|
||||
while (true)
|
||||
{
|
||||
current_batch.emplace_back(request);
|
||||
if (quorum_requests.size() > max_batch_size)
|
||||
break;
|
||||
|
||||
/// Waiting until previous append will be successful, or batch is big enough
|
||||
/// has_result == false && get_result_code == OK means that our request still not processed.
|
||||
/// Sometimes NuRaft set errorcode without setting result, so we check both here.
|
||||
while (prev_result && (!prev_result->has_result() && prev_result->get_result_code() == nuraft::cmd_result_code::OK) && current_batch.size() <= max_batch_size)
|
||||
if (read_requests.size() > max_batch_size)
|
||||
{
|
||||
/// Trying to get batch requests as fast as possible
|
||||
if (requests_queue->tryPop(request, 1))
|
||||
{
|
||||
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
|
||||
/// Don't append read request into batch, we have to process them separately
|
||||
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
|
||||
{
|
||||
has_read_request = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
processReadRequests(coordination_settings, read_requests);
|
||||
|
||||
current_batch.emplace_back(request);
|
||||
}
|
||||
}
|
||||
|
||||
if (shutdown_called)
|
||||
if (previous_quorum_done())
|
||||
break;
|
||||
}
|
||||
|
||||
/// Trying to get batch requests as fast as possible
|
||||
if (active_requests_queue->tryPop(request, 1))
|
||||
{
|
||||
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
|
||||
if (needs_quorum(coordination_settings, request))
|
||||
quorum_requests.emplace_back(request);
|
||||
else
|
||||
read_requests.emplace_back(request);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// batch of read requests can send at most one request
|
||||
/// so we don't care if the previous batch hasn't received response
|
||||
if (!read_requests.empty())
|
||||
processReadRequests(coordination_settings, read_requests);
|
||||
|
||||
/// if we still didn't process previous batch we can
|
||||
/// increase are current batch even more
|
||||
if (previous_quorum_done())
|
||||
break;
|
||||
}
|
||||
|
||||
if (shutdown_called)
|
||||
break;
|
||||
}
|
||||
else
|
||||
has_read_request = true;
|
||||
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
/// Forcefully process all previous pending requests
|
||||
if (prev_result)
|
||||
forceWaitAndProcessResult(prev_result, prev_batch);
|
||||
if (!quorum_requests.empty())
|
||||
process_quorum_requests();
|
||||
|
||||
/// Process collected write requests batch
|
||||
if (!current_batch.empty())
|
||||
{
|
||||
auto result = server->putRequestBatch(current_batch);
|
||||
|
||||
if (result)
|
||||
{
|
||||
if (has_read_request) /// If we will execute read request next, than we have to process result now
|
||||
forceWaitAndProcessResult(result, current_batch);
|
||||
}
|
||||
else
|
||||
{
|
||||
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
|
||||
current_batch.clear();
|
||||
}
|
||||
|
||||
prev_batch = std::move(current_batch);
|
||||
prev_result = result;
|
||||
}
|
||||
|
||||
/// Read request always goes after write batch (last request)
|
||||
if (has_read_request)
|
||||
{
|
||||
if (server->isLeaderAlive())
|
||||
server->putLocalReadRequest(request);
|
||||
else
|
||||
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
@ -151,6 +188,72 @@ void KeeperDispatcher::requestThread()
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperDispatcher::processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests)
|
||||
{
|
||||
if (coordination_settings->read_mode.toString() == "fastlinear")
|
||||
{
|
||||
// we just want to know what's the current latest committed log on Leader node
|
||||
auto leader_info_result = server->getLeaderInfo();
|
||||
if (leader_info_result)
|
||||
{
|
||||
leader_info_result->when_ready([&, requests_for_sessions = std::move(read_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> & exception) mutable
|
||||
{
|
||||
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
|
||||
{
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.get_result_code() != nuraft::cmd_result_code::OK)
|
||||
{
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
|
||||
return;
|
||||
}
|
||||
|
||||
if (exception)
|
||||
{
|
||||
LOG_INFO(log, "Got exception while waiting for read results {}", exception->what());
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
|
||||
return;
|
||||
}
|
||||
|
||||
auto & leader_info_ctx = result.get();
|
||||
|
||||
if (!leader_info_ctx)
|
||||
{
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
|
||||
return;
|
||||
}
|
||||
|
||||
KeeperServer::NodeInfo leader_info;
|
||||
leader_info.term = leader_info_ctx->get_ulong();
|
||||
leader_info.last_committed_index = leader_info_ctx->get_ulong();
|
||||
std::lock_guard lock(leader_waiter_mutex);
|
||||
auto node_info = server->getNodeInfo();
|
||||
|
||||
/// we're behind, we need to wait
|
||||
if (node_info.term < leader_info.term || node_info.last_committed_index < leader_info.last_committed_index)
|
||||
{
|
||||
auto & leader_waiter = leader_waiters[leader_info];
|
||||
leader_waiter.insert(leader_waiter.end(), requests_for_sessions.begin(), requests_for_sessions.end());
|
||||
LOG_TRACE(log, "waiting for term {}, idx {}", leader_info.term, leader_info.last_committed_index);
|
||||
}
|
||||
/// process it in background thread
|
||||
else if (!read_requests_queue.push(std::move(requests_for_sessions)))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
});
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(coordination_settings->read_mode.toString() == "nonlinear");
|
||||
if (!read_requests_queue.push(std::move(read_requests)))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
}
|
||||
|
||||
read_requests.clear();
|
||||
}
|
||||
|
||||
void KeeperDispatcher::responseThread()
|
||||
{
|
||||
setThreadName("KeeperRspT");
|
||||
@ -200,6 +303,65 @@ void KeeperDispatcher::snapshotThread()
|
||||
}
|
||||
}
|
||||
|
||||
/// Background thread for processing read requests
|
||||
void KeeperDispatcher::readRequestThread()
|
||||
{
|
||||
setThreadName("KeeperReadT");
|
||||
while (!shutdown_called)
|
||||
{
|
||||
KeeperStorage::RequestsForSessions requests;
|
||||
if (!read_requests_queue.pop(requests))
|
||||
break;
|
||||
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
try
|
||||
{
|
||||
for (const auto & request_info : requests)
|
||||
{
|
||||
if (server->isLeaderAlive())
|
||||
server->putLocalReadRequest(request_info);
|
||||
else
|
||||
addErrorResponses({request_info}, Coordination::Error::ZCONNECTIONLOSS);
|
||||
}
|
||||
|
||||
if (!finalize_requests_queue.push(std::move(requests)))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We finalize requests every time we commit a single log with request
|
||||
/// or process a batch of read requests.
|
||||
/// Because it can get heavy, we do it in background thread.
|
||||
void KeeperDispatcher::finalizeRequestsThread()
|
||||
{
|
||||
setThreadName("KeeperFinalT");
|
||||
while (!shutdown_called)
|
||||
{
|
||||
KeeperStorage::RequestsForSessions requests;
|
||||
if (!finalize_requests_queue.pop(requests))
|
||||
break;
|
||||
|
||||
if (shutdown_called)
|
||||
break;
|
||||
|
||||
try
|
||||
{
|
||||
finalizeRequests(requests);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
@ -255,6 +417,30 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
|
||||
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
|
||||
request_info.session_id = session_id;
|
||||
|
||||
{
|
||||
std::lock_guard lock{unprocessed_request_mutex};
|
||||
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
|
||||
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
|
||||
{
|
||||
auto & unprocessed_requests = unprocessed_requests_for_session[session_id];
|
||||
unprocessed_requests.unprocessed_num = 1;
|
||||
unprocessed_requests.is_read = request->isReadRequest();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & unprocessed_requests = unprocessed_requests_it->second;
|
||||
|
||||
/// queue is not empty, or the request types don't match, put it in the waiting queue
|
||||
if (!unprocessed_requests.request_queue.empty() || unprocessed_requests.is_read != request->isReadRequest())
|
||||
{
|
||||
unprocessed_requests.request_queue.push_back(std::move(request_info));
|
||||
return true;
|
||||
}
|
||||
|
||||
++unprocessed_requests.unprocessed_num;
|
||||
}
|
||||
}
|
||||
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
|
||||
if (shutdown_called)
|
||||
@ -263,10 +449,10 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
|
||||
/// Put close requests without timeouts
|
||||
if (request->getOpNum() == Coordination::OpNum::Close)
|
||||
{
|
||||
if (!requests_queue->push(std::move(request_info)))
|
||||
if (!active_requests_queue->push(std::move(request_info)))
|
||||
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
|
||||
else if (!active_requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
|
||||
{
|
||||
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
@ -279,13 +465,23 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
|
||||
LOG_DEBUG(log, "Initializing storage dispatcher");
|
||||
|
||||
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
|
||||
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
|
||||
active_requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
|
||||
|
||||
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
|
||||
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
|
||||
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
|
||||
read_request_thread = ThreadFromGlobalPool([this] { readRequestThread(); });
|
||||
finalize_requests_thread = ThreadFromGlobalPool([this] { finalizeRequestsThread(); });
|
||||
|
||||
server = std::make_unique<KeeperServer>(configuration_and_settings, config, responses_queue, snapshots_queue);
|
||||
server = std::make_unique<KeeperServer>(
|
||||
configuration_and_settings,
|
||||
config,
|
||||
responses_queue,
|
||||
snapshots_queue,
|
||||
[this](const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
|
||||
{ onRequestCommit(request_for_session, log_term, log_idx); },
|
||||
[this](uint64_t term, uint64_t last_idx)
|
||||
{ onApplySnapshot(term, last_idx); });
|
||||
|
||||
try
|
||||
{
|
||||
@ -333,9 +529,9 @@ void KeeperDispatcher::shutdown()
|
||||
if (session_cleaner_thread.joinable())
|
||||
session_cleaner_thread.join();
|
||||
|
||||
if (requests_queue)
|
||||
if (active_requests_queue)
|
||||
{
|
||||
requests_queue->finish();
|
||||
active_requests_queue->finish();
|
||||
|
||||
if (request_thread.joinable())
|
||||
request_thread.join();
|
||||
@ -349,6 +545,14 @@ void KeeperDispatcher::shutdown()
|
||||
if (snapshot_thread.joinable())
|
||||
snapshot_thread.join();
|
||||
|
||||
read_requests_queue.finish();
|
||||
if (read_request_thread.joinable())
|
||||
read_request_thread.join();
|
||||
|
||||
finalize_requests_queue.finish();
|
||||
if (finalize_requests_thread.joinable())
|
||||
finalize_requests_thread.join();
|
||||
|
||||
update_configuration_queue.finish();
|
||||
if (update_configuration_thread.joinable())
|
||||
update_configuration_thread.join();
|
||||
@ -357,7 +561,7 @@ void KeeperDispatcher::shutdown()
|
||||
KeeperStorage::RequestForSession request_for_session;
|
||||
|
||||
/// Set session expired for all pending requests
|
||||
while (requests_queue && requests_queue->tryPop(request_for_session))
|
||||
while (active_requests_queue && active_requests_queue->tryPop(request_for_session))
|
||||
{
|
||||
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
|
||||
auto response = request_for_session.request->makeResponse();
|
||||
@ -474,7 +678,7 @@ void KeeperDispatcher::sessionCleanerTask()
|
||||
};
|
||||
{
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
if (!requests_queue->push(std::move(request_info)))
|
||||
if (!active_requests_queue->push(std::move(request_info)))
|
||||
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
|
||||
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
|
||||
}
|
||||
@ -524,19 +728,12 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
|
||||
void KeeperDispatcher::forceWaitAndProcessResult(RaftResult & result)
|
||||
{
|
||||
if (!result->has_result())
|
||||
result->get();
|
||||
|
||||
/// If we get some errors, than send them to clients
|
||||
if (!result->get_accepted() || result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
|
||||
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
|
||||
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
|
||||
|
||||
result = nullptr;
|
||||
requests_for_sessions.clear();
|
||||
}
|
||||
|
||||
int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
|
||||
@ -584,7 +781,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
|
||||
/// Push new session request to queue
|
||||
{
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms))
|
||||
if (!active_requests_queue->tryPush(std::move(request_info), session_timeout_ms))
|
||||
throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
|
||||
}
|
||||
@ -657,6 +854,122 @@ void KeeperDispatcher::updateConfigurationThread()
|
||||
}
|
||||
}
|
||||
|
||||
// Used to update the state for a session based on the requests
|
||||
// - update the number of current unprocessed requests for the session
|
||||
// - if the number of unprocessed requests is 0, we can start adding next type of requests
|
||||
// from unprocessed requests queue to the active queue
|
||||
void KeeperDispatcher::finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions)
|
||||
{
|
||||
std::unordered_map<int64_t, size_t> counts_for_session;
|
||||
|
||||
for (const auto & request_for_session : requests_for_sessions)
|
||||
{
|
||||
++counts_for_session[request_for_session.session_id];
|
||||
}
|
||||
|
||||
std::lock_guard lock{unprocessed_request_mutex};
|
||||
for (const auto [session_id, count] : counts_for_session)
|
||||
{
|
||||
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
|
||||
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
|
||||
continue;
|
||||
|
||||
auto & unprocessed_requests = unprocessed_requests_it->second;
|
||||
unprocessed_requests.unprocessed_num -= count;
|
||||
|
||||
if (unprocessed_requests.unprocessed_num == 0)
|
||||
{
|
||||
if (!unprocessed_requests.request_queue.empty())
|
||||
{
|
||||
auto & unprocessed_requests_queue = unprocessed_requests.request_queue;
|
||||
unprocessed_requests.is_read = !unprocessed_requests.is_read;
|
||||
// start adding next type of requests
|
||||
while (!unprocessed_requests_queue.empty() && unprocessed_requests_queue.front().request->isReadRequest() == unprocessed_requests.is_read)
|
||||
{
|
||||
auto & front_request = unprocessed_requests_queue.front();
|
||||
|
||||
/// Put close requests without timeouts
|
||||
if (front_request.request->getOpNum() == Coordination::OpNum::Close)
|
||||
{
|
||||
if (!active_requests_queue->push(std::move(front_request)))
|
||||
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else if (!active_requests_queue->tryPush(std::move(front_request), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
|
||||
{
|
||||
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
|
||||
++unprocessed_requests.unprocessed_num;
|
||||
unprocessed_requests_queue.pop_front();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unprocessed_requests_for_session.erase(unprocessed_requests_it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize request
|
||||
// Process read requests that were waiting for this commit
|
||||
void KeeperDispatcher::onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
|
||||
{
|
||||
if (!finalize_requests_queue.push({request_for_session}))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
|
||||
KeeperStorage::RequestsForSessions requests;
|
||||
{
|
||||
std::lock_guard lock(leader_waiter_mutex);
|
||||
auto request_queue_it = leader_waiters.find(KeeperServer::NodeInfo{.term = log_term, .last_committed_index = log_idx});
|
||||
if (request_queue_it != leader_waiters.end())
|
||||
{
|
||||
requests = std::move(request_queue_it->second);
|
||||
leader_waiters.erase(request_queue_it);
|
||||
}
|
||||
}
|
||||
|
||||
if (requests.empty())
|
||||
return;
|
||||
|
||||
if (!read_requests_queue.push(std::move(requests)))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
}
|
||||
|
||||
/// Process all read request that are waiting for lower or currently last processed log index
|
||||
void KeeperDispatcher::onApplySnapshot(uint64_t term, uint64_t last_idx)
|
||||
{
|
||||
KeeperServer::NodeInfo current_node_info{term, last_idx};
|
||||
KeeperStorage::RequestsForSessions requests;
|
||||
{
|
||||
std::lock_guard lock(leader_waiter_mutex);
|
||||
for (auto leader_waiter_it = leader_waiters.begin(); leader_waiter_it != leader_waiters.end();)
|
||||
{
|
||||
auto waiting_node_info = leader_waiter_it->first;
|
||||
if (waiting_node_info.term <= current_node_info.term
|
||||
&& waiting_node_info.last_committed_index <= current_node_info.last_committed_index)
|
||||
{
|
||||
for (auto & request : leader_waiter_it->second)
|
||||
{
|
||||
requests.push_back(std::move(request));
|
||||
}
|
||||
|
||||
leader_waiter_it = leader_waiters.erase(leader_waiter_it);
|
||||
}
|
||||
else
|
||||
{
|
||||
++leader_waiter_it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (requests.empty())
|
||||
return;
|
||||
|
||||
if (!read_requests_queue.push(std::move(requests)))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
|
||||
}
|
||||
|
||||
bool KeeperDispatcher::isServerActive() const
|
||||
{
|
||||
return checkInit() && hasLeader() && !server->isRecovering();
|
||||
@ -721,7 +1034,7 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const
|
||||
Keeper4LWInfo result = server->getPartiallyFilled4LWInfo();
|
||||
{
|
||||
std::lock_guard lock(push_request_mutex);
|
||||
result.outstanding_requests_count = requests_queue->size();
|
||||
result.outstanding_requests_count = active_requests_queue->size();
|
||||
}
|
||||
{
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
|
@ -32,9 +32,12 @@ private:
|
||||
using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>;
|
||||
|
||||
/// Size depends on coordination settings
|
||||
std::unique_ptr<RequestsQueue> requests_queue;
|
||||
/// Request currently being processed
|
||||
std::unique_ptr<RequestsQueue> active_requests_queue;
|
||||
ResponsesQueue responses_queue;
|
||||
SnapshotsQueue snapshots_queue{1};
|
||||
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> read_requests_queue;
|
||||
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> finalize_requests_queue;
|
||||
|
||||
/// More than 1k updates is definitely misconfiguration.
|
||||
UpdateConfigurationQueue update_configuration_queue{1000};
|
||||
@ -64,6 +67,8 @@ private:
|
||||
ThreadFromGlobalPool snapshot_thread;
|
||||
/// Apply or wait for configuration changes
|
||||
ThreadFromGlobalPool update_configuration_thread;
|
||||
ThreadFromGlobalPool read_request_thread;
|
||||
ThreadFromGlobalPool finalize_requests_thread;
|
||||
|
||||
/// RAFT wrapper.
|
||||
std::unique_ptr<KeeperServer> server;
|
||||
@ -77,6 +82,34 @@ private:
|
||||
/// Counter for new session_id requests.
|
||||
std::atomic<int64_t> internal_session_id_counter{0};
|
||||
|
||||
/// A read request needs to have at least the log it was the last committed log on the leader
|
||||
/// at the time the request was being made.
|
||||
/// If the node is stale, we need to wait to commit that log before doing local read requests to achieve
|
||||
/// linearizability.
|
||||
std::unordered_map<KeeperServer::NodeInfo, KeeperStorage::RequestsForSessions> leader_waiters;
|
||||
std::mutex leader_waiter_mutex;
|
||||
|
||||
/// We can be actively processing one type of requests (either read or write) from a single session.
|
||||
/// If we receive a request of a type that is not currently being processed, we put it in the waiting queue.
|
||||
/// Also, we want to process them in ariving order, so if we have a different type in the queue, we cannot process that request
|
||||
/// but wait for all the previous requests to finish.
|
||||
/// E.g. READ -> WRITE -> READ, the last READ will go to the waiting queue even though we are currently processing the first READ
|
||||
/// because we have WRITE request before it that needs to be processed.
|
||||
struct UnprocessedRequests
|
||||
{
|
||||
/// how many requests are currently in the active request queue
|
||||
size_t unprocessed_num{0};
|
||||
/// is_read currently being processed
|
||||
bool is_read{false};
|
||||
std::list<KeeperStorage::RequestForSession> request_queue;
|
||||
};
|
||||
|
||||
// Called every time a batch of requests are processed.
|
||||
void finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions);
|
||||
|
||||
std::unordered_map<int64_t, UnprocessedRequests> unprocessed_requests_for_session;
|
||||
std::mutex unprocessed_request_mutex;
|
||||
|
||||
/// Thread put requests to raft
|
||||
void requestThread();
|
||||
/// Thread put responses for subscribed sessions
|
||||
@ -88,6 +121,12 @@ private:
|
||||
/// Thread apply or wait configuration changes from leader
|
||||
void updateConfigurationThread();
|
||||
|
||||
void readRequestThread();
|
||||
|
||||
void finalizeRequestsThread();
|
||||
|
||||
void processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests);
|
||||
|
||||
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
|
||||
|
||||
/// Add error responses for requests to responses queue.
|
||||
@ -96,7 +135,7 @@ private:
|
||||
|
||||
/// Forcefully wait for result and sets errors if something when wrong.
|
||||
/// Clears both arguments
|
||||
void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions);
|
||||
static void forceWaitAndProcessResult(RaftResult & result);
|
||||
|
||||
public:
|
||||
/// Just allocate some objects, real initialization is done by `intialize method`
|
||||
@ -116,6 +155,12 @@ public:
|
||||
return server && server->checkInit();
|
||||
}
|
||||
|
||||
/// Called when a single log with request is committed.
|
||||
void onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx);
|
||||
|
||||
/// Called when a snapshot is applied
|
||||
void onApplySnapshot(uint64_t term, uint64_t last_idx);
|
||||
|
||||
/// Is server accepting requests, i.e. connected to the cluster
|
||||
/// and achieved quorum
|
||||
bool isServerActive() const;
|
||||
|
@ -105,7 +105,9 @@ KeeperServer::KeeperServer(
|
||||
const KeeperConfigurationAndSettingsPtr & configuration_and_settings_,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_)
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
KeeperStateMachine::CommitCallback commit_callback,
|
||||
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback)
|
||||
: server_id(configuration_and_settings_->server_id)
|
||||
, coordination_settings(configuration_and_settings_->coordination_settings)
|
||||
, log(&Poco::Logger::get("KeeperServer"))
|
||||
@ -113,7 +115,7 @@ KeeperServer::KeeperServer(
|
||||
, keeper_context{std::make_shared<KeeperContext>()}
|
||||
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
|
||||
{
|
||||
if (coordination_settings->quorum_reads)
|
||||
if (coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum")
|
||||
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
|
||||
|
||||
keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false);
|
||||
@ -125,7 +127,9 @@ KeeperServer::KeeperServer(
|
||||
configuration_and_settings_->snapshot_storage_path,
|
||||
coordination_settings,
|
||||
keeper_context,
|
||||
checkAndGetSuperdigest(configuration_and_settings_->super_digest));
|
||||
checkAndGetSuperdigest(configuration_and_settings_->super_digest),
|
||||
std::move(commit_callback),
|
||||
std::move(apply_snapshot_callback));
|
||||
|
||||
state_manager = nuraft::cs_new<KeeperStateManager>(
|
||||
server_id,
|
||||
@ -176,6 +180,13 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
|
||||
reconfigure(new_config);
|
||||
}
|
||||
|
||||
RaftResult getLeaderInfo()
|
||||
{
|
||||
nuraft::ptr<nuraft::req_msg> req
|
||||
= nuraft::cs_new<nuraft::req_msg>(0ull, nuraft::msg_type::leader_status_request, 0, 0, 0ull, 0ull, 0ull);
|
||||
return send_msg_to_leader(req);
|
||||
}
|
||||
|
||||
void commit_in_bg() override
|
||||
{
|
||||
// For NuRaft, if any commit fails (uncaught exception) the whole server aborts as a safety
|
||||
@ -269,6 +280,20 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
|
||||
coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
|
||||
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(
|
||||
coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
|
||||
|
||||
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(coordination_settings->leadership_expiry.totalMilliseconds(), "leadership_expiry", log);
|
||||
|
||||
if (coordination_settings->read_mode.toString() == "fastlinear")
|
||||
{
|
||||
if (params.leadership_expiry_ == 0)
|
||||
params.leadership_expiry_ = params.election_timeout_lower_bound_;
|
||||
else if (params.leadership_expiry_ > params.election_timeout_lower_bound_)
|
||||
{
|
||||
LOG_WARNING(log, "To use fast linearizable reads, leadership_expiry should be set to a value that is less or equal to the election_timeout_upper_bound_ms. "
|
||||
"Based on current settings, there are no guarantees for linearizability of reads.");
|
||||
}
|
||||
}
|
||||
|
||||
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
|
||||
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
|
||||
|
||||
@ -487,7 +512,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
|
||||
state_machine->processReadRequest(request_for_session);
|
||||
}
|
||||
|
||||
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
|
||||
RaftResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
|
||||
{
|
||||
std::vector<nuraft::ptr<nuraft::buffer>> entries;
|
||||
for (const auto & request_for_session : requests_for_sessions)
|
||||
@ -713,6 +738,20 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
|
||||
return state_machine->getDeadSessions();
|
||||
}
|
||||
|
||||
RaftResult KeeperServer::getLeaderInfo()
|
||||
{
|
||||
std::lock_guard lock{server_write_mutex};
|
||||
if (is_recovering)
|
||||
return nullptr;
|
||||
|
||||
return raft_instance->getLeaderInfo();
|
||||
}
|
||||
|
||||
KeeperServer::NodeInfo KeeperServer::getNodeInfo()
|
||||
{
|
||||
return { .term = raft_instance->get_term(), .last_committed_index = state_machine->last_commit_index() };
|
||||
}
|
||||
|
||||
ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
auto diff = state_manager->getConfigurationDiff(config);
|
||||
|
@ -14,7 +14,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using RaftAppendResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
|
||||
using RaftResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
|
||||
|
||||
class KeeperServer
|
||||
{
|
||||
@ -71,7 +71,9 @@ public:
|
||||
const KeeperConfigurationAndSettingsPtr & settings_,
|
||||
const Poco::Util::AbstractConfiguration & config_,
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_);
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
KeeperStateMachine::CommitCallback commit_callback,
|
||||
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback);
|
||||
|
||||
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
|
||||
void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true);
|
||||
@ -84,7 +86,7 @@ public:
|
||||
|
||||
/// Put batch of requests into Raft and get result of put. Responses will be set separately into
|
||||
/// responses_queue.
|
||||
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
|
||||
RaftResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
|
||||
|
||||
/// Return set of the non-active sessions
|
||||
std::vector<int64_t> getDeadSessions();
|
||||
@ -119,6 +121,17 @@ public:
|
||||
|
||||
int getServerID() const { return server_id; }
|
||||
|
||||
struct NodeInfo
|
||||
{
|
||||
uint64_t term;
|
||||
uint64_t last_committed_index;
|
||||
|
||||
bool operator==(const NodeInfo &) const = default;
|
||||
};
|
||||
|
||||
RaftResult getLeaderInfo();
|
||||
NodeInfo getNodeInfo();
|
||||
|
||||
/// Get configuration diff between current configuration in RAFT and in XML file
|
||||
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
@ -126,10 +139,23 @@ public:
|
||||
/// Synchronously check for update results with retries.
|
||||
void applyConfigurationUpdate(const ConfigUpdateAction & task);
|
||||
|
||||
|
||||
/// Wait configuration update for action. Used by followers.
|
||||
/// Return true if update was successfully received.
|
||||
bool waitConfigurationUpdate(const ConfigUpdateAction & task);
|
||||
};
|
||||
|
||||
}
|
||||
namespace std
|
||||
{
|
||||
template <>
|
||||
struct hash<DB::KeeperServer::NodeInfo>
|
||||
{
|
||||
size_t operator()(const DB::KeeperServer::NodeInfo & info) const
|
||||
{
|
||||
SipHash hash_state;
|
||||
hash_state.update(info.term);
|
||||
hash_state.update(info.last_committed_index);
|
||||
return hash_state.get64();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -44,7 +44,9 @@ KeeperStateMachine::KeeperStateMachine(
|
||||
const std::string & snapshots_path_,
|
||||
const CoordinationSettingsPtr & coordination_settings_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
const std::string & superdigest_)
|
||||
const std::string & superdigest_,
|
||||
CommitCallback commit_callback_,
|
||||
ApplySnapshotCallback apply_snapshot_callback_)
|
||||
: coordination_settings(coordination_settings_)
|
||||
, snapshot_manager(
|
||||
snapshots_path_,
|
||||
@ -58,6 +60,8 @@ KeeperStateMachine::KeeperStateMachine(
|
||||
, last_committed_idx(0)
|
||||
, log(&Poco::Logger::get("KeeperStateMachine"))
|
||||
, superdigest(superdigest_)
|
||||
, commit_callback(std::move(commit_callback_))
|
||||
, apply_snapshot_callback(std::move(apply_snapshot_callback_))
|
||||
, keeper_context(keeper_context_)
|
||||
{
|
||||
}
|
||||
@ -223,11 +227,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
|
||||
return true;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit_ext(const ext_op_params & params)
|
||||
{
|
||||
auto request_for_session = parseRequest(data);
|
||||
auto request_for_session = parseRequest(*params.data);
|
||||
if (!request_for_session.zxid)
|
||||
request_for_session.zxid = log_idx;
|
||||
request_for_session.zxid = params.log_idx;
|
||||
|
||||
/// Special processing of session_id request
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
@ -272,8 +276,9 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
|
||||
assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true);
|
||||
}
|
||||
|
||||
last_committed_idx = params.log_idx;
|
||||
commit_callback(request_for_session, params.log_term, params.log_idx);
|
||||
ProfileEvents::increment(ProfileEvents::KeeperCommits);
|
||||
last_committed_idx = log_idx;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -306,6 +311,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys);
|
||||
last_committed_idx = s.get_last_log_idx();
|
||||
apply_snapshot_callback(s.get_last_log_term(), s.get_last_log_idx());
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -320,6 +326,10 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr
|
||||
void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
|
||||
{
|
||||
auto request_for_session = parseRequest(data);
|
||||
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
|
||||
// If we received a log from an older node, use the log_idx as the zxid
|
||||
// log_idx will always be larger or equal to the zxid so we can safely do this
|
||||
// (log_idx is increased for all logs, while zxid is only increased for requests)
|
||||
|
@ -20,13 +20,18 @@ using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
|
||||
class KeeperStateMachine : public nuraft::state_machine
|
||||
{
|
||||
public:
|
||||
using CommitCallback = std::function<void(const KeeperStorage::RequestForSession &, uint64_t, uint64_t)>;
|
||||
using ApplySnapshotCallback = std::function<void(uint64_t, uint64_t)>;
|
||||
|
||||
KeeperStateMachine(
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
const std::string & snapshots_path_,
|
||||
const CoordinationSettingsPtr & coordination_settings_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
const std::string & superdigest_ = "");
|
||||
const std::string & superdigest_ = "",
|
||||
CommitCallback commit_callback_ = [](const KeeperStorage::RequestForSession &, uint64_t, uint64_t){},
|
||||
ApplySnapshotCallback apply_snapshot_callback_ = [](uint64_t, uint64_t){});
|
||||
|
||||
/// Read state from the latest snapshot
|
||||
void init();
|
||||
@ -37,7 +42,7 @@ public:
|
||||
|
||||
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT
|
||||
nuraft::ptr<nuraft::buffer> commit_ext(const ext_op_params & params) override; /// NOLINT
|
||||
|
||||
/// Save new cluster config to our snapshot (copy of the config stored in StateManager)
|
||||
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT
|
||||
@ -145,6 +150,11 @@ private:
|
||||
/// Special part of ACL system -- superdigest specified in server config.
|
||||
const std::string superdigest;
|
||||
|
||||
/// call when a request is committed
|
||||
const CommitCallback commit_callback;
|
||||
/// call when snapshot is applied
|
||||
const ApplySnapshotCallback apply_snapshot_callback;
|
||||
|
||||
KeeperContextPtr keeper_context;
|
||||
};
|
||||
|
||||
|
@ -1330,8 +1330,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
|
||||
changelog.append(entry);
|
||||
changelog.end_of_append_batch(0, 0);
|
||||
|
||||
state_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
|
||||
state_machine->commit(i, changelog.entry_at(i)->get_buf());
|
||||
auto entry_buf = changelog.entry_at(i)->get_buf_ptr();
|
||||
state_machine->pre_commit(i, *entry_buf);
|
||||
state_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry_buf});
|
||||
bool snapshot_created = false;
|
||||
if (i % settings->snapshot_distance == 0)
|
||||
{
|
||||
@ -1375,8 +1376,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
|
||||
|
||||
for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i)
|
||||
{
|
||||
restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
|
||||
restore_machine->commit(i, changelog.entry_at(i)->get_buf());
|
||||
auto entry = changelog.entry_at(i)->get_buf_ptr();
|
||||
restore_machine->pre_commit(i, *entry);
|
||||
restore_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry});
|
||||
}
|
||||
|
||||
auto & source_storage = state_machine->getStorage();
|
||||
@ -1477,18 +1479,18 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove)
|
||||
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
|
||||
request_c->path = "/hello";
|
||||
request_c->is_ephemeral = true;
|
||||
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c);
|
||||
state_machine->pre_commit(1, entry_c->get_buf());
|
||||
state_machine->commit(1, entry_c->get_buf());
|
||||
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c)->get_buf_ptr();
|
||||
state_machine->pre_commit(1, *entry_c);
|
||||
state_machine->commit_ext(nuraft::state_machine::ext_op_params{1, entry_c});
|
||||
const auto & storage = state_machine->getStorage();
|
||||
|
||||
EXPECT_EQ(storage.ephemerals.size(), 1);
|
||||
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
|
||||
request_d->path = "/hello";
|
||||
/// Delete from other session
|
||||
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d);
|
||||
state_machine->pre_commit(2, entry_d->get_buf());
|
||||
state_machine->commit(2, entry_d->get_buf());
|
||||
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d)->get_buf_ptr();
|
||||
state_machine->pre_commit(2, *entry_d);
|
||||
state_machine->commit_ext(nuraft::state_machine::ext_op_params{2, entry_d});
|
||||
|
||||
EXPECT_EQ(storage.ephemerals.size(), 0);
|
||||
}
|
||||
|
@ -481,7 +481,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
|
||||
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
|
||||
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
|
||||
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
|
||||
M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \
|
||||
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
|
||||
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
|
||||
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
|
||||
@ -527,7 +527,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
|
||||
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
|
||||
\
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
|
||||
\
|
||||
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
namespace std // NOLINT(cert-dcl58-cpp)
|
||||
{
|
||||
using namespace experimental::coroutines_v1;
|
||||
using namespace experimental::coroutines_v1; // NOLINT(cert-dcl58-cpp)
|
||||
}
|
||||
|
||||
#if __has_warning("-Wdeprecated-experimental-coroutine")
|
||||
|
@ -143,9 +143,11 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
|
||||
}
|
||||
|
||||
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
|
||||
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(size_t offset) const
|
||||
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const
|
||||
{
|
||||
auto path = cache->getPathInLocalCache(cache_key, offset, is_persistent);
|
||||
/// Use is_persistent flag from in-memory state of the filesegment,
|
||||
/// because it is consistent with what is written on disk.
|
||||
auto path = file_segment.getPathInLocalCache();
|
||||
|
||||
ReadSettings local_read_settings{settings};
|
||||
/// Do not allow to use asynchronous version of LocalFSReadMethod.
|
||||
@ -206,7 +208,7 @@ CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment
|
||||
return remote_file_reader;
|
||||
|
||||
auto remote_fs_segment_reader = file_segment.extractRemoteFileReader();
|
||||
if (remote_fs_segment_reader && file_offset_of_buffer_end == remote_file_reader->getFileOffsetOfBufferEnd())
|
||||
if (remote_fs_segment_reader && file_offset_of_buffer_end == remote_fs_segment_reader->getFileOffsetOfBufferEnd())
|
||||
remote_file_reader = remote_fs_segment_reader;
|
||||
else
|
||||
remote_file_reader = implementation_buffer_creator();
|
||||
@ -237,8 +239,6 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
|
||||
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
|
||||
CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
|
||||
{
|
||||
auto range = file_segment->range();
|
||||
|
||||
auto download_state = file_segment->state();
|
||||
LOG_TEST(log, "getReadBufferForFileSegment: {}", file_segment->getInfoForLog());
|
||||
|
||||
@ -247,7 +247,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
if (download_state == FileSegment::State::DOWNLOADED)
|
||||
{
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -280,7 +280,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
/// file_offset_of_buffer_end
|
||||
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
|
||||
download_state = file_segment->wait();
|
||||
@ -289,7 +289,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
case FileSegment::State::DOWNLOADED:
|
||||
{
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
case FileSegment::State::EMPTY:
|
||||
case FileSegment::State::PARTIALLY_DOWNLOADED:
|
||||
@ -305,7 +305,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
/// file_offset_of_buffer_end
|
||||
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
|
||||
auto downloader_id = file_segment->getOrSetDownloader();
|
||||
@ -323,7 +323,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
|
||||
read_type = ReadType::CACHED;
|
||||
file_segment->resetDownloader();
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
|
||||
if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end)
|
||||
@ -339,7 +339,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog());
|
||||
chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset());
|
||||
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset();
|
||||
chassert(bytes_to_predownload < range.size());
|
||||
chassert(bytes_to_predownload < file_segment->range().size());
|
||||
}
|
||||
|
||||
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
|
||||
@ -354,7 +354,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
|
||||
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
|
||||
{
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
return getCacheReadBuffer(*file_segment);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -68,7 +68,7 @@ private:
|
||||
|
||||
ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
|
||||
|
||||
ImplementationBufferPtr getCacheReadBuffer(size_t offset) const;
|
||||
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
|
||||
|
||||
std::optional<size_t> getLastNonDownloadedOffset() const;
|
||||
|
||||
|
@ -13,7 +13,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_FORMAT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
|
||||
@ -131,9 +130,6 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata(
|
||||
|
||||
void DiskObjectStorageMetadata::addObject(const String & path, size_t size)
|
||||
{
|
||||
if (!object_storage_root_path.empty() && path.starts_with(object_storage_root_path))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected relative path");
|
||||
|
||||
total_size += size;
|
||||
storage_objects.emplace_back(path, size);
|
||||
}
|
||||
|
@ -530,6 +530,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd)
|
||||
return getFormatFromFileName(file_path, false);
|
||||
return "";
|
||||
#else
|
||||
(void)fd;
|
||||
return "";
|
||||
#endif
|
||||
}
|
||||
|
@ -1,18 +1,18 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include <base/range.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExternalModelsLoader.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <BridgeHelper/CatBoostLibraryBridgeHelper.h>
|
||||
#include <BridgeHelper/IBridgeHelper.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
|
||||
@ -21,66 +21,80 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
class ExternalModelsLoader;
|
||||
|
||||
|
||||
/// Evaluate external model.
|
||||
/// First argument - model name, the others - model arguments.
|
||||
/// * for CatBoost model - float features first, then categorical
|
||||
/// Result - Float64.
|
||||
class FunctionModelEvaluate final : public IFunction
|
||||
/// Evaluate CatBoost model.
|
||||
/// - Arguments: float features first, then categorical features.
|
||||
/// - Result: Float64.
|
||||
class FunctionCatBoostEvaluate final : public IFunction, WithContext
|
||||
{
|
||||
private:
|
||||
mutable std::unique_ptr<CatBoostLibraryBridgeHelper> bridge_helper;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "modelEvaluate";
|
||||
static constexpr auto name = "catboostEvaluate";
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionModelEvaluate>(context->getExternalModelsLoader());
|
||||
}
|
||||
|
||||
explicit FunctionModelEvaluate(const ExternalModelsLoader & models_loader_)
|
||||
: models_loader(models_loader_) {}
|
||||
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionCatBoostEvaluate>(context_); }
|
||||
|
||||
explicit FunctionCatBoostEvaluate(ContextPtr context_) : WithContext(context_) {}
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
void initBridge(const ColumnConst * name_col) const
|
||||
{
|
||||
String library_path = getContext()->getConfigRef().getString("catboost_lib_path");
|
||||
if (!std::filesystem::exists(library_path))
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load library {}: file doesn't exist", library_path);
|
||||
|
||||
String model_path = name_col->getValue<String>();
|
||||
if (!std::filesystem::exists(model_path))
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load model {}: file doesn't exist", model_path);
|
||||
|
||||
bridge_helper = std::make_unique<CatBoostLibraryBridgeHelper>(getContext(), model_path, library_path);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeFromLibraryBridge() const
|
||||
{
|
||||
size_t tree_count = bridge_helper->getTreeCount();
|
||||
auto type = std::make_shared<DataTypeFloat64>();
|
||||
if (tree_count == 1)
|
||||
return type;
|
||||
|
||||
DataTypes types(tree_count, type);
|
||||
|
||||
return std::make_shared<DataTypeTuple>(types);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception("Function " + getName() + " expects at least 2 arguments",
|
||||
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION);
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 2 arguments", getName());
|
||||
|
||||
if (!isString(arguments[0].type))
|
||||
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
|
||||
+ ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}, expected a string.", arguments[0].type->getName(), getName());
|
||||
|
||||
const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
|
||||
if (!name_col)
|
||||
throw Exception("First argument of function " + getName() + " must be a constant string",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
|
||||
|
||||
initBridge(name_col);
|
||||
|
||||
auto type = getReturnTypeFromLibraryBridge();
|
||||
|
||||
bool has_nullable = false;
|
||||
for (size_t i = 1; i < arguments.size(); ++i)
|
||||
has_nullable = has_nullable || arguments[i].type->isNullable();
|
||||
|
||||
auto model = models_loader.getModel(name_col->getValue<String>());
|
||||
auto type = model->getReturnType();
|
||||
|
||||
if (has_nullable)
|
||||
{
|
||||
if (const auto * tuple = typeid_cast<const DataTypeTuple *>(type.get()))
|
||||
@ -98,31 +112,25 @@ public:
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
|
||||
if (!name_col)
|
||||
throw Exception("First argument of function " + getName() + " must be a constant string",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
auto model = models_loader.getModel(name_col->getValue<String>());
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
|
||||
|
||||
ColumnRawPtrs column_ptrs;
|
||||
Columns materialized_columns;
|
||||
ColumnPtr null_map;
|
||||
|
||||
column_ptrs.reserve(arguments.size());
|
||||
for (auto arg : collections::range(1, arguments.size()))
|
||||
ColumnsWithTypeAndName feature_arguments(arguments.begin() + 1, arguments.end());
|
||||
for (auto & arg : feature_arguments)
|
||||
{
|
||||
const auto & column = arguments[arg].column;
|
||||
column_ptrs.push_back(column.get());
|
||||
if (auto full_column = column->convertToFullColumnIfConst())
|
||||
if (auto full_column = arg.column->convertToFullColumnIfConst())
|
||||
{
|
||||
materialized_columns.push_back(full_column);
|
||||
column_ptrs.back() = full_column.get();
|
||||
arg.column = full_column;
|
||||
}
|
||||
if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(*column_ptrs.back()))
|
||||
if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(&*arg.column))
|
||||
{
|
||||
if (!null_map)
|
||||
null_map = col_nullable->getNullMapColumnPtr();
|
||||
@ -140,11 +148,12 @@ public:
|
||||
null_map = std::move(mut_null_map);
|
||||
}
|
||||
|
||||
column_ptrs.back() = &col_nullable->getNestedColumn();
|
||||
arg.column = col_nullable->getNestedColumn().getPtr();
|
||||
arg.type = static_cast<const DataTypeNullable &>(*arg.type).getNestedType();
|
||||
}
|
||||
}
|
||||
|
||||
auto res = model->evaluate(column_ptrs);
|
||||
auto res = bridge_helper->evaluate(feature_arguments);
|
||||
|
||||
if (null_map)
|
||||
{
|
||||
@ -162,15 +171,12 @@ public:
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
const ExternalModelsLoader & models_loader;
|
||||
};
|
||||
|
||||
|
||||
REGISTER_FUNCTION(ExternalModels)
|
||||
REGISTER_FUNCTION(CatBoostEvaluate)
|
||||
{
|
||||
factory.registerFunction<FunctionModelEvaluate>();
|
||||
factory.registerFunction<FunctionCatBoostEvaluate>();
|
||||
}
|
||||
|
||||
}
|
@ -233,7 +233,7 @@ void ReadBufferFromFileDescriptor::rewind()
|
||||
|
||||
|
||||
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
|
||||
bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds)
|
||||
bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) const
|
||||
{
|
||||
fd_set fds;
|
||||
FD_ZERO(&fds);
|
||||
|
@ -66,7 +66,7 @@ public:
|
||||
|
||||
private:
|
||||
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
|
||||
bool poll(size_t timeout_microseconds);
|
||||
bool poll(size_t timeout_microseconds) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,525 +0,0 @@
|
||||
#include "CatBoostModel.h"
|
||||
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <mutex>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/SharedLibrary.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_LOAD_CATBOOST_MODEL;
|
||||
extern const int CANNOT_APPLY_CATBOOST_MODEL;
|
||||
}
|
||||
|
||||
/// CatBoost wrapper interface functions.
|
||||
class CatBoostWrapperAPI
|
||||
{
|
||||
public:
|
||||
using ModelCalcerHandle = void;
|
||||
|
||||
ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT
|
||||
|
||||
void (* ModelCalcerDelete)(ModelCalcerHandle * calcer); // NOLINT
|
||||
|
||||
const char * (* GetErrorString)(); // NOLINT
|
||||
|
||||
bool (* LoadFullModelFromFile)(ModelCalcerHandle * calcer, const char * filename); // NOLINT
|
||||
|
||||
bool (* CalcModelPredictionFlat)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
|
||||
const float ** floatFeatures, size_t floatFeaturesSize,
|
||||
double * result, size_t resultSize);
|
||||
|
||||
bool (* CalcModelPrediction)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
|
||||
const float ** floatFeatures, size_t floatFeaturesSize,
|
||||
const char *** catFeatures, size_t catFeaturesSize,
|
||||
double * result, size_t resultSize);
|
||||
|
||||
bool (* CalcModelPredictionWithHashedCatFeatures)(ModelCalcerHandle * calcer, size_t docCount, // NOLINT
|
||||
const float ** floatFeatures, size_t floatFeaturesSize,
|
||||
const int ** catFeatures, size_t catFeaturesSize,
|
||||
double * result, size_t resultSize);
|
||||
|
||||
int (* GetStringCatFeatureHash)(const char * data, size_t size); // NOLINT
|
||||
int (* GetIntegerCatFeatureHash)(uint64_t val); // NOLINT
|
||||
|
||||
size_t (* GetFloatFeaturesCount)(ModelCalcerHandle* calcer); // NOLINT
|
||||
size_t (* GetCatFeaturesCount)(ModelCalcerHandle* calcer); // NOLINT
|
||||
size_t (* GetTreeCount)(ModelCalcerHandle* modelHandle); // NOLINT
|
||||
size_t (* GetDimensionsCount)(ModelCalcerHandle* modelHandle); // NOLINT
|
||||
|
||||
bool (* CheckModelMetadataHasKey)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize); // NOLINT
|
||||
size_t (*GetModelInfoValueSize)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize); // NOLINT
|
||||
const char* (*GetModelInfoValue)(ModelCalcerHandle* modelHandle, const char* keyPtr, size_t keySize); // NOLINT
|
||||
};
|
||||
|
||||
|
||||
class CatBoostModelHolder
|
||||
{
|
||||
private:
|
||||
CatBoostWrapperAPI::ModelCalcerHandle * handle;
|
||||
const CatBoostWrapperAPI * api;
|
||||
public:
|
||||
explicit CatBoostModelHolder(const CatBoostWrapperAPI * api_) : api(api_) { handle = api->ModelCalcerCreate(); }
|
||||
~CatBoostModelHolder() { api->ModelCalcerDelete(handle); }
|
||||
|
||||
CatBoostWrapperAPI::ModelCalcerHandle * get() { return handle; }
|
||||
};
|
||||
|
||||
|
||||
/// Holds CatBoost wrapper library and provides wrapper interface.
|
||||
class CatBoostLibHolder
|
||||
{
|
||||
public:
|
||||
explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
|
||||
|
||||
const CatBoostWrapperAPI & getAPI() const { return api; }
|
||||
const std::string & getCurrentPath() const { return lib_path; }
|
||||
|
||||
private:
|
||||
CatBoostWrapperAPI api;
|
||||
std::string lib_path;
|
||||
SharedLibrary lib;
|
||||
|
||||
void initAPI()
|
||||
{
|
||||
load(api.ModelCalcerCreate, "ModelCalcerCreate");
|
||||
load(api.ModelCalcerDelete, "ModelCalcerDelete");
|
||||
load(api.GetErrorString, "GetErrorString");
|
||||
load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
|
||||
load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
|
||||
load(api.CalcModelPrediction, "CalcModelPrediction");
|
||||
load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
|
||||
load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
|
||||
load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
|
||||
load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
|
||||
load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
|
||||
tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
|
||||
tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
|
||||
tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
|
||||
tryLoad(api.GetTreeCount, "GetTreeCount");
|
||||
tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void load(T& func, const std::string & name) { func = lib.get<T>(name); }
|
||||
|
||||
template <typename T>
|
||||
void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
|
||||
};
|
||||
|
||||
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
|
||||
{
|
||||
static std::shared_ptr<CatBoostLibHolder> ptr;
|
||||
static std::mutex mutex;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!ptr || ptr->getCurrentPath() != lib_path)
|
||||
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
class CatBoostModelImpl
|
||||
{
|
||||
public:
|
||||
CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_)
|
||||
{
|
||||
handle = std::make_unique<CatBoostModelHolder>(api);
|
||||
if (!handle)
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
|
||||
"Cannot create CatBoost model: {}",
|
||||
api->GetErrorString());
|
||||
}
|
||||
if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str()))
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
|
||||
"Cannot load CatBoost model: {}",
|
||||
api->GetErrorString());
|
||||
}
|
||||
|
||||
float_features_count = api->GetFloatFeaturesCount(handle->get());
|
||||
cat_features_count = api->GetCatFeaturesCount(handle->get());
|
||||
tree_count = 1;
|
||||
if (api->GetDimensionsCount)
|
||||
tree_count = api->GetDimensionsCount(handle->get());
|
||||
}
|
||||
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");
|
||||
|
||||
if (columns.size() != float_features_count + cat_features_count)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
|
||||
columns.size(),
|
||||
float_features_count,
|
||||
cat_features_count);
|
||||
|
||||
for (size_t i = 0; i < float_features_count; ++i)
|
||||
{
|
||||
if (!columns[i]->isNumeric())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
|
||||
}
|
||||
}
|
||||
|
||||
bool cat_features_are_strings = true;
|
||||
for (size_t i = float_features_count; i < float_features_count + cat_features_count; ++i)
|
||||
{
|
||||
const auto * column = columns[i];
|
||||
if (column->isNumeric())
|
||||
{
|
||||
cat_features_are_strings = false;
|
||||
}
|
||||
else if (!(typeid_cast<const ColumnString *>(column)
|
||||
|| typeid_cast<const ColumnFixedString *>(column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
|
||||
}
|
||||
}
|
||||
|
||||
auto result = evalImpl(columns, cat_features_are_strings);
|
||||
|
||||
if (tree_count == 1)
|
||||
return result;
|
||||
|
||||
size_t column_size = columns.front()->size();
|
||||
auto * result_buf = result->getData().data();
|
||||
|
||||
/// Multiple trees case. Copy data to several columns.
|
||||
MutableColumns mutable_columns(tree_count);
|
||||
std::vector<Float64 *> column_ptrs(tree_count);
|
||||
for (size_t i = 0; i < tree_count; ++i)
|
||||
{
|
||||
auto col = ColumnFloat64::create(column_size);
|
||||
column_ptrs[i] = col->getData().data();
|
||||
mutable_columns[i] = std::move(col);
|
||||
}
|
||||
|
||||
Float64 * data = result_buf;
|
||||
for (size_t row = 0; row < column_size; ++row)
|
||||
{
|
||||
for (size_t i = 0; i < tree_count; ++i)
|
||||
{
|
||||
*column_ptrs[i] = *data;
|
||||
++column_ptrs[i];
|
||||
++data;
|
||||
}
|
||||
}
|
||||
|
||||
return ColumnTuple::create(std::move(mutable_columns));
|
||||
}
|
||||
|
||||
size_t getFloatFeaturesCount() const { return float_features_count; }
|
||||
size_t getCatFeaturesCount() const { return cat_features_count; }
|
||||
size_t getTreeCount() const { return tree_count; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<CatBoostModelHolder> handle;
|
||||
const CatBoostWrapperAPI * api;
|
||||
size_t float_features_count;
|
||||
size_t cat_features_count;
|
||||
size_t tree_count;
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place column elements in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
template <typename T>
|
||||
void placeColumnAsNumber(const IColumn * column, T * buffer, size_t features_count) const
|
||||
{
|
||||
size_t size = column->size();
|
||||
FieldVisitorConvertToNumber<T> visitor;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
/// TODO: Replace with column visitor.
|
||||
Field field;
|
||||
column->get(i, field);
|
||||
*buffer = applyVisitor(visitor, field);
|
||||
buffer += features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
static void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count)
|
||||
{
|
||||
size_t size = column.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
*buffer = const_cast<char *>(column.getDataAtWithTerminatingZero(i).data);
|
||||
buffer += features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffer should be allocated with features_count * column->size() elements.
|
||||
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[size * features_count]
|
||||
/// Returns PODArray which holds data (because ColumnFixedString doesn't store terminating zero).
|
||||
static PODArray<char> placeFixedStringColumn(
|
||||
const ColumnFixedString & column, const char ** buffer, size_t features_count)
|
||||
{
|
||||
size_t size = column.size();
|
||||
size_t str_size = column.getN();
|
||||
PODArray<char> data(size * (str_size + 1));
|
||||
char * data_ptr = data.data();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto ref = column.getDataAt(i);
|
||||
memcpy(data_ptr, ref.data, ref.size);
|
||||
data_ptr[ref.size] = 0;
|
||||
*buffer = data_ptr;
|
||||
data_ptr += ref.size + 1;
|
||||
buffer += features_count;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Place columns into buffer, returns column which holds placed data. Buffer should contains column->size() values.
|
||||
template <typename T>
|
||||
ColumnPtr placeNumericColumns(const ColumnRawPtrs & columns,
|
||||
size_t offset, size_t size, const T** buffer) const
|
||||
{
|
||||
if (size == 0)
|
||||
return nullptr;
|
||||
size_t column_size = columns[offset]->size();
|
||||
auto data_column = ColumnVector<T>::create(size * column_size);
|
||||
T * data = data_column->getData().data();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (column->isNumeric())
|
||||
placeColumnAsNumber(column, data + i, size);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
*buffer = data;
|
||||
++buffer;
|
||||
data += size;
|
||||
}
|
||||
|
||||
return data_column;
|
||||
}
|
||||
|
||||
/// Place columns into buffer, returns data which was used for fixed string columns.
|
||||
/// Buffer should contains column->size() values, each value contains size strings.
|
||||
static std::vector<PODArray<char>> placeStringColumns(
|
||||
const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer)
|
||||
{
|
||||
if (size == 0)
|
||||
return {};
|
||||
|
||||
std::vector<PODArray<char>> data;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
|
||||
placeStringColumn(*column_string, buffer + i, size);
|
||||
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
|
||||
data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size));
|
||||
else
|
||||
throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/// Calc hash for string cat feature at ps positions.
|
||||
template <typename Column>
|
||||
void calcStringHashes(const Column * column, size_t ps, const int ** buffer) const
|
||||
{
|
||||
size_t column_size = column->size();
|
||||
for (size_t j = 0; j < column_size; ++j)
|
||||
{
|
||||
auto ref = column->getDataAt(j);
|
||||
const_cast<int *>(*buffer)[ps] = api->GetStringCatFeatureHash(ref.data, ref.size);
|
||||
++buffer;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calc hash for int cat feature at ps position. Buffer at positions ps should contains unhashed values.
|
||||
void calcIntHashes(size_t column_size, size_t ps, const int ** buffer) const
|
||||
{
|
||||
for (size_t j = 0; j < column_size; ++j)
|
||||
{
|
||||
const_cast<int *>(*buffer)[ps] = api->GetIntegerCatFeatureHash((*buffer)[ps]);
|
||||
++buffer;
|
||||
}
|
||||
}
|
||||
|
||||
/// buffer contains column->size() rows and size columns.
|
||||
/// For int cat features calc hash inplace.
|
||||
/// For string cat features calc hash from column rows.
|
||||
void calcHashes(const ColumnRawPtrs & columns, size_t offset, size_t size, const int ** buffer) const
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
size_t column_size = columns[offset]->size();
|
||||
|
||||
std::vector<PODArray<char>> data;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const auto * column = columns[offset + i];
|
||||
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
|
||||
calcStringHashes(column_string, i, buffer);
|
||||
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
|
||||
calcStringHashes(column_fixed_string, i, buffer);
|
||||
else
|
||||
calcIntHashes(column_size, i, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
/// buffer[column_size * cat_features_count] -> char * => cat_features[column_size][cat_features_count] -> char *
|
||||
void fillCatFeaturesBuffer(const char *** cat_features, const char ** buffer,
|
||||
size_t column_size) const
|
||||
{
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
*cat_features = buffer;
|
||||
++cat_features;
|
||||
buffer += cat_features_count;
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert values to row-oriented format and call evaluation function from CatBoost wrapper api.
|
||||
/// * CalcModelPredictionFlat if no cat features
|
||||
/// * CalcModelPrediction if all cat features are strings
|
||||
/// * CalcModelPredictionWithHashedCatFeatures if has int cat features.
|
||||
ColumnFloat64::MutablePtr evalImpl(
|
||||
const ColumnRawPtrs & columns,
|
||||
bool cat_features_are_strings) const
|
||||
{
|
||||
std::string error_msg = "Error occurred while applying CatBoost model: ";
|
||||
size_t column_size = columns.front()->size();
|
||||
|
||||
auto result = ColumnFloat64::create(column_size * tree_count);
|
||||
auto * result_buf = result->getData().data();
|
||||
|
||||
if (!column_size)
|
||||
return result;
|
||||
|
||||
/// Prepare float features.
|
||||
PODArray<const float *> float_features(column_size);
|
||||
auto * float_features_buf = float_features.data();
|
||||
/// Store all float data into single column. float_features is a list of pointers to it.
|
||||
auto float_features_col = placeNumericColumns<float>(columns, 0, float_features_count, float_features_buf);
|
||||
|
||||
if (cat_features_count == 0)
|
||||
{
|
||||
if (!api->CalcModelPredictionFlat(handle->get(), column_size,
|
||||
float_features_buf, float_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
|
||||
throw Exception(error_msg + api->GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Prepare cat features.
|
||||
if (cat_features_are_strings)
|
||||
{
|
||||
/// cat_features_holder stores pointers to ColumnString data or fixed_strings_data.
|
||||
PODArray<const char *> cat_features_holder(cat_features_count * column_size);
|
||||
PODArray<const char **> cat_features(column_size);
|
||||
auto * cat_features_buf = cat_features.data();
|
||||
|
||||
fillCatFeaturesBuffer(cat_features_buf, cat_features_holder.data(), column_size);
|
||||
/// Fixed strings are stored without termination zero, so have to copy data into fixed_strings_data.
|
||||
auto fixed_strings_data = placeStringColumns(columns, float_features_count,
|
||||
cat_features_count, cat_features_holder.data());
|
||||
|
||||
if (!api->CalcModelPrediction(handle->get(), column_size,
|
||||
float_features_buf, float_features_count,
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api->GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PODArray<const int *> cat_features(column_size);
|
||||
auto * cat_features_buf = cat_features.data();
|
||||
auto cat_features_col = placeNumericColumns<int>(columns, float_features_count,
|
||||
cat_features_count, cat_features_buf);
|
||||
calcHashes(columns, float_features_count, cat_features_count, cat_features_buf);
|
||||
if (!api->CalcModelPredictionWithHashedCatFeatures(
|
||||
handle->get(), column_size,
|
||||
float_features_buf, float_features_count,
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api->GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_,
|
||||
const ExternalLoadableLifetime & lifetime_)
|
||||
: name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_)
|
||||
{
|
||||
api_provider = getCatBoostWrapperHolder(lib_path);
|
||||
api = &api_provider->getAPI();
|
||||
model = std::make_unique<CatBoostModelImpl>(api, model_path);
|
||||
}
|
||||
|
||||
CatBoostModel::~CatBoostModel() = default;
|
||||
|
||||
size_t CatBoostModel::getFloatFeaturesCount() const
|
||||
{
|
||||
return model->getFloatFeaturesCount();
|
||||
}
|
||||
|
||||
size_t CatBoostModel::getCatFeaturesCount() const
|
||||
{
|
||||
return model->getCatFeaturesCount();
|
||||
}
|
||||
|
||||
size_t CatBoostModel::getTreeCount() const
|
||||
{
|
||||
return model->getTreeCount();
|
||||
}
|
||||
|
||||
DataTypePtr CatBoostModel::getReturnType() const
|
||||
{
|
||||
size_t tree_count = getTreeCount();
|
||||
auto type = std::make_shared<DataTypeFloat64>();
|
||||
if (tree_count == 1)
|
||||
return type;
|
||||
|
||||
DataTypes types(tree_count, type);
|
||||
|
||||
return std::make_shared<DataTypeTuple>(types);
|
||||
}
|
||||
|
||||
ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const
|
||||
{
|
||||
if (!model)
|
||||
throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return model->evaluate(columns);
|
||||
}
|
||||
|
||||
}
|
@ -1,73 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/IExternalLoadable.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CatBoostLibHolder;
|
||||
class CatBoostWrapperAPI;
|
||||
class CatBoostModelImpl;
|
||||
|
||||
class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
|
||||
/// General ML model evaluator interface.
|
||||
class IMLModel : public IExternalLoadable
|
||||
{
|
||||
public:
|
||||
IMLModel() = default;
|
||||
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
|
||||
virtual std::string getTypeName() const = 0;
|
||||
virtual DataTypePtr getReturnType() const = 0;
|
||||
virtual ~IMLModel() override = default;
|
||||
};
|
||||
|
||||
class CatBoostModel : public IMLModel
|
||||
{
|
||||
public:
|
||||
CatBoostModel(std::string name, std::string model_path,
|
||||
std::string lib_path, const ExternalLoadableLifetime & lifetime);
|
||||
|
||||
~CatBoostModel() override;
|
||||
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const override;
|
||||
std::string getTypeName() const override { return "catboost"; }
|
||||
|
||||
size_t getFloatFeaturesCount() const;
|
||||
size_t getCatFeaturesCount() const;
|
||||
size_t getTreeCount() const;
|
||||
DataTypePtr getReturnType() const override;
|
||||
|
||||
/// IExternalLoadable interface.
|
||||
|
||||
const ExternalLoadableLifetime & getLifetime() const override { return lifetime; }
|
||||
|
||||
std::string getLoadableName() const override { return name; }
|
||||
|
||||
bool supportUpdates() const override { return true; }
|
||||
|
||||
bool isModified() const override { return true; }
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
|
||||
}
|
||||
|
||||
private:
|
||||
const std::string name;
|
||||
std::string model_path;
|
||||
std::string lib_path;
|
||||
ExternalLoadableLifetime lifetime;
|
||||
std::shared_ptr<CatBoostLibHolder> api_provider;
|
||||
const CatBoostWrapperAPI * api;
|
||||
|
||||
std::unique_ptr<CatBoostModelImpl> model;
|
||||
|
||||
void init();
|
||||
};
|
||||
|
||||
}
|
@ -52,7 +52,6 @@
|
||||
#include <Interpreters/EmbeddedDictionaries.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
|
||||
#include <Interpreters/ExternalModelsLoader.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Interpreters/InterserverCredentials.h>
|
||||
@ -153,7 +152,6 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable std::mutex embedded_dictionaries_mutex;
|
||||
mutable std::mutex external_dictionaries_mutex;
|
||||
mutable std::mutex external_user_defined_executable_functions_mutex;
|
||||
mutable std::mutex external_models_mutex;
|
||||
/// Separate mutex for storage policies. During server startup we may
|
||||
/// initialize some important storages (system logs with MergeTree engine)
|
||||
/// under context lock.
|
||||
@ -191,9 +189,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable std::unique_ptr<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
|
||||
mutable std::unique_ptr<ExternalDictionariesLoader> external_dictionaries_loader;
|
||||
mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader;
|
||||
mutable std::unique_ptr<ExternalModelsLoader> external_models_loader;
|
||||
|
||||
ExternalLoaderXMLConfigRepository * external_models_config_repository = nullptr;
|
||||
scope_guard models_repository_guard;
|
||||
|
||||
ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr;
|
||||
@ -359,8 +355,6 @@ struct ContextSharedPart : boost::noncopyable
|
||||
external_dictionaries_loader->enablePeriodicUpdates(false);
|
||||
if (external_user_defined_executable_functions_loader)
|
||||
external_user_defined_executable_functions_loader->enablePeriodicUpdates(false);
|
||||
if (external_models_loader)
|
||||
external_models_loader->enablePeriodicUpdates(false);
|
||||
|
||||
Session::shutdownNamedSessions();
|
||||
|
||||
@ -391,7 +385,6 @@ struct ContextSharedPart : boost::noncopyable
|
||||
std::unique_ptr<EmbeddedDictionaries> delete_embedded_dictionaries;
|
||||
std::unique_ptr<ExternalDictionariesLoader> delete_external_dictionaries_loader;
|
||||
std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> delete_external_user_defined_executable_functions_loader;
|
||||
std::unique_ptr<ExternalModelsLoader> delete_external_models_loader;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_buffer_flush_schedule_pool;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_schedule_pool;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_distributed_schedule_pool;
|
||||
@ -430,7 +423,6 @@ struct ContextSharedPart : boost::noncopyable
|
||||
delete_embedded_dictionaries = std::move(embedded_dictionaries);
|
||||
delete_external_dictionaries_loader = std::move(external_dictionaries_loader);
|
||||
delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader);
|
||||
delete_external_models_loader = std::move(external_models_loader);
|
||||
delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool);
|
||||
delete_schedule_pool = std::move(schedule_pool);
|
||||
delete_distributed_schedule_pool = std::move(distributed_schedule_pool);
|
||||
@ -458,7 +450,6 @@ struct ContextSharedPart : boost::noncopyable
|
||||
delete_embedded_dictionaries.reset();
|
||||
delete_external_dictionaries_loader.reset();
|
||||
delete_external_user_defined_executable_functions_loader.reset();
|
||||
delete_external_models_loader.reset();
|
||||
delete_ddl_worker.reset();
|
||||
delete_buffer_flush_schedule_pool.reset();
|
||||
delete_schedule_pool.reset();
|
||||
@ -1476,48 +1467,6 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx
|
||||
return *shared->external_user_defined_executable_functions_loader;
|
||||
}
|
||||
|
||||
const ExternalModelsLoader & Context::getExternalModelsLoader() const
|
||||
{
|
||||
return const_cast<Context *>(this)->getExternalModelsLoader();
|
||||
}
|
||||
|
||||
ExternalModelsLoader & Context::getExternalModelsLoader()
|
||||
{
|
||||
std::lock_guard lock(shared->external_models_mutex);
|
||||
return getExternalModelsLoaderUnlocked();
|
||||
}
|
||||
|
||||
ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked()
|
||||
{
|
||||
if (!shared->external_models_loader)
|
||||
shared->external_models_loader =
|
||||
std::make_unique<ExternalModelsLoader>(getGlobalContext());
|
||||
return *shared->external_models_loader;
|
||||
}
|
||||
|
||||
void Context::loadOrReloadModels(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
auto patterns_values = getMultipleValuesFromConfig(config, "", "models_config");
|
||||
std::unordered_set<std::string> patterns(patterns_values.begin(), patterns_values.end());
|
||||
|
||||
std::lock_guard lock(shared->external_models_mutex);
|
||||
|
||||
auto & external_models_loader = getExternalModelsLoaderUnlocked();
|
||||
|
||||
if (shared->external_models_config_repository)
|
||||
{
|
||||
shared->external_models_config_repository->updatePatterns(patterns);
|
||||
external_models_loader.reloadConfig(shared->external_models_config_repository->getName());
|
||||
return;
|
||||
}
|
||||
|
||||
auto app_path = getPath();
|
||||
auto config_path = getConfigRef().getString("config-file", "config.xml");
|
||||
auto repository = std::make_unique<ExternalLoaderXMLConfigRepository>(app_path, config_path, patterns);
|
||||
shared->external_models_config_repository = repository.get();
|
||||
shared->models_repository_guard = external_models_loader.addConfigRepository(std::move(repository));
|
||||
}
|
||||
|
||||
EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_error) const
|
||||
{
|
||||
std::lock_guard lock(shared->embedded_dictionaries_mutex);
|
||||
|
@ -53,7 +53,6 @@ class AccessRightsElements;
|
||||
enum class RowPolicyFilterType;
|
||||
class EmbeddedDictionaries;
|
||||
class ExternalDictionariesLoader;
|
||||
class ExternalModelsLoader;
|
||||
class ExternalUserDefinedExecutableFunctionsLoader;
|
||||
class InterserverCredentials;
|
||||
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
|
||||
@ -645,19 +644,15 @@ public:
|
||||
|
||||
const EmbeddedDictionaries & getEmbeddedDictionaries() const;
|
||||
const ExternalDictionariesLoader & getExternalDictionariesLoader() const;
|
||||
const ExternalModelsLoader & getExternalModelsLoader() const;
|
||||
const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const;
|
||||
EmbeddedDictionaries & getEmbeddedDictionaries();
|
||||
ExternalDictionariesLoader & getExternalDictionariesLoader();
|
||||
ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked();
|
||||
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader();
|
||||
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked();
|
||||
ExternalModelsLoader & getExternalModelsLoader();
|
||||
ExternalModelsLoader & getExternalModelsLoaderUnlocked();
|
||||
void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const;
|
||||
void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config);
|
||||
void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config);
|
||||
void loadOrReloadModels(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
#if USE_NLP
|
||||
SynonymsExtensions & getSynonymsExtensions() const;
|
||||
|
@ -61,18 +61,23 @@ public:
|
||||
return host_fqdn_id;
|
||||
}
|
||||
|
||||
std::string getQueueDir() const
|
||||
{
|
||||
return queue_dir;
|
||||
}
|
||||
|
||||
void startup();
|
||||
virtual void shutdown();
|
||||
|
||||
bool isCurrentlyActive() const { return initialized && !stop_flag; }
|
||||
|
||||
protected:
|
||||
|
||||
/// Returns cached ZooKeeper session (possibly expired).
|
||||
ZooKeeperPtr tryGetZooKeeper() const;
|
||||
/// If necessary, creates a new session and caches it.
|
||||
ZooKeeperPtr getAndSetZooKeeper();
|
||||
|
||||
protected:
|
||||
/// Iterates through queue tasks in ZooKeeper, runs execution of new tasks
|
||||
void scheduleTasks(bool reinitialized);
|
||||
|
||||
|
@ -1,41 +0,0 @@
|
||||
#include <Interpreters/ExternalModelsLoader.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
}
|
||||
|
||||
|
||||
ExternalModelsLoader::ExternalModelsLoader(ContextPtr context_)
|
||||
: ExternalLoader("external model", &Poco::Logger::get("ExternalModelsLoader")), WithContext(context_)
|
||||
{
|
||||
setConfigSettings({"model", "name", {}, {}});
|
||||
enablePeriodicUpdates(true);
|
||||
}
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> ExternalModelsLoader::create(
|
||||
const std::string & name, const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix, const std::string & /* repository_name */) const
|
||||
{
|
||||
String type = config.getString(config_prefix + ".type");
|
||||
ExternalLoadableLifetime lifetime(config, config_prefix + ".lifetime");
|
||||
|
||||
/// TODO: add models factory.
|
||||
if (type == "catboost")
|
||||
{
|
||||
return std::make_unique<CatBoostModel>(
|
||||
name, config.getString(config_prefix + ".path"),
|
||||
getContext()->getConfigRef().getString("catboost_dynamic_library_path"),
|
||||
lifetime
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("Unknown model type: " + type, ErrorCodes::INVALID_CONFIG_PARAMETER);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/CatBoostModel.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Interpreters/ExternalLoader.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Manages user-defined models.
|
||||
class ExternalModelsLoader : public ExternalLoader, WithContext
|
||||
{
|
||||
public:
|
||||
using ModelPtr = std::shared_ptr<const IMLModel>;
|
||||
|
||||
/// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds.
|
||||
explicit ExternalModelsLoader(ContextPtr context_);
|
||||
|
||||
ModelPtr getModel(const std::string & model_name) const
|
||||
{
|
||||
return std::static_pointer_cast<const IMLModel>(load(model_name));
|
||||
}
|
||||
|
||||
void reloadModel(const std::string & model_name) const
|
||||
{
|
||||
loadOrReload(model_name);
|
||||
}
|
||||
|
||||
protected:
|
||||
LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix, const std::string & repository_name) const override;
|
||||
|
||||
friend class StorageSystemModels;
|
||||
};
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user