Merge branch 'master' into fix-totals-extremes

Kruglov Pavel 2022-09-16 15:03:59 +02:00 committed by GitHub
commit 2d4a6b38af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
251 changed files with 5910 additions and 2517 deletions


@@ -22,6 +22,8 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-unchecked-optional-access,
-bugprone-assignment-in-if-condition,
-cert-dcl16-c,
-cert-err58-cpp,
@@ -103,6 +105,7 @@ Checks: '*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-const-correctness,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
@@ -114,6 +117,7 @@ Checks: '*,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,
-modernize-macro-to-enum,
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
@@ -135,6 +139,7 @@ Checks: '*,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-readability-simplify-boolean-expr,
-zirkon-*,
'

.git-blame-ignore-revs Normal file

@@ -0,0 +1,15 @@
# This is a file that can be used by git-blame to ignore some revisions.
# (git 2.23+, released in August 2019)
#
# Can be configured as follows:
#
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs
#
# For more information, see the git-blame(1) man page.
# Changed tabs to spaces in code [#CLICKHOUSE-3]
137ad95929ee016cc6d3c03bccb5586941c163ff
# dbms/ → src/
# (though it is unlikely that you will see it in blame)
06446b4f08a142d6f1bc30664c47ded88ab51782
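
For illustration, a minimal sketch of using this file once configured (the blamed path is just an example; --ignore-revs-file is the one-off equivalent of the config):

$ git config blame.ignoreRevsFile .git-blame-ignore-revs
$ git blame src/Core/Settings.h                                            # listed revisions are now skipped
$ git blame --ignore-revs-file .git-blame-ignore-revs src/Core/Settings.h  # same, without the config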


@@ -30,10 +30,11 @@ jobs:
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY"
# Download and push packages to artifactory
python3 ./tests/ci/push_to_artifactory.py --release "${{ github.ref }}" \
--commit '${{ github.sha }}' --artifactory-url "${{ secrets.JFROG_ARTIFACTORY_URL }}" --all
python3 ./tests/ci/push_to_artifactory.py --release '${{ github.ref }}' \
--commit '${{ github.sha }}' --artifactory-url '${{ secrets.JFROG_ARTIFACTORY_URL }}' --all
# Download macos binaries to ${{runner.temp}}/download_binary
python3 ./tests/ci/download_binary.py binary_darwin binary_darwin_aarch64
python3 ./tests/ci/download_binary.py --version '${{ github.ref }}' \
--commit '${{ github.sha }}' binary_darwin binary_darwin_aarch64
mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory'
- name: Upload packages to release assets
uses: svenstaro/upload-release-action@v2


@@ -43,6 +43,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
run: |
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
./utils/list-versions/update-docker-version.sh
GID=$(id -g "${UID}")
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \

.gitignore vendored

@@ -58,6 +58,10 @@ cmake_install.cmake
CTestTestfile.cmake
*.a
*.o
*.so
*.dll
*.lib
*.dylib
cmake-build-*
# Python cache


@@ -220,6 +220,35 @@ ReplxxLineReader::ReplxxLineReader(
rx.bind_key(Replxx::KEY::control('W'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::KILL_TO_WHITESPACE_ON_LEFT, code); });
rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; });
/// readline insert-comment
auto insert_comment_action = [this](char32_t code)
{
replxx::Replxx::State state(rx.get_state());
const char * line = state.text();
const char * line_end = line + strlen(line);
std::string commented_line;
if (std::find(line, line_end, '\n') != line_end)
{
/// If the query has multiple lines, a multiline comment is used instead
/// of commenting each line separately, for easier uncommenting (though
/// when invoking the editor it is simpler to uncomment multiple lines)
///
/// Note that using a multiline comment is OK even with nested
/// comments, since nested comments are supported.
commented_line = fmt::format("/* {} */", state.text());
}
else
{
// In the simplest case, use a single-line comment.
commented_line = fmt::format("-- {}", state.text());
}
rx.set_state(replxx::Replxx::State(commented_line.c_str(), commented_line.size()));
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
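/// Illustration (hypothetical buffer contents): pressing Meta-# with the
/// buffer "SELECT 1" commits "-- SELECT 1"; with a multiline buffer
/// "SELECT 1,\n2" it commits "/* SELECT 1,\n2 */".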
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
}
ReplxxLineReader::~ReplxxLineReader()


@@ -3,7 +3,7 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
if (ENABLE_CLANG_TIDY)
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
if (CLANG_TIDY_PATH)
message(STATUS


@@ -45,6 +45,7 @@ if (CMAKE_CROSSCOMPILING)
endif ()
if (USE_MUSL)
# use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP'
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
set (ENABLE_ODBC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")

contrib/NuRaft vendored

@@ -1 +1 @@
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc
Subproject commit e15858f8ad0ce8aba85cf74e3763874c76bf927c


@@ -1,35 +1,95 @@
# Choose to build static or shared library for c-ares.
if (USE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/c-ares")
# Disable looking for libnsl on platforms that have gethostbyname in glibc.
#
# c-ares searches for gethostbyname in the libnsl library; however, the
# version shipped with gRPC does this incorrectly [1], since it uses
# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in
# another dependent library. The upstream already contains the correct macro [2],
# but it is not included in gRPC (even upstream gRPC, not only the one that is
# shipped with clickhouse).
#
# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
#
# As a result, if you for some reason have libnsl [3] installed, clickhouse
# will refuse to start without it, even though it is a completely different
# library.
#
# [3]: https://packages.debian.org/bullseye/libnsl2
if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
# Generated from contrib/c-ares/src/lib/Makefile.inc
SET(SRCS
"${LIBRARY_DIR}/src/lib/ares__addrinfo2hostent.c"
"${LIBRARY_DIR}/src/lib/ares__addrinfo_localhost.c"
"${LIBRARY_DIR}/src/lib/ares__close_sockets.c"
"${LIBRARY_DIR}/src/lib/ares__get_hostent.c"
"${LIBRARY_DIR}/src/lib/ares__parse_into_addrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__readaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__sortaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares__read_line.c"
"${LIBRARY_DIR}/src/lib/ares__timeval.c"
"${LIBRARY_DIR}/src/lib/ares_android.c"
"${LIBRARY_DIR}/src/lib/ares_cancel.c"
"${LIBRARY_DIR}/src/lib/ares_data.c"
"${LIBRARY_DIR}/src/lib/ares_destroy.c"
"${LIBRARY_DIR}/src/lib/ares_expand_name.c"
"${LIBRARY_DIR}/src/lib/ares_expand_string.c"
"${LIBRARY_DIR}/src/lib/ares_fds.c"
"${LIBRARY_DIR}/src/lib/ares_free_hostent.c"
"${LIBRARY_DIR}/src/lib/ares_free_string.c"
"${LIBRARY_DIR}/src/lib/ares_freeaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getaddrinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getenv.c"
"${LIBRARY_DIR}/src/lib/ares_gethostbyaddr.c"
"${LIBRARY_DIR}/src/lib/ares_gethostbyname.c"
"${LIBRARY_DIR}/src/lib/ares_getnameinfo.c"
"${LIBRARY_DIR}/src/lib/ares_getsock.c"
"${LIBRARY_DIR}/src/lib/ares_init.c"
"${LIBRARY_DIR}/src/lib/ares_library_init.c"
"${LIBRARY_DIR}/src/lib/ares_llist.c"
"${LIBRARY_DIR}/src/lib/ares_mkquery.c"
"${LIBRARY_DIR}/src/lib/ares_create_query.c"
"${LIBRARY_DIR}/src/lib/ares_nowarn.c"
"${LIBRARY_DIR}/src/lib/ares_options.c"
"${LIBRARY_DIR}/src/lib/ares_parse_a_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_aaaa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_caa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_mx_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_naptr_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_ns_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_ptr_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_soa_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_srv_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_txt_reply.c"
"${LIBRARY_DIR}/src/lib/ares_parse_uri_reply.c"
"${LIBRARY_DIR}/src/lib/ares_platform.c"
"${LIBRARY_DIR}/src/lib/ares_process.c"
"${LIBRARY_DIR}/src/lib/ares_query.c"
"${LIBRARY_DIR}/src/lib/ares_search.c"
"${LIBRARY_DIR}/src/lib/ares_send.c"
"${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
"${LIBRARY_DIR}/src/lib/ares_strdup.c"
"${LIBRARY_DIR}/src/lib/ares_strerror.c"
"${LIBRARY_DIR}/src/lib/ares_strsplit.c"
"${LIBRARY_DIR}/src/lib/ares_timeout.c"
"${LIBRARY_DIR}/src/lib/ares_version.c"
"${LIBRARY_DIR}/src/lib/ares_writev.c"
"${LIBRARY_DIR}/src/lib/bitncmp.c"
"${LIBRARY_DIR}/src/lib/inet_net_pton.c"
"${LIBRARY_DIR}/src/lib/inet_ntop.c"
"${LIBRARY_DIR}/src/lib/windows_port.c"
)
if (USE_STATIC_LIBRARIES)
add_library(_c-ares STATIC ${SRCS})
target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB)
else()
add_library(_c-ares SHARED ${SRCS})
target_compile_definitions(_c-ares PUBLIC CARES_BUILDING_LIBRARY)
endif()
# Force use of c-ares inet_net_pton instead of the libresolv one
set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE)
target_compile_definitions(_c-ares PRIVATE HAVE_CONFIG_H=1)
add_subdirectory("../c-ares/" "../c-ares/")
target_include_directories(_c-ares SYSTEM PUBLIC
"${LIBRARY_DIR}/src/lib"
"${LIBRARY_DIR}/include"
)
add_library(ch_contrib::c-ares ALIAS c-ares)
# Platform-specific include directories. The original build system does a lot of checks to eventually generate two header files with defines:
# ares_build.h and ares_config.h. To update, run the original CMake build in c-ares for each platform and copy the headers into the
# platform-specific folder.
# For the platform-specific compile definitions, see c-ares top-level CMakeLists.txt.
if (OS_LINUX)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/linux")
target_compile_definitions(_c-ares PRIVATE -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=600)
elseif (OS_DARWIN)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/darwin")
target_compile_definitions(_c-ares PRIVATE -D_DARWIN_C_SOURCE)
elseif (OS_FREEBSD)
target_include_directories(_c-ares SYSTEM PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/c-ares-cmake/freebsd")
endif()
add_library(ch_contrib::c-ares ALIAS _c-ares)
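
A hedged sketch of regenerating the platform-specific headers, as the comment above describes (the build directory and exact output locations are assumptions; the original c-ares CMake build may place the generated headers elsewhere):

$ cd contrib/c-ares
$ cmake -B build .
$ cp build/ares_build.h build/ares_config.h ../c-ares-cmake/linux/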


@@ -0,0 +1,43 @@
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
* files. We need to include some dependent headers that may be system specific
* for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */


@@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
#define HAVE_GETADDRINFO_THREADSAFE
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
/* #undef HAVE_GETSERVBYPORT_R */
/* Define to 1 if you have the getservbyname_r function. */
/* #undef HAVE_GETSERVBYNAME_R */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolve' library (-lresolve). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
/* #undef HAVE_MSG_NOSIGNAL */
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
/* #undef HAVE_STROPTS_H */
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t


@@ -0,0 +1,43 @@
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
* files. We need to include some dependent headers that may be system specific
* for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */


@@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS 6
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS 6
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
#define HAVE_GETADDRINFO_THREADSAFE
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
#define HAVE_GETSERVBYPORT_R
/* Define to 1 if you have the getservbyname_r function. */
#define HAVE_GETSERVBYNAME_R
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolve' library (-lresolve). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
#define HAVE_MSG_NOSIGNAL
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
/* #undef HAVE_STROPTS_H */
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t


@@ -0,0 +1,43 @@
#ifndef __CARES_BUILD_H
#define __CARES_BUILD_H
#define CARES_TYPEOF_ARES_SOCKLEN_T socklen_t
#define CARES_TYPEOF_ARES_SSIZE_T ssize_t
/* Prefix names with CARES_ to make sure they don't conflict with other config.h
* files. We need to include some dependent headers that may be system specific
* for C-Ares */
#define CARES_HAVE_SYS_TYPES_H
#define CARES_HAVE_SYS_SOCKET_H
/* #undef CARES_HAVE_WINDOWS_H */
/* #undef CARES_HAVE_WS2TCPIP_H */
/* #undef CARES_HAVE_WINSOCK2_H */
/* #undef CARES_HAVE_WINDOWS_H */
#define CARES_HAVE_ARPA_NAMESER_H
#define CARES_HAVE_ARPA_NAMESER_COMPAT_H
#ifdef CARES_HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef CARES_HAVE_SYS_SOCKET_H
# include <sys/socket.h>
#endif
#ifdef CARES_HAVE_WINSOCK2_H
# include <winsock2.h>
#endif
#ifdef CARES_HAVE_WS2TCPIP_H
# include <ws2tcpip.h>
#endif
#ifdef CARES_HAVE_WINDOWS_H
# include <windows.h>
#endif
typedef CARES_TYPEOF_ARES_SOCKLEN_T ares_socklen_t;
typedef CARES_TYPEOF_ARES_SSIZE_T ares_ssize_t;
#endif /* __CARES_BUILD_H */


@@ -0,0 +1,432 @@
/* Generated from ares_config.h.cmake */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* define this if ares is built for a big endian system */
#undef ARES_BIG_ENDIAN
/* when building as static part of libcurl */
#undef BUILDING_LIBCURL
/* Defined for build that exposes internal static functions for testing. */
#undef CARES_EXPOSE_STATICS
/* Defined for build with symbol hiding. */
#undef CARES_SYMBOL_HIDING
/* Definition to make a library symbol externally visible. */
#undef CARES_SYMBOL_SCOPE_EXTERN
/* Use resolver library to configure cares */
/* #undef CARES_USE_LIBRESOLV */
/* if a /etc/inet dir is being used */
#undef ETC_INET
/* Define to the type of arg 2 for gethostname. */
#define GETHOSTNAME_TYPE_ARG2 size_t
/* Define to the type qualifier of arg 1 for getnameinfo. */
#define GETNAMEINFO_QUAL_ARG1
/* Define to the type of arg 1 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG1 struct sockaddr *
/* Define to the type of arg 2 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG2 socklen_t
/* Define to the type of args 4 and 6 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG46 socklen_t
/* Define to the type of arg 7 for getnameinfo. */
#define GETNAMEINFO_TYPE_ARG7 int
/* Specifies the number of arguments to getservbyport_r */
#define GETSERVBYPORT_R_ARGS 6
/* Specifies the number of arguments to getservbyname_r */
#define GETSERVBYNAME_R_ARGS 6
/* Define to 1 if you have AF_INET6. */
#define HAVE_AF_INET6
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H
/* Define to 1 if you have the <arpa/nameser_compat.h> header file. */
#define HAVE_ARPA_NAMESER_COMPAT_H
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H
/* Define to 1 if you have the `bitncmp' function. */
/* #undef HAVE_BITNCMP */
/* Define to 1 if bool is an available type. */
#define HAVE_BOOL_T
/* Define to 1 if you have the clock_gettime function and monotonic timer. */
#define HAVE_CLOCK_GETTIME_MONOTONIC
/* Define to 1 if you have the closesocket function. */
/* #undef HAVE_CLOSESOCKET */
/* Define to 1 if you have the CloseSocket camel case function. */
/* #undef HAVE_CLOSESOCKET_CAMEL */
/* Define to 1 if you have the connect function. */
#define HAVE_CONNECT
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H
/* Define to 1 if you have the fcntl function. */
#define HAVE_FCNTL
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H
/* Define to 1 if you have a working fcntl O_NONBLOCK function. */
#define HAVE_FCNTL_O_NONBLOCK
/* Define to 1 if you have the freeaddrinfo function. */
#define HAVE_FREEADDRINFO
/* Define to 1 if you have a working getaddrinfo function. */
#define HAVE_GETADDRINFO
/* Define to 1 if the getaddrinfo function is threadsafe. */
/* #undef HAVE_GETADDRINFO_THREADSAFE */
/* Define to 1 if you have the getenv function. */
#define HAVE_GETENV
/* Define to 1 if you have the gethostbyaddr function. */
#define HAVE_GETHOSTBYADDR
/* Define to 1 if you have the gethostbyname function. */
#define HAVE_GETHOSTBYNAME
/* Define to 1 if you have the gethostname function. */
#define HAVE_GETHOSTNAME
/* Define to 1 if you have the getnameinfo function. */
#define HAVE_GETNAMEINFO
/* Define to 1 if you have the getservbyport_r function. */
#define HAVE_GETSERVBYPORT_R
/* Define to 1 if you have the getservbyname_r function. */
#define HAVE_GETSERVBYNAME_R
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY
/* Define to 1 if you have the `if_indextoname' function. */
#define HAVE_IF_INDEXTONAME
/* Define to 1 if you have a IPv6 capable working inet_net_pton function. */
/* #undef HAVE_INET_NET_PTON */
/* Define to 1 if you have a IPv6 capable working inet_ntop function. */
#define HAVE_INET_NTOP
/* Define to 1 if you have a IPv6 capable working inet_pton function. */
#define HAVE_INET_PTON
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H
/* Define to 1 if you have the ioctl function. */
#define HAVE_IOCTL
/* Define to 1 if you have the ioctlsocket function. */
/* #undef HAVE_IOCTLSOCKET */
/* Define to 1 if you have the IoctlSocket camel case function. */
/* #undef HAVE_IOCTLSOCKET_CAMEL */
/* Define to 1 if you have a working IoctlSocket camel case FIONBIO function.
*/
/* #undef HAVE_IOCTLSOCKET_CAMEL_FIONBIO */
/* Define to 1 if you have a working ioctlsocket FIONBIO function. */
/* #undef HAVE_IOCTLSOCKET_FIONBIO */
/* Define to 1 if you have a working ioctl FIONBIO function. */
#define HAVE_IOCTL_FIONBIO
/* Define to 1 if you have a working ioctl SIOCGIFADDR function. */
#define HAVE_IOCTL_SIOCGIFADDR
/* Define to 1 if you have the `resolve' library (-lresolve). */
/* #undef HAVE_LIBRESOLV */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H
/* if your compiler supports LL */
#define HAVE_LL
/* Define to 1 if the compiler supports the 'long long' data type. */
#define HAVE_LONGLONG
/* Define to 1 if you have the malloc.h header file. */
#define HAVE_MALLOC_H
/* Define to 1 if you have the memory.h header file. */
#define HAVE_MEMORY_H
/* Define to 1 if you have the MSG_NOSIGNAL flag. */
#define HAVE_MSG_NOSIGNAL
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H
/* Define to 1 if you have PF_INET6. */
#define HAVE_PF_INET6
/* Define to 1 if you have the recv function. */
#define HAVE_RECV
/* Define to 1 if you have the recvfrom function. */
#define HAVE_RECVFROM
/* Define to 1 if you have the send function. */
#define HAVE_SEND
/* Define to 1 if you have the setsockopt function. */
#define HAVE_SETSOCKOPT
/* Define to 1 if you have a working setsockopt SO_NONBLOCK function. */
/* #undef HAVE_SETSOCKOPT_SO_NONBLOCK */
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H
/* Define to 1 if sig_atomic_t is an available typedef. */
#define HAVE_SIG_ATOMIC_T
/* Define to 1 if sig_atomic_t is already defined as volatile. */
/* #undef HAVE_SIG_ATOMIC_T_VOLATILE */
/* Define to 1 if your struct sockaddr_in6 has sin6_scope_id. */
#define HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Define to 1 if you have the socket function. */
#define HAVE_SOCKET
/* Define to 1 if you have the <socket.h> header file. */
/* #undef HAVE_SOCKET_H */
/* Define to 1 if you have the <stdbool.h> header file. */
#define HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H
/* Define to 1 if you have the strcasecmp function. */
#define HAVE_STRCASECMP
/* Define to 1 if you have the strcmpi function. */
/* #undef HAVE_STRCMPI */
/* Define to 1 if you have the strdup function. */
#define HAVE_STRDUP
/* Define to 1 if you have the stricmp function. */
/* #undef HAVE_STRICMP */
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H
/* Define to 1 if you have the strncasecmp function. */
#define HAVE_STRNCASECMP
/* Define to 1 if you have the strncmpi function. */
/* #undef HAVE_STRNCMPI */
/* Define to 1 if you have the strnicmp function. */
/* #undef HAVE_STRNICMP */
/* Define to 1 if you have the <stropts.h> header file. */
#define HAVE_STROPTS_H
/* Define to 1 if you have struct addrinfo. */
#define HAVE_STRUCT_ADDRINFO
/* Define to 1 if you have struct in6_addr. */
#define HAVE_STRUCT_IN6_ADDR
/* Define to 1 if you have struct sockaddr_in6. */
#define HAVE_STRUCT_SOCKADDR_IN6
/* if struct sockaddr_storage is defined */
#define HAVE_STRUCT_SOCKADDR_STORAGE
/* Define to 1 if you have the timeval struct. */
#define HAVE_STRUCT_TIMEVAL
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#define HAVE_SYS_UIO_H
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the windows.h header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the winsock2.h header file. */
/* #undef HAVE_WINSOCK2_H */
/* Define to 1 if you have the winsock.h header file. */
/* #undef HAVE_WINSOCK_H */
/* Define to 1 if you have the writev function. */
#define HAVE_WRITEV
/* Define to 1 if you have the ws2tcpip.h header file. */
/* #undef HAVE_WS2TCPIP_H */
/* Define to 1 if you have the __system_property_get function */
#define HAVE___SYSTEM_PROPERTY_GET
/* Define to 1 if you need the malloc.h header file even with stdlib.h */
/* #undef NEED_MALLOC_H */
/* Define to 1 if you need the memory.h header file even with stdlib.h */
/* #undef NEED_MEMORY_H */
/* a suitable file/device to read random data from */
#define CARES_RANDOM_FILE "/dev/urandom"
/* Define to the type qualifier pointed by arg 5 for recvfrom. */
#define RECVFROM_QUAL_ARG5
/* Define to the type of arg 1 for recvfrom. */
#define RECVFROM_TYPE_ARG1 int
/* Define to the type pointed by arg 2 for recvfrom. */
#define RECVFROM_TYPE_ARG2 void *
/* Define to 1 if the type pointed by arg 2 for recvfrom is void. */
#define RECVFROM_TYPE_ARG2_IS_VOID 0
/* Define to the type of arg 3 for recvfrom. */
#define RECVFROM_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recvfrom. */
#define RECVFROM_TYPE_ARG4 int
/* Define to the type pointed by arg 5 for recvfrom. */
#define RECVFROM_TYPE_ARG5 struct sockaddr *
/* Define to 1 if the type pointed by arg 5 for recvfrom is void. */
#define RECVFROM_TYPE_ARG5_IS_VOID 0
/* Define to the type pointed by arg 6 for recvfrom. */
#define RECVFROM_TYPE_ARG6 socklen_t *
/* Define to 1 if the type pointed by arg 6 for recvfrom is void. */
#define RECVFROM_TYPE_ARG6_IS_VOID 0
/* Define to the function return type for recvfrom. */
#define RECVFROM_TYPE_RETV ssize_t
/* Define to the type of arg 1 for recv. */
#define RECV_TYPE_ARG1 int
/* Define to the type of arg 2 for recv. */
#define RECV_TYPE_ARG2 void *
/* Define to the type of arg 3 for recv. */
#define RECV_TYPE_ARG3 size_t
/* Define to the type of arg 4 for recv. */
#define RECV_TYPE_ARG4 int
/* Define to the function return type for recv. */
#define RECV_TYPE_RETV ssize_t
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE
/* Define to the type qualifier of arg 2 for send. */
#define SEND_QUAL_ARG2
/* Define to the type of arg 1 for send. */
#define SEND_TYPE_ARG1 int
/* Define to the type of arg 2 for send. */
#define SEND_TYPE_ARG2 void *
/* Define to the type of arg 3 for send. */
#define SEND_TYPE_ARG3 size_t
/* Define to the type of arg 4 for send. */
#define SEND_TYPE_ARG4 int
/* Define to the function return type for send. */
#define SEND_TYPE_RETV ssize_t
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#define TIME_WITH_SYS_TIME
/* Define to disable non-blocking sockets. */
#undef USE_BLOCKING_SOCKETS
/* Define to avoid automatic inclusion of winsock.h */
#undef WIN32_LEAN_AND_MEAN
/* Type to use in place of in_addr_t when system does not provide it. */
#undef in_addr_t


@@ -415,7 +415,7 @@
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */


@@ -440,7 +440,9 @@
#define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */
#ifndef USE_MUSL
#define HAVE_STRERROR_R 1
#endif
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

contrib/libcpuid vendored

@@ -1 +1 @@
Subproject commit 8db3b8d2d32d22437f063ce692a1b9bb15e42d18
Subproject commit 503083acb77edf9fbce22a05826307dff2ce96e6


@@ -63,6 +63,13 @@ target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
# NOTE: this is a dirty hack; instead, pg_config.h should be shipped
# separately for each OS (as is done for jemalloc), not as one generic header
# for all OSes as it is now.
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
endif()
target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
add_library(ch_contrib::libpq ALIAS _libpq)

contrib/librdkafka vendored

@@ -1 +1 @@
Subproject commit ff32b4e9eeafd0b276f010ee969179e4e9e6d0b2
Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f

contrib/llvm vendored

@@ -1 +1 @@
Subproject commit 20607e61728e97c969e536644c3c0c1bb1a50672
Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc

@@ -1 +1 @@
Subproject commit f431047ac8da13179c488018dddf1c0d0771a997
Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b

contrib/vectorscan vendored

@@ -1 +1 @@
Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2


@@ -304,7 +304,7 @@ target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
# Please regenerate these files if you update vectorscan.
if (ARCH_AMD64)
target_include_directories (_vectorscan PRIVATE x86_64)
target_include_directories (_vectorscan PRIVATE amd64)
endif ()
if (ARCH_AARCH64)


@@ -67,24 +67,5 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
# NOTE: thread sanitizer is broken in clang-14, so we have to build it with clang-15
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-15 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-15 \
llvm-15 \
clang-tidy-15 \
--yes --no-install-recommends \
&& apt-get clean
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]


@@ -339,17 +339,16 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
choices=(
"clang-15", # For TSAN builds, see #39450
"clang-14",
"clang-14-darwin",
"clang-14-darwin-aarch64",
"clang-14-aarch64",
"clang-14-ppc64le",
"clang-14-amd64sse2",
"clang-14-freebsd",
"clang-15",
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-ppc64le",
"clang-15-amd64sse2",
"clang-15-freebsd",
"gcc-11",
),
default="clang-14",
default="clang-15",
help="a compiler to use",
)
parser.add_argument(


@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="20.9.3.45"
ARG VERSION="22.8.5.29"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION=22.6.1.*
ARG VERSION="22.8.5.29"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@@ -16,11 +16,10 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# Sanitizer options for the shell that will be spawned on "docker run" (not the current one)
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080'


@@ -8,16 +8,41 @@ FROM clickhouse/binary-builder:$FROM_TAG
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 libllvm14 libclang-14-dev libmlir-14-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain a proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser --branch llvm-14 \
#
# FIXME: update location of a repo
RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \
&& cd woboq_codebrowser \
&& cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 \
&& make -j \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& ninja \
&& cd .. \
&& rm -rf woboq_codebrowser
@ -32,7 +57,7 @@ ENV SHA=nosha
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-14 -DCMAKE_C_COMPILER=/usr/bin/clang-14 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\

View File

@ -19,7 +19,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function clone

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -61,7 +61,7 @@ function configure
cp -rv right/config left ||:
# Start a temporary server to rename the tables
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn temporary in its own process groups
@ -88,7 +88,7 @@ function configure
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Make copies of the original db for both servers. Use hardlinks instead
@ -106,7 +106,7 @@ function configure
function restart
{
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Change the jemalloc settings here.
@ -1400,7 +1400,7 @@ case "$stage" in
while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis.
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo Servers stopped.
;&
"analyze_queries")

View File

@ -5,7 +5,7 @@ FROM ubuntu:20.04
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
RUN apt-get update \
&& apt-get install \
@ -56,6 +56,8 @@ RUN apt-get update \
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \

View File

@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.5.29-lts (74ffb843807) FIXME as compared to v22.8.4.7-lts (baad27bcd2f)
#### New Feature
* Backported in [#40870](https://github.com/ClickHouse/ClickHouse/issues/40870): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Improvement
* Backported in [#40817](https://github.com/ClickHouse/ClickHouse/issues/40817): The setting `show_addresses_in_stack_traces` was accidentally disabled in default `config.xml`. It's removed from the config now, so the setting is enabled by default. [#40749](https://github.com/ClickHouse/ClickHouse/pull/40749) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#40944](https://github.com/ClickHouse/ClickHouse/issues/40944): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Backported in [#41157](https://github.com/ClickHouse/ClickHouse/issues/41157): Add macOS binaries to GH release assets, it fixes [#37718](https://github.com/ClickHouse/ClickHouse/issues/37718). [#41088](https://github.com/ClickHouse/ClickHouse/pull/41088) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#40866](https://github.com/ClickHouse/ClickHouse/issues/40866): Fix crash while parsing values of type `Object` that contain arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40805](https://github.com/ClickHouse/ClickHouse/issues/40805): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40777](https://github.com/ClickHouse/ClickHouse/issues/40777): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41135](https://github.com/ClickHouse/ClickHouse/issues/41135): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41242](https://github.com/ClickHouse/ClickHouse/issues/41242): Fixed "possible deadlock avoided" error on automatic conversion of database engine from Ordinary to Atomic. [#41146](https://github.com/ClickHouse/ClickHouse/pull/41146) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41234](https://github.com/ClickHouse/ClickHouse/issues/41234): Fix background clean up of broken detached parts. [#41190](https://github.com/ClickHouse/ClickHouse/pull/41190) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* CaresPTRResolver small safety improvement [#40890](https://github.com/ClickHouse/ClickHouse/pull/40890) ([Arthur Passos](https://github.com/arthurpassos)).

View File

@ -103,6 +103,7 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
- [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
- [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations)
- [Linq To DB](https://github.com/linq2db/linq2db)
- Elixir
- [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto)

View File

@ -134,6 +134,13 @@ Example of configuration for versions later or equal to 22.8:
<max_size>10000000</max_size>
</cache>
</disks>
<policies>
<s3_cache_policy> <!-- the policy name is arbitrary -->
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
</s3_cache_policy>
</policies>
</storage_configuration>
```
@ -151,6 +158,13 @@ Example of configuration for versions earlier than 22.8:
<data_cache_size>10000000</data_cache_size>
</s3>
</disks>
<policies>
<s3_cache_policy> <!-- the policy name is arbitrary -->
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_cache_policy>
</policies>
</storage_configuration>
```
@ -166,7 +180,7 @@ Cache **configuration settings**:
- `enable_cache_hits_threshold` - a number that defines how many times some data needs to be read before it will be cached. Default: `0`, i.e. the data is cached on the first read attempt.
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `true`.
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used the filesystem cache before this version, it will not work on versions starting from 22.8 when this setting is set to `true`. If you want to use this setting, clear the old cache created before version 22.8 before upgrading.
- `max_file_segment_size` - a maximum size of a single cache file. Default: `104857600` (100 MB).
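To see what the cache currently holds, the `system.filesystem_cache` system table can be queried. A minimal sketch (column names as observed in 22.8; verify against your version):
``` sql
-- One row per cached file segment; aggregate to get per-cache totals
SELECT cache_base_path, count() AS segments, sum(size) AS bytes
FROM system.filesystem_cache
GROUP BY cache_base_path;
```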

View File

@ -2,10 +2,9 @@
slug: /en/operations/troubleshooting
sidebar_position: 46
sidebar_label: Troubleshooting
title: Troubleshooting
---
# Troubleshooting
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Query processing](#troubleshooting-does-not-process-queries)

View File

@ -1227,6 +1227,8 @@ Result:
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns the [DateTime](../../sql-reference/data-types/datetime.md) type.
Alias: `fromUnixTimestamp`.
**Example:**
Query:
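For instance, a single integer argument is interpreted as seconds since the Unix epoch (the rendered value depends on the server time zone):
``` sql
SELECT FROM_UNIXTIME(423543535);
-- 1983-06-04 10:58:55, assuming the UTC time zone
```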

View File

@ -1823,6 +1823,36 @@ Result:
Evaluate external model.
Accepts a model name and model arguments. Returns Float64.
## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning.
Accepts a path to a catboost model and model arguments (features). Returns Float64.
``` sql
SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction
FROM data_table
```
**Prerequisites**
1. Build the catboost evaluation library
Before evaluating catboost models, the `libcatboostmodel.<so|dylib>` library must be made available. See the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html) for how to compile it.
Next, specify the path to `libcatboostmodel.<so|dylib>` in the clickhouse configuration:
``` xml
<clickhouse>
...
<catboost_lib_path>/path/to/libcatboostmodel.so</catboost_lib_path>
...
</clickhouse>
```
2. Train a catboost model using libcatboost
See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set.
## throwIf(x\[, message\[, error_code\]\])
Throws an exception if the argument is non-zero.
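For example, the following aborts the scan as soon as the condition first holds:
``` sql
SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
-- throws an exception like: DB::Exception: Too many.
```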

View File

@ -32,6 +32,12 @@ SET allow_experimental_lightweight_delete = true;
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
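As a side-by-side sketch (the table and predicate are hypothetical):
``` sql
-- Lightweight delete: rows are masked right away and excluded from reads
DELETE FROM db.table WHERE id = 42;

-- Classic mutation: asynchronously rewrites the affected data parts
ALTER TABLE db.table DELETE WHERE id = 42;
```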
:::warning
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on OLTP system. Ligthweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
:::
:::note
`DELETE FROM` requires the `ALTER DELETE` privilege:
```sql
grant ALTER DELETE ON db.table to username;
```
:::

View File

@ -6,45 +6,6 @@ sidebar_label: SYSTEM
# SYSTEM Statements
The list of available `SYSTEM` statements:
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [RELOAD FUNCTIONS](#query_language-system-reload-functions)
- [RELOAD FUNCTION](#query_language-system-reload-functions)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES
Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md).
@ -69,7 +30,12 @@ SELECT name, status FROM system.dictionaries;
## RELOAD MODELS
Reloads all [CatBoost](../../guides/developer/apply-catboost-model.md) models if the configuration was updated without restarting the server.
:::note
This statement and `SYSTEM RELOAD MODEL` merely unload catboost models from the clickhouse-library-bridge. The function `catboostEvaluate()`
loads a model upon first access if it is not loaded yet.
:::
Unloads all CatBoost models.
**Syntax**
@ -79,12 +45,12 @@ SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
## RELOAD MODEL
Completely reloads a CatBoost model `model_name` if the configuration was updated without restarting the server.
Unloads a CatBoost model at `model_path`.
**Syntax**
```sql
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_name>
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_path>
```
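For instance, a sketch with a hypothetical model path (quoting follows the usual string-literal rules; adjust to your deployment):
``` sql
SYSTEM RELOAD MODEL '/var/lib/clickhouse/models/amazon_model.bin';
```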
## RELOAD FUNCTIONS

View File

@ -13,7 +13,7 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
**Syntax**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
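For example, with the new optional arguments the format and structure can be stated explicitly or left to be inferred (file name and schema here are hypothetical, and inference assumes a server recent enough to support it):
``` sql
-- explicit format and structure
SELECT * FROM file('data.csv', 'CSV', 'id UInt32, name String');

-- format inferred from the extension, structure inferred from the data
SELECT * FROM file('data.csv');
```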
**Parameters**

View File

@ -11,7 +11,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
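For example (the bucket URL and schema are placeholders):
``` sql
-- full form
SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.csv.gz', 'CSVWithNames', 'id UInt32, name String', 'gzip');

-- shortened form: format, structure and compression are inferred
SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.csv.gz');
```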
**Arguments**

View File

@ -10,7 +10,7 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
```
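For example, fanning the listing and reads out over a hypothetical cluster `my_cluster` (the bucket URL is a placeholder):
``` sql
SELECT count()
FROM s3Cluster('my_cluster', 'https://my-bucket.s3.amazonaws.com/data/*.csv.gz', 'CSVWithNames');
```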
**Arguments**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
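For example (the URL is a placeholder; the structure argument may be omitted and inferred):
``` sql
SELECT * FROM url('https://example.com/data.json', 'JSONEachRow') LIMIT 10;
```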
**Parameters**

View File

@ -155,7 +155,6 @@ getting_started/index.md getting-started/index.md
getting_started/install.md getting-started/install.md
getting_started/playground.md getting-started/playground.md
getting_started/tutorial.md getting-started/tutorial.md
guides/apply_catboost_model.md guides/apply-catboost-model.md
images/column_oriented.gif images/column-oriented.gif
images/row_oriented.gif images/row-oriented.gif
interfaces/http_interface.md interfaces/http.md

View File

@ -1,241 +0,0 @@
---
slug: /ru/guides/apply-catboost-model
sidebar_position: 41
sidebar_label: "Применение модели CatBoost в ClickHouse"
---
# Applying a CatBoost Model in ClickHouse {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) is an open-source machine learning library developed by [Yandex](https://yandex.ru/company/) that uses a gradient boosting scheme.
With this guide you will learn to apply pre-trained models in ClickHouse: as a result, you will run model inference from SQL.
To apply a CatBoost model in ClickHouse:
1. [Create a table](#create-table).
2. [Insert the data into the table](#insert-data-to-table).
3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (optional step).
4. [Run the model inference from SQL](#run-model-inference).
For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training).
You can reload CatBoost models without restarting the server if their configuration was updated. To do so, use the [RELOAD MODEL](../sql-reference/statements/system.md#query_language-system-reload-model) and [RELOAD MODELS](../sql-reference/statements/system.md#query_language-system-reload-models) system queries.
## Prerequisites {#prerequisites}
If you do not have [Docker](https://docs.docker.com/install/) yet, install it.
:::note "Note"
[Docker](https://www.docker.com) is a software platform for creating containers that isolate the CatBoost and ClickHouse installation from the rest of the system.
:::
Before applying a CatBoost model:
**1.** Pull the [Docker image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry:
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
```
This Docker image contains everything you need to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files.
**2.** Make sure the Docker image has been pulled successfully:
``` bash
$ docker image ls
REPOSITORY TAG IMAGE ID CREATED SIZE
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
```
**3.** Start a Docker container based on this image:
``` bash
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
```
## 1. Create a table {#create-table}
To create a table for the training sample:
**1.** Start the ClickHouse client:
``` bash
$ clickhouse client
```
:::note "Примечание"
Сервер ClickHouse уже запущен внутри Docker-контейнера.
:::
**2.** Создайте таблицу в ClickHouse с помощью следующей команды:
``` sql
:) CREATE TABLE amazon_train
(
date Date MATERIALIZED today(),
ACTION UInt8,
RESOURCE UInt32,
MGR_ID UInt32,
ROLE_ROLLUP_1 UInt32,
ROLE_ROLLUP_2 UInt32,
ROLE_DEPTNAME UInt32,
ROLE_TITLE UInt32,
ROLE_FAMILY_DESC UInt32,
ROLE_FAMILY UInt32,
ROLE_CODE UInt32
)
ENGINE = MergeTree ORDER BY date
```
**3.** Exit the ClickHouse client:
``` sql
:) exit
```
## 2. Insert the data into the table {#insert-data-to-table}
To insert the data:
**1.** Run the following command:
``` bash
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
```
**2.** Start the ClickHouse client:
``` bash
$ clickhouse client
```
**3.** Make sure the data has been uploaded:
``` sql
:) SELECT count() FROM amazon_train
SELECT count()
FROM amazon_train
+-count()-+
| 65538 |
+---------+
```
## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse}
:::note "Note"
**Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse.
:::
To integrate CatBoost into ClickHouse:
**1.** Build the evaluation library.
The fastest way to evaluate a CatBoost model is to compile the `libcatboostmodel.<so|dll|dylib>` library. For more information about how to build the library, see the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** Create a new directory anywhere, with an arbitrary name, for example `data`, and put the created library in it. The Docker image already contains the library `data/libcatboostmodel.so`.
**3.** Create a new directory for the model configuration anywhere, with an arbitrary name, for example `models`.
**4.** Create a model configuration file with an arbitrary name, for example `models/amazon_model.xml`.
**5.** Describe the model configuration:
``` xml
<models>
<model>
<!-- Model type. At the moment ClickHouse provides only the catboost model. -->
<type>catboost</type>
<!-- Model name. -->
<name>amazon</name>
<!-- Path to the trained model. -->
<path>/home/catboost/tutorial/catboost_model.bin</path>
<!-- Update interval. -->
<lifetime>0</lifetime>
</model>
</models>
```
**6.** Add the paths to CatBoost and the model configuration to the ClickHouse configuration:
``` xml
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/home/catboost/models/*_model.xml</models_config>
```
:::note "Примечание"
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
:::
## 4. Запустите вывод модели из SQL {#run-model-inference}
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.
Проверьте, что модель работает:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) > 0 AS prediction,
ACTION AS target
FROM amazon_train
LIMIT 10
```
:::note "Примечание"
Функция [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами.
:::
Спрогнозируйте вероятность:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1 + exp(-prediction)) AS probability,
ACTION AS target
FROM amazon_train
LIMIT 10
```
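The `1. / (1 + exp(-prediction))` expression is the standard logistic (sigmoid) transform that maps the raw score to a probability:
$$
p = \frac{1}{1 + e^{-\text{prediction}}}
$$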
:::note "Примечание"
Подробнее про функцию [exp()](../sql-reference/functions/math-functions.md).
:::
Посчитайте логистическую функцию потерь (LogLoss) на всей выборке:
``` sql
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
FROM
(
SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1. + exp(-prediction)) AS prob,
ACTION AS tg
FROM amazon_train
)
```
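The query above computes the standard binary cross-entropy, where $p_i$ is the predicted probability and $t_i$ is the target (`ACTION`):
$$
\mathrm{LogLoss} = -\frac{1}{N}\sum_{i=1}^{N}\bigl(t_i \log p_i + (1 - t_i)\log(1 - p_i)\bigr)
$$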
:::note "Примечание"
Подробнее про функции [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg), [log()](../sql-reference/functions/math-functions.md).
:::

View File

@ -7,5 +7,3 @@ sidebar_label: "Guides"
# Guides {#rukovodstva}
Detailed step-by-step instructions that help you solve various tasks using ClickHouse.
- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md)

View File

@ -6,43 +6,6 @@ sidebar_label: SYSTEM
# SYSTEM Queries {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [RELOAD FUNCTIONS](#query_language-system-reload-functions)
- [RELOAD FUNCTION](#query_language-system-reload-functions)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Reloads all [internal dictionaries](../dictionaries/internal-dicts.md).
Internal dictionaries are disabled by default.
@ -66,7 +29,12 @@ SELECT name, status FROM system.dictionaries;
## RELOAD MODELS {#query_language-system-reload-models}
Reloads all [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse) models if their configuration was updated, without restarting the server.
:::note
This statement and `SYSTEM RELOAD MODEL` merely unload catboost models from the clickhouse-library-bridge. The function `catboostEvaluate()`
loads a model upon first access if it is not loaded yet.
:::
Unloads all CatBoost models.
**Syntax**
@ -76,12 +44,12 @@ SYSTEM RELOAD MODELS
## RELOAD MODEL {#query_language-system-reload-model}
Completely reloads a [CatBoost](../../guides/apply-catboost-model.md#applying-catboost-model-in-clickhouse) model `model_name` if its configuration was updated, without restarting the server.
Unloads a CatBoost model at `model_path`.
**Syntax**
```sql
SYSTEM RELOAD MODEL <model_name>
SYSTEM RELOAD MODEL <model_path>
```
## RELOAD FUNCTIONS {#query_language-system-reload-functions}

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Syntax**
``` sql
file(path, format, structure)
file(path [,format] [,structure])
```
**Parameters**

View File

@ -11,7 +11,7 @@ sidebar_label: s3
**Syntax**
``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression])
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
**Arguments**

View File

@ -11,7 +11,7 @@ sidebar_label: s3Cluster
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
```
**Arguments**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax**
``` sql
url(URL, format, structure)
url(URL [,format] [,structure])
```
**Parameters**

View File

@ -1,244 +0,0 @@
---
slug: /zh/guides/apply-catboost-model
sidebar_position: 41
sidebar_label: "\u5E94\u7528CatBoost\u6A21\u578B"
---
# Applying a CatBoost Model in ClickHouse {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) is a free, open-source machine learning library developed by [Yandex](https://yandex.com/company/).
This document shows how to call a pre-trained model stored in ClickHouse from SQL statements to make predictions on data.
To apply a CatBoost model in ClickHouse:
1. [Create a table](#create-table).
2. [Insert the data into the table](#insert-data-to-table).
3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (optional step).
4. [Run the model inference from SQL](#run-model-inference).
For details about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training).
You can reload CatBoost models with the [RELOAD MODEL](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-model) and [RELOAD MODELS](https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-reload-models) statements.
## Prerequisites {#prerequisites}
Install [Docker](https://docs.docker.com/install/) first.
!!! note "Note"
    [Docker](https://www.docker.com) is a software platform that creates containers isolating the CatBoost and ClickHouse installation from the rest of the system.
Before applying a CatBoost model:
**1.** Pull the example Docker image (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry:
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
```
This example Docker image contains everything needed to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files.
**2.** Make sure the Docker image has been pulled successfully:
``` bash
$ docker image ls
REPOSITORY TAG IMAGE ID CREATED SIZE
yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB
```
**3.** Start a Docker container based on this image:
``` bash
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
```
## 1. Create a table {#create-table}
Create a ClickHouse table for the training sample:
**1.** Start the ClickHouse console client in interactive mode:
``` bash
$ clickhouse client
```
!!! note "注"
ClickHouse服务器已经在Docker容器内运行。
**2.** 使用以下命令创建表:
``` sql
:) CREATE TABLE amazon_train
(
date Date MATERIALIZED today(),
ACTION UInt8,
RESOURCE UInt32,
MGR_ID UInt32,
ROLE_ROLLUP_1 UInt32,
ROLE_ROLLUP_2 UInt32,
ROLE_DEPTNAME UInt32,
ROLE_TITLE UInt32,
ROLE_FAMILY_DESC UInt32,
ROLE_FAMILY UInt32,
ROLE_CODE UInt32
)
ENGINE = MergeTree ORDER BY date
```
**3.** Exit the ClickHouse console client:
``` sql
:) exit
```
## 2. Insert the data into the table {#insert-data-to-table}
Insert the data:
**1.** Run the following command:
``` bash
$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
```
**2.** Start the ClickHouse console client in interactive mode:
``` bash
$ clickhouse client
```
**3.** Make sure the data has been uploaded:
``` sql
:) SELECT count() FROM amazon_train
SELECT count()
FROM amazon_train
+-count()-+
| 65538 |
+---------+
```
## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse}
!!! note "Note"
    **Optional step.** The example Docker image already contains everything needed to run CatBoost and ClickHouse.
To integrate CatBoost into ClickHouse:
**1.** Build the evaluation library.
The fastest way to evaluate a CatBoost model is to compile the `libcatboostmodel.<so|dll|dylib>` library file.
For details about how to build the library file, see the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** Create a new directory (any location and name), for example `data`, and put the created library file in it. The example Docker image already contains the library `data/libcatboostmodel.so`.
**3.** Create a new directory for the model configuration, for example `models`.
**4.** Create a model configuration file, for example `models/amazon_model.xml`.
**5.** Describe the model configuration:
``` xml
<models>
<model>
<!-- Model type. Now catboost only. -->
<type>catboost</type>
<!-- Model name. -->
<name>amazon</name>
<!-- Path to trained model. -->
<path>/home/catboost/tutorial/catboost_model.bin</path>
<!-- Update interval. -->
<lifetime>0</lifetime>
</model>
</models>
```
**6.** Add the path to the CatBoost library file and the model configuration to the ClickHouse configuration:
``` xml
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/home/catboost/models/*_model.xml</models_config>
```
## 4. Run the model inference from SQL {#run-model-inference}
To test whether the model works, you can use the ClickHouse client `$ clickhouse client`.
Let's make sure the model works:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) > 0 AS prediction,
ACTION AS target
FROM amazon_train
LIMIT 10
```
!!! note "注"
函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 会对多类别模型返回一个元组,其中包含每一类别的原始预测值。
执行预测:
``` sql
:) SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1 + exp(-prediction)) AS probability,
ACTION AS target
FROM amazon_train
LIMIT 10
```
!!! note "注"
查看函数说明 [exp()](../sql-reference/functions/math-functions.md) 。
让我们计算样本的LogLoss:
``` sql
:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss
FROM
(
SELECT
modelEvaluate('amazon',
RESOURCE,
MGR_ID,
ROLE_ROLLUP_1,
ROLE_ROLLUP_2,
ROLE_DEPTNAME,
ROLE_TITLE,
ROLE_FAMILY_DESC,
ROLE_FAMILY,
ROLE_CODE) AS prediction,
1. / (1. + exp(-prediction)) AS prob,
ACTION AS tg
FROM amazon_train
)
```
!!! note "注"
查看函数说明 [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) 和 [log()](../sql-reference/functions/math-functions.md) 。
[原始文章](https://clickhouse.com/docs/en/guides/apply_catboost_model/) <!--hide-->

View File

@ -9,6 +9,5 @@ sidebar_label: ClickHouse Guides
Detailed instructions on how to solve various tasks using ClickHouse:
- [Tutorial on a simple cluster setup](../getting-started/tutorial.md)
- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md)
[Original article](https://clickhouse.com/docs/en/guides/) <!--hide-->

View File

@ -6,38 +6,6 @@ sidebar_label: SYSTEM
# SYSTEM Queries {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown)
- [KILL](#query_language-system-kill)
- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Reloads all [internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md). Internal dictionaries are disabled by default.

View File

@ -47,9 +47,10 @@ CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
# Some systems lack "flock"
command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
# Override defaults from optional config file and export them automatically
set -a
test -f /etc/default/clickhouse && . /etc/default/clickhouse
set +a
die()
{

View File

@ -54,7 +54,7 @@ else ()
endif ()
if (NOT USE_MUSL)
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to external dynamically loaded libraries" ${ENABLE_CLICKHOUSE_ALL})
endif ()
# https://presentations.clickhouse.com/matemarketing_2020/

View File

@ -446,8 +446,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fs::path ulimits_file = ulimits_dir / fmt::format("{}.conf", user);
fmt::print("Will set ulimits for {} user in {}.\n", user, ulimits_file.string());
std::string ulimits_content = fmt::format(
"{0}\tsoft\tnofile\t262144\n"
"{0}\thard\tnofile\t262144\n", user);
"{0}\tsoft\tnofile\t1048576\n"
"{0}\thard\tnofile\t1048576\n", user);
fs::create_directories(ulimits_dir);

View File

@ -1,6 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
CatBoostLibraryHandler.cpp
CatBoostLibraryHandlerFactory.cpp
ExternalDictionaryLibraryAPI.cpp
ExternalDictionaryLibraryHandler.cpp
ExternalDictionaryLibraryHandlerFactory.cpp

View File

@ -0,0 +1,49 @@
#pragma once
#include <cstdint>
#include <cstddef>
// Function pointer typedefs and names of the libcatboostmodel.so functions used by ClickHouse
struct CatBoostLibraryAPI
{
using ModelCalcerHandle = void;
using ModelCalcerCreateFunc = ModelCalcerHandle * (*)();
static constexpr const char * ModelCalcerCreateName = "ModelCalcerCreate";
using ModelCalcerDeleteFunc = void (*)(ModelCalcerHandle *);
static constexpr const char * ModelCalcerDeleteName = "ModelCalcerDelete";
using GetErrorStringFunc = const char * (*)();
static constexpr const char * GetErrorStringName = "GetErrorString";
using LoadFullModelFromFileFunc = bool (*)(ModelCalcerHandle *, const char *);
static constexpr const char * LoadFullModelFromFileName = "LoadFullModelFromFile";
using CalcModelPredictionFlatFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionFlatName = "CalcModelPredictionFlat";
using CalcModelPredictionFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const char ***, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionName = "CalcModelPrediction";
using CalcModelPredictionWithHashedCatFeaturesFunc = bool (*)(ModelCalcerHandle *, size_t, const float **, size_t, const int **, size_t, double *, size_t);
static constexpr const char * CalcModelPredictionWithHashedCatFeaturesName = "CalcModelPredictionWithHashedCatFeatures";
using GetStringCatFeatureHashFunc = int (*)(const char *, size_t);
static constexpr const char * GetStringCatFeatureHashName = "GetStringCatFeatureHash";
using GetIntegerCatFeatureHashFunc = int (*)(uint64_t);
static constexpr const char * GetIntegerCatFeatureHashName = "GetIntegerCatFeatureHash";
using GetFloatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetFloatFeaturesCountName = "GetFloatFeaturesCount";
using GetCatFeaturesCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetCatFeaturesCountName = "GetCatFeaturesCount";
using GetTreeCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetTreeCountName = "GetTreeCount";
using GetDimensionsCountFunc = size_t (*)(ModelCalcerHandle *);
static constexpr const char * GetDimensionsCountName = "GetDimensionsCount";
};

View File

@ -0,0 +1,389 @@
#include "CatBoostLibraryHandler.h"
#include <Columns/ColumnTuple.h>
#include <Common/FieldVisitorConvertToNumber.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_APPLY_CATBOOST_MODEL;
extern const int CANNOT_LOAD_CATBOOST_MODEL;
extern const int LOGICAL_ERROR;
}
CatBoostLibraryHandler::APIHolder::APIHolder(SharedLibrary & lib)
{
ModelCalcerCreate = lib.get<CatBoostLibraryAPI::ModelCalcerCreateFunc>(CatBoostLibraryAPI::ModelCalcerCreateName);
ModelCalcerDelete = lib.get<CatBoostLibraryAPI::ModelCalcerDeleteFunc>(CatBoostLibraryAPI::ModelCalcerDeleteName);
GetErrorString = lib.get<CatBoostLibraryAPI::GetErrorStringFunc>(CatBoostLibraryAPI::GetErrorStringName);
LoadFullModelFromFile = lib.get<CatBoostLibraryAPI::LoadFullModelFromFileFunc>(CatBoostLibraryAPI::LoadFullModelFromFileName);
CalcModelPredictionFlat = lib.get<CatBoostLibraryAPI::CalcModelPredictionFlatFunc>(CatBoostLibraryAPI::CalcModelPredictionFlatName);
CalcModelPrediction = lib.get<CatBoostLibraryAPI::CalcModelPredictionFunc>(CatBoostLibraryAPI::CalcModelPredictionName);
CalcModelPredictionWithHashedCatFeatures = lib.get<CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesFunc>(CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesName);
GetStringCatFeatureHash = lib.get<CatBoostLibraryAPI::GetStringCatFeatureHashFunc>(CatBoostLibraryAPI::GetStringCatFeatureHashName);
GetIntegerCatFeatureHash = lib.get<CatBoostLibraryAPI::GetIntegerCatFeatureHashFunc>(CatBoostLibraryAPI::GetIntegerCatFeatureHashName);
GetFloatFeaturesCount = lib.get<CatBoostLibraryAPI::GetFloatFeaturesCountFunc>(CatBoostLibraryAPI::GetFloatFeaturesCountName);
GetCatFeaturesCount = lib.get<CatBoostLibraryAPI::GetCatFeaturesCountFunc>(CatBoostLibraryAPI::GetCatFeaturesCountName);
GetTreeCount = lib.tryGet<CatBoostLibraryAPI::GetTreeCountFunc>(CatBoostLibraryAPI::GetTreeCountName);
GetDimensionsCount = lib.tryGet<CatBoostLibraryAPI::GetDimensionsCountFunc>(CatBoostLibraryAPI::GetDimensionsCountName);
}
CatBoostLibraryHandler::CatBoostLibraryHandler(
const std::string & library_path,
const std::string & model_path)
: loading_start_time(std::chrono::system_clock::now())
, library(std::make_shared<SharedLibrary>(library_path))
, api(*library)
{
model_calcer_handle = api.ModelCalcerCreate();
if (!api.LoadFullModelFromFile(model_calcer_handle, model_path.c_str()))
{
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
"Cannot load CatBoost model: {}", api.GetErrorString());
}
float_features_count = api.GetFloatFeaturesCount(model_calcer_handle);
cat_features_count = api.GetCatFeaturesCount(model_calcer_handle);
tree_count = 1;
if (api.GetDimensionsCount)
tree_count = api.GetDimensionsCount(model_calcer_handle);
loading_duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - loading_start_time);
}
CatBoostLibraryHandler::~CatBoostLibraryHandler()
{
api.ModelCalcerDelete(model_calcer_handle);
}
std::chrono::system_clock::time_point CatBoostLibraryHandler::getLoadingStartTime() const
{
return loading_start_time;
}
std::chrono::milliseconds CatBoostLibraryHandler::getLoadingDuration() const
{
return loading_duration;
}
namespace
{
/// Buffer should be allocated with features_count * column->size() elements.
/// Place column elements in positions buffer[0], buffer[features_count], ... , buffer[(size - 1) * features_count]
template <typename T>
void placeColumnAsNumber(const IColumn * column, T * buffer, size_t features_count)
{
size_t size = column->size();
FieldVisitorConvertToNumber<T> visitor;
for (size_t i = 0; i < size; ++i)
{
/// TODO: Replace with column visitor.
Field field;
column->get(i, field);
*buffer = applyVisitor(visitor, field);
buffer += features_count;
}
}
/// Buffer should be allocated with features_count * column->size() elements.
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[(size - 1) * features_count]
void placeStringColumn(const ColumnString & column, const char ** buffer, size_t features_count)
{
size_t size = column.size();
for (size_t i = 0; i < size; ++i)
{
*buffer = const_cast<char *>(column.getDataAtWithTerminatingZero(i).data);
buffer += features_count;
}
}
/// Buffer should be allocated with features_count * column->size() elements.
/// Place string pointers in positions buffer[0], buffer[features_count], ... , buffer[(size - 1) * features_count]
/// Returns PODArray which holds data (because ColumnFixedString doesn't store terminating zero).
PODArray<char> placeFixedStringColumn(const ColumnFixedString & column, const char ** buffer, size_t features_count)
{
size_t size = column.size();
size_t str_size = column.getN();
PODArray<char> data(size * (str_size + 1));
char * data_ptr = data.data();
for (size_t i = 0; i < size; ++i)
{
auto ref = column.getDataAt(i);
memcpy(data_ptr, ref.data, ref.size);
data_ptr[ref.size] = 0;
*buffer = data_ptr;
data_ptr += ref.size + 1;
buffer += features_count;
}
return data;
}
/// Place columns into buffer, returns a column which holds the placed data. Buffer should contain column->size() values.
template <typename T>
ColumnPtr placeNumericColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const T** buffer)
{
if (size == 0)
return nullptr;
size_t column_size = columns[offset]->size();
auto data_column = ColumnVector<T>::create(size * column_size);
T * data = data_column->getData().data();
for (size_t i = 0; i < size; ++i)
{
const auto * column = columns[offset + i];
if (column->isNumeric())
placeColumnAsNumber(column, data + i, size);
}
for (size_t i = 0; i < column_size; ++i)
{
*buffer = data;
++buffer;
data += size;
}
return data_column;
}
/// Place columns into buffer, returns data which was used for fixed string columns.
/// Buffer should contain column->size() values; each value contains size strings.
std::vector<PODArray<char>> placeStringColumns(const ColumnRawPtrs & columns, size_t offset, size_t size, const char ** buffer)
{
if (size == 0)
return {};
std::vector<PODArray<char>> data;
for (size_t i = 0; i < size; ++i)
{
const auto * column = columns[offset + i];
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
placeStringColumn(*column_string, buffer + i, size);
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size));
else
throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR);
}
return data;
}
/// buffer[column_size * cat_features_count] -> char * => cat_features[column_size][cat_features_count] -> char *
void fillCatFeaturesBuffer(
const char *** cat_features, const char ** buffer,
size_t column_size, size_t cat_features_count)
{
for (size_t i = 0; i < column_size; ++i)
{
*cat_features = buffer;
++cat_features;
buffer += cat_features_count;
}
}
/// Calc hash for string cat feature at position ps.
template <typename Column>
void calcStringHashes(const Column * column, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
size_t column_size = column->size();
for (size_t j = 0; j < column_size; ++j)
{
auto ref = column->getDataAt(j);
const_cast<int *>(*buffer)[ps] = api.GetStringCatFeatureHash(ref.data, ref.size);
++buffer;
}
}
/// Calc hash for int cat feature at position ps. Buffer at position ps should contain unhashed values.
void calcIntHashes(size_t column_size, size_t ps, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
for (size_t j = 0; j < column_size; ++j)
{
const_cast<int *>(*buffer)[ps] = api.GetIntegerCatFeatureHash((*buffer)[ps]);
++buffer;
}
}
/// buffer contains column->size() rows and size columns.
/// For int cat features calc hash inplace.
/// For string cat features calc hash from column rows.
void calcHashes(const ColumnRawPtrs & columns, size_t offset, size_t size, const int ** buffer, const CatBoostLibraryHandler::APIHolder & api)
{
if (size == 0)
return;
size_t column_size = columns[offset]->size();
std::vector<PODArray<char>> data;
for (size_t i = 0; i < size; ++i)
{
const auto * column = columns[offset + i];
if (const auto * column_string = typeid_cast<const ColumnString *>(column))
calcStringHashes(column_string, i, buffer, api);
else if (const auto * column_fixed_string = typeid_cast<const ColumnFixedString *>(column))
calcStringHashes(column_fixed_string, i, buffer, api);
else
calcIntHashes(column_size, i, buffer, api);
}
}
}
/// Convert values to row-oriented format and call evaluation function from CatBoost wrapper api.
/// * CalcModelPredictionFlat if no cat features
/// * CalcModelPrediction if all cat features are strings
/// * CalcModelPredictionWithHashedCatFeatures if has int cat features.
ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
const ColumnRawPtrs & columns,
bool cat_features_are_strings) const
{
std::string error_msg = "Error occurred while applying CatBoost model: ";
size_t column_size = columns.front()->size();
auto result = ColumnFloat64::create(column_size * tree_count);
auto * result_buf = result->getData().data();
if (!column_size)
return result;
/// Prepare float features.
PODArray<const float *> float_features(column_size);
auto * float_features_buf = float_features.data();
/// Store all float data into single column. float_features is a list of pointers to it.
auto float_features_col = placeNumericColumns<float>(columns, 0, float_features_count, float_features_buf);
if (cat_features_count == 0)
{
if (!api.CalcModelPredictionFlat(model_calcer_handle, column_size,
float_features_buf, float_features_count,
result_buf, column_size * tree_count))
{
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
}
return result;
}
/// Prepare cat features.
if (cat_features_are_strings)
{
/// cat_features_holder stores pointers to ColumnString data or fixed_strings_data.
PODArray<const char *> cat_features_holder(cat_features_count * column_size);
PODArray<const char **> cat_features(column_size);
auto * cat_features_buf = cat_features.data();
fillCatFeaturesBuffer(cat_features_buf, cat_features_holder.data(), column_size, cat_features_count);
/// Fixed strings are stored without termination zero, so have to copy data into fixed_strings_data.
auto fixed_strings_data = placeStringColumns(columns, float_features_count,
cat_features_count, cat_features_holder.data());
if (!api.CalcModelPrediction(model_calcer_handle, column_size,
float_features_buf, float_features_count,
cat_features_buf, cat_features_count,
result_buf, column_size * tree_count))
{
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
}
}
else
{
PODArray<const int *> cat_features(column_size);
auto * cat_features_buf = cat_features.data();
auto cat_features_col = placeNumericColumns<int>(columns, float_features_count,
cat_features_count, cat_features_buf);
calcHashes(columns, float_features_count, cat_features_count, cat_features_buf, api);
if (!api.CalcModelPredictionWithHashedCatFeatures(
model_calcer_handle, column_size,
float_features_buf, float_features_count,
cat_features_buf, cat_features_count,
result_buf, column_size * tree_count))
{
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
}
}
return result;
}
size_t CatBoostLibraryHandler::getTreeCount() const
{
std::lock_guard lock(mutex);
return tree_count;
}
ColumnPtr CatBoostLibraryHandler::evaluate(const ColumnRawPtrs & columns) const
{
std::lock_guard lock(mutex);
if (columns.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");
if (columns.size() != float_features_count + cat_features_count)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
columns.size(),
float_features_count,
cat_features_count);
for (size_t i = 0; i < float_features_count; ++i)
{
if (!columns[i]->isNumeric())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
}
}
bool cat_features_are_strings = true;
for (size_t i = float_features_count; i < float_features_count + cat_features_count; ++i)
{
const auto * column = columns[i];
if (column->isNumeric())
{
cat_features_are_strings = false;
}
else if (!(typeid_cast<const ColumnString *>(column)
|| typeid_cast<const ColumnFixedString *>(column)))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
}
}
auto result = evalImpl(columns, cat_features_are_strings);
if (tree_count == 1)
return result;
size_t column_size = columns.front()->size();
auto * result_buf = result->getData().data();
/// Multiple trees case. Copy data to several columns.
MutableColumns mutable_columns(tree_count);
std::vector<Float64 *> column_ptrs(tree_count);
for (size_t i = 0; i < tree_count; ++i)
{
auto col = ColumnFloat64::create(column_size);
column_ptrs[i] = col->getData().data();
mutable_columns[i] = std::move(col);
}
Float64 * data = result_buf;
for (size_t row = 0; row < column_size; ++row)
{
for (size_t i = 0; i < tree_count; ++i)
{
*column_ptrs[i] = *data;
++column_ptrs[i];
++data;
}
}
return ColumnTuple::create(std::move(mutable_columns));
}
}

View File

@ -0,0 +1,78 @@
#pragma once
#include "CatBoostLibraryAPI.h"
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/SharedLibrary.h>
#include <base/defines.h>
#include <chrono>
#include <mutex>
namespace DB
{
/// Abstracts access to the CatBoost shared library.
class CatBoostLibraryHandler
{
public:
/// Holds pointers to CatBoost library functions
struct APIHolder
{
explicit APIHolder(SharedLibrary & lib);
// NOLINTBEGIN(readability-identifier-naming)
CatBoostLibraryAPI::ModelCalcerCreateFunc ModelCalcerCreate;
CatBoostLibraryAPI::ModelCalcerDeleteFunc ModelCalcerDelete;
CatBoostLibraryAPI::GetErrorStringFunc GetErrorString;
CatBoostLibraryAPI::LoadFullModelFromFileFunc LoadFullModelFromFile;
CatBoostLibraryAPI::CalcModelPredictionFlatFunc CalcModelPredictionFlat;
CatBoostLibraryAPI::CalcModelPredictionFunc CalcModelPrediction;
CatBoostLibraryAPI::CalcModelPredictionWithHashedCatFeaturesFunc CalcModelPredictionWithHashedCatFeatures;
CatBoostLibraryAPI::GetStringCatFeatureHashFunc GetStringCatFeatureHash;
CatBoostLibraryAPI::GetIntegerCatFeatureHashFunc GetIntegerCatFeatureHash;
CatBoostLibraryAPI::GetFloatFeaturesCountFunc GetFloatFeaturesCount;
CatBoostLibraryAPI::GetCatFeaturesCountFunc GetCatFeaturesCount;
CatBoostLibraryAPI::GetTreeCountFunc GetTreeCount;
CatBoostLibraryAPI::GetDimensionsCountFunc GetDimensionsCount;
// NOLINTEND(readability-identifier-naming)
};
CatBoostLibraryHandler(
const String & library_path,
const String & model_path);
~CatBoostLibraryHandler();
std::chrono::system_clock::time_point getLoadingStartTime() const;
std::chrono::milliseconds getLoadingDuration() const;
size_t getTreeCount() const;
ColumnPtr evaluate(const ColumnRawPtrs & columns) const;
private:
std::chrono::system_clock::time_point loading_start_time;
std::chrono::milliseconds loading_duration;
const SharedLibraryPtr library;
const APIHolder api;
mutable std::mutex mutex;
CatBoostLibraryAPI::ModelCalcerHandle * model_calcer_handle TSA_GUARDED_BY(mutex) TSA_PT_GUARDED_BY(mutex);
size_t float_features_count TSA_GUARDED_BY(mutex);
size_t cat_features_count TSA_GUARDED_BY(mutex);
size_t tree_count TSA_GUARDED_BY(mutex);
ColumnFloat64::MutablePtr evalImpl(const ColumnRawPtrs & columns, bool cat_features_are_strings) const TSA_REQUIRES(mutex);
};
using CatBoostLibraryHandlerPtr = std::shared_ptr<CatBoostLibraryHandler>;
}

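A note on the TSA_GUARDED_BY / TSA_PT_GUARDED_BY / TSA_REQUIRES annotations above: they wrap clang's Thread Safety Analysis attributes. A minimal standalone sketch of the underlying mechanism using the raw clang attribute (checked with clang, libc++, -Wthread-safety and -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS; the GUARDED_BY macro here is illustrative, not ClickHouse's):

#include <mutex>

#if defined(__clang__)
#    define GUARDED_BY(x) __attribute__((guarded_by(x)))  /// raw form of TSA_GUARDED_BY
#else
#    define GUARDED_BY(x)
#endif

class Counter
{
public:
    void increment()
    {
        std::lock_guard lock(mutex);  /// the analysis sees the lock held here
        ++value;
    }
    /// int readUnlocked() const { return value; }  /// would warn: reading 'value' requires holding 'mutex'
private:
    mutable std::mutex mutex;
    int value GUARDED_BY(mutex) = 0;
};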
View File

@ -0,0 +1,80 @@
#include "CatBoostLibraryHandlerFactory.h"
#include <Common/logger_useful.h>
namespace DB
{
CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance()
{
static CatBoostLibraryHandlerFactory instance;
return instance;
}
CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory()
: log(&Poco::Logger::get("CatBoostLibraryHandlerFactory"))
{
}
CatBoostLibraryHandlerPtr CatBoostLibraryHandlerFactory::tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found)
{
std::lock_guard lock(mutex);
auto handler = library_handlers.find(model_path);
bool found = (handler != library_handlers.end());
if (found)
return handler->second;
else
{
if (create_if_not_found)
{
auto new_handler = std::make_shared<CatBoostLibraryHandler>(library_path, model_path);
library_handlers.emplace(model_path, new_handler);
LOG_DEBUG(log, "Loaded catboost library handler for model path '{}'", model_path);
return new_handler;
}
return nullptr;
}
}
void CatBoostLibraryHandlerFactory::removeModel(const String & model_path)
{
std::lock_guard lock(mutex);
bool deleted = library_handlers.erase(model_path);
if (!deleted)
{
LOG_DEBUG(log, "Cannot unload catboost library handler for model path '{}'", model_path);
return;
}
LOG_DEBUG(log, "Unloaded catboost library handler for model path '{}'", model_path);
}
void CatBoostLibraryHandlerFactory::removeAllModels()
{
std::lock_guard lock(mutex);
library_handlers.clear();
LOG_DEBUG(log, "Unloaded all catboost library handlers");
}
ExternalModelInfos CatBoostLibraryHandlerFactory::getModelInfos()
{
std::lock_guard lock(mutex);
ExternalModelInfos result;
for (const auto & handler : library_handlers)
result.push_back({
.model_path = handler.first,
.model_type = "catboost",
.loading_start_time = handler.second->getLoadingStartTime(),
.loading_duration = handler.second->getLoadingDuration()
});
return result;
}
}

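tryGetModel() above is both a cache lookup and a loader, keyed by the model path and guarded by a single mutex. The same get-or-create shape as a standalone sketch (a trivial Handler and std::shared_ptr stand in for CatBoostLibraryHandlerPtr):

#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

struct Handler { explicit Handler(std::string path_) : path(std::move(path_)) {} std::string path; };

class HandlerCache
{
public:
    std::shared_ptr<Handler> tryGet(const std::string & key, bool create_if_not_found)
    {
        std::lock_guard lock(mutex);
        if (auto it = handlers.find(key); it != handlers.end())
            return it->second;         /// cache hit
        if (!create_if_not_found)
            return nullptr;            /// caller wanted a lookup only
        auto handler = std::make_shared<Handler>(key);
        handlers.emplace(key, handler);
        return handler;                /// loaded and cached
    }
private:
    std::unordered_map<std::string, std::shared_ptr<Handler>> handlers;
    std::mutex mutex;
};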
View File

@ -0,0 +1,37 @@
#pragma once
#include "CatBoostLibraryHandler.h"
#include <base/defines.h>
#include <Common/ExternalModelInfo.h>
#include <chrono>
#include <mutex>
#include <unordered_map>
namespace DB
{
class CatBoostLibraryHandlerFactory final : private boost::noncopyable
{
public:
static CatBoostLibraryHandlerFactory & instance();
CatBoostLibraryHandlerFactory();
CatBoostLibraryHandlerPtr tryGetModel(const String & model_path, const String & library_path, bool create_if_not_found);
void removeModel(const String & model_path);
void removeAllModels();
ExternalModelInfos getModelInfos();
private:
/// map: model path --> catboost library handler
std::unordered_map<String, CatBoostLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
Poco::Logger * log;
};
}

View File

@ -50,6 +50,6 @@ private:
void * lib_data;
};
using SharedLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
using ExternalDictionaryLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
}

View File

@ -1,37 +1,40 @@
#include "ExternalDictionaryLibraryHandlerFactory.h"
#include <Common/logger_useful.h>
namespace DB
{
SharedLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const std::string & dictionary_id)
ExternalDictionaryLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const String & dictionary_id)
{
std::lock_guard lock(mutex);
auto library_handler = library_handlers.find(dictionary_id);
if (library_handler != library_handlers.end())
return library_handler->second;
if (auto handler = library_handlers.find(dictionary_id); handler != library_handlers.end())
return handler->second;
return nullptr;
}
void ExternalDictionaryLibraryHandlerFactory::create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const String & dictionary_id,
const String & library_path,
const std::vector<String> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names)
const std::vector<String> & attributes_names)
{
std::lock_guard lock(mutex);
if (!library_handlers.contains(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
if (library_handlers.contains(dictionary_id))
{
LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
return;
}
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
}
bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
bool ExternalDictionaryLibraryHandlerFactory::clone(const String & from_dictionary_id, const String & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
@ -45,7 +48,7 @@ bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dic
}
bool ExternalDictionaryLibraryHandlerFactory::remove(const std::string & dictionary_id)
bool ExternalDictionaryLibraryHandlerFactory::remove(const String & dictionary_id)
{
std::lock_guard lock(mutex);
/// extDict_libDelete is called in destructor.

View File

@ -17,22 +17,22 @@ class ExternalDictionaryLibraryHandlerFactory final : private boost::noncopyable
public:
static ExternalDictionaryLibraryHandlerFactory & instance();
SharedLibraryHandlerPtr get(const std::string & dictionary_id);
ExternalDictionaryLibraryHandlerPtr get(const String & dictionary_id);
void create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const String & dictionary_id,
const String & library_path,
const std::vector<String> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names);
const std::vector<String> & attributes_names);
bool clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id);
bool clone(const String & from_dictionary_id, const String & to_dictionary_id);
bool remove(const std::string & dictionary_id);
bool remove(const String & dictionary_id);
private:
/// map: dict_id -> sharedLibraryHandler
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::unordered_map<String, ExternalDictionaryLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
};

View File

@ -27,12 +27,16 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
{
if (uri.getPath() == "/extdict_ping")
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
else if (uri.getPath() == "/catboost_ping")
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
}
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{
if (uri.getPath() == "/extdict_request")
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
else if (uri.getPath() == "/catboost_request")
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
}
return nullptr;

View File

@ -1,24 +1,32 @@
#include "LibraryBridgeHandlers.h"
#include "CatBoostLibraryHandler.h"
#include "CatBoostLibraryHandlerFactory.h"
#include "ExternalDictionaryLibraryHandler.h"
#include "ExternalDictionaryLibraryHandlerFactory.h"
#include <Formats/FormatFactory.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Common/BridgeProtocolVersion.h>
#include <IO/WriteHelpers.h>
#include <Poco/Net/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/Net/HTMLForm.h>
#include <Poco/ThreadPool.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Server/HTTP/HTMLForm.h>
#include <IO/ReadBufferFromString.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
namespace DB
@ -31,7 +39,7 @@ namespace ErrorCodes
namespace
{
void processError(HTTPServerResponse & response, const std::string & message)
void processError(HTTPServerResponse & response, const String & message)
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
@ -41,7 +49,7 @@ namespace
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
}
std::shared_ptr<Block> parseColumns(std::string && column_string)
std::shared_ptr<Block> parseColumns(String && column_string)
{
auto sample_block = std::make_shared<Block>();
auto names_and_types = NamesAndTypesList::parse(column_string);
@ -59,10 +67,10 @@ namespace
return ids;
}
std::vector<std::string> parseNamesFromBinary(const std::string & names_string)
std::vector<String> parseNamesFromBinary(const String & names_string)
{
ReadBufferFromString buf(names_string);
std::vector<std::string> names;
std::vector<String> names;
readVectorBinary(names, buf);
return names;
}
@ -79,13 +87,15 @@ static void writeData(Block data, OutputFormatPtr format)
executor.execute();
}
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
{
}
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -97,7 +107,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
version = 0; /// assumed version for too old servers which do not send a version
else
{
String version_str = params.get("version");
const String & version_str = params.get("version");
if (!tryParse(version, version_str))
{
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
@ -124,8 +134,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string method = params.get("method");
std::string dictionary_id = params.get("dictionary_id");
const String & method = params.get("method");
const String & dictionary_id = params.get("dictionary_id");
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
@ -141,7 +151,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string from_dictionary_id = params.get("from_dictionary_id");
const String & from_dictionary_id = params.get("from_dictionary_id");
bool cloned = false;
cloned = ExternalDictionaryLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
@ -166,7 +176,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string library_path = params.get("library_path");
const String & library_path = params.get("library_path");
if (!params.has("library_settings"))
{
@ -174,10 +184,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
const auto & settings_string = params.get("library_settings");
const String & settings_string = params.get("library_settings");
LOG_DEBUG(log, "Parsing library settings from binary string");
std::vector<std::string> library_settings = parseNamesFromBinary(settings_string);
std::vector<String> library_settings = parseNamesFromBinary(settings_string);
/// Needed for library dictionary
if (!params.has("attributes_names"))
@ -186,10 +196,10 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
const auto & attributes_string = params.get("attributes_names");
const String & attributes_string = params.get("attributes_names");
LOG_DEBUG(log, "Parsing attributes names from binary string");
std::vector<std::string> attributes_names = parseNamesFromBinary(attributes_string);
std::vector<String> attributes_names = parseNamesFromBinary(attributes_string);
/// Needed to parse block from binary string format
if (!params.has("sample_block"))
@ -197,7 +207,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
processError(response, "No 'sample_block' in request URL");
return;
}
std::string sample_block_string = params.get("sample_block");
String sample_block_string = params.get("sample_block");
std::shared_ptr<Block> sample_block;
try
@ -297,7 +307,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
return;
}
std::string requested_block_string = params.get("requested_block_sample");
String requested_block_string = params.get("requested_block_sample");
std::shared_ptr<Block> requested_sample_block;
try
@ -332,7 +342,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
else
{
LOG_WARNING(log, "Unknown library method: '{}'", method);
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)
@ -362,6 +373,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
}
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
@ -369,6 +381,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi
{
}
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
@ -382,7 +395,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
return;
}
std::string dictionary_id = params.get("dictionary_id");
const String & dictionary_id = params.get("dictionary_id");
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
@ -399,4 +412,230 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
}
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler"))
{
}
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
size_t version;
if (!params.has("version"))
version = 0; /// assumed version for too old servers which do not send a version
else
{
const String & version_str = params.get("version");
if (!tryParse(version, version_str))
{
processError(response, "Unable to parse 'version' string in request URL: '" + version_str + "' Check if the server and library-bridge have the same version.");
return;
}
}
if (version != LIBRARY_BRIDGE_PROTOCOL_VERSION)
{
/// backwards compatibility is considered unnecessary for now, just let the user know that the server and the bridge must be upgraded together
processError(response, "Server and library-bridge have different versions: '" + std::to_string(version) + "' vs. '" + std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION) + "'");
return;
}
if (!params.has("method"))
{
processError(response, "No 'method' in request URL");
return;
}
const String & method = params.get("method");
LOG_TRACE(log, "Library method: '{}'", method);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
if (method == "catboost_list")
{
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
for (const auto & info : model_infos)
{
writeStringBinary(info.model_path, out);
writeStringBinary(info.model_type, out);
UInt64 t = std::chrono::system_clock::to_time_t(info.loading_start_time);
writeIntBinary(t, out);
t = info.loading_duration.count();
writeIntBinary(t, out);
}
}
else if (method == "catboost_removeModel")
{
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
CatBoostLibraryHandlerFactory::instance().removeModel(model_path);
String res = "1";
writeStringBinary(res, out);
}
else if (method == "catboost_removeAllModels")
{
CatBoostLibraryHandlerFactory::instance().removeAllModels();
String res = "1";
writeStringBinary(res, out);
}
else if (method == "catboost_GetTreeCount")
{
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("library_path"))
{
processError(response, "No 'library_path' in request URL");
return;
}
const String & library_path = params.get("library_path");
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, library_path, /*create_if_not_found*/ true);
size_t tree_count = catboost_handler->getTreeCount();
writeIntBinary(tree_count, out);
}
else if (method == "catboost_libEvaluate")
{
auto & read_buf = request.getStream();
params.read(read_buf);
if (!params.has("model_path"))
{
processError(response, "No 'model_path' in request URL");
return;
}
const String & model_path = params.get("model_path");
if (!params.has("data"))
{
processError(response, "No 'data' in request URL");
return;
}
const String & data = params.get("data");
ReadBufferFromString string_read_buf(data);
NativeReader deserializer(string_read_buf, /*server_revision*/ 0);
Block block_read = deserializer.read();
Columns col_ptrs = block_read.getColumns();
ColumnRawPtrs col_raw_ptrs;
for (const auto & p : col_ptrs)
col_raw_ptrs.push_back(&*p);
auto catboost_handler = CatBoostLibraryHandlerFactory::instance().tryGetModel(model_path, "DummyLibraryPath", /*create_if_not_found*/ false);
if (!catboost_handler)
{
processError(response, "CatBoost library is not loaded for model '" + model_path + "'. Please try again.");
return;
}
ColumnPtr res_col = catboost_handler->evaluate(col_raw_ptrs);
DataTypePtr res_col_type = std::make_shared<DataTypeFloat64>();
String res_col_name = "res_col";
ColumnsWithTypeAndName res_cols_with_type_and_name = {{res_col, res_col_type, res_col_name}};
Block block_write(res_cols_with_type_and_name);
NativeWriter serializer{out, /*client_revision*/ 0, block_write};
serializer.write(block_write);
}
else
{
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)
{
auto message = getCurrentExceptionMessage(true);
LOG_ERROR(log, "Failed to process request. Error: {}", message);
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR, message); // can't call processError here because sending of the response has already started
try
{
writeStringBinary(message, out);
out.finalize();
}
catch (...)
{
tryLogCurrentException(log);
}
}
try
{
out.finalize();
}
catch (...)
{
tryLogCurrentException(log);
}
}
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler"))
{
}
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
String res = "1";
setResponseDefaultHeaders(response, keep_alive_timeout);
LOG_TRACE(log, "Sending ping response: {}", res);
response.sendBuffer(res.data(), res.size());
}
catch (...)
{
tryLogCurrentException("PingHandler");
}
}
}

View File

@ -1,9 +1,8 @@
#pragma once
#include <Common/logger_useful.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Common/logger_useful.h>
#include "ExternalDictionaryLibraryHandler.h"
namespace DB
@ -26,11 +25,12 @@ public:
private:
static constexpr inline auto FORMAT = "RowBinary";
const size_t keep_alive_timeout;
Poco::Logger * log;
size_t keep_alive_timeout;
};
// Handler for checking if the external dictionary library is loaded (used for handshake)
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
@ -43,4 +43,47 @@ private:
Poco::Logger * log;
};
/// Handler for requests to catboost library. The call protocol is as follows:
/// (1) Send a "catboost_GetTreeCount" request from the server to the bridge. It contains a library path (e.g /home/user/libcatboost.so) and
/// a model path (e.g. /home/user/model.bin). This loads the catboost library handler associated with the model path, then executes
/// GetTreeCount() on the library handler and sends the result back to the server.
/// (2) Send "catboost_Evaluate" from the server to the bridge. It contains a model path and the features to run the interference on. Step
/// (2) is called multiple times (once per chunk) by the server.
///
/// We would ideally like to have steps (1) and (2) in one atomic handler but can't because the evaluation on the server side is divided
/// into two dependent phases: FunctionCatBoostEvaluate::getReturnTypeImpl() and ::executeImpl(). So the model may in principle be unloaded
/// from the library-bridge between steps (1) and (2). Step (2) checks if that is the case and fails gracefully. This is okay because that
/// situation is considered exceptional and rare.
///
/// An update of a model is performed by unloading it. The first call to "catboost_GetTreeCount" brings it into memory again.
///
/// Further handlers are provided for unloading a specific model, for unloading all models or for retrieving information about the loaded
/// models for display in a system view.
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
// Handler for pinging the library-bridge for catboost access (used for handshake)
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
}

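Taken together with the handlers above, the protocol comment implies the following call sequence on the server side. This is a hedged, non-compilable sketch using the CatBoostLibraryBridgeHelper declared later in this diff; the paths, the ContextPtr 'context' and the ColumnsWithTypeAndName 'columns' are assumed to be in scope:

/// Sketch only: the two-step protocol from the server's point of view.
auto helper = std::make_shared<CatBoostLibraryBridgeHelper>(
    context, "/path/to/model.bin", "/path/to/libcatboost.so");

/// Step (1): loads the model on the bridge if needed ("catboost_GetTreeCount").
size_t tree_count = helper->getTreeCount();

/// Step (2), once per chunk ("catboost_libEvaluate"). If the model was
/// unloaded between the steps, the bridge reports it and the call fails gracefully.
ColumnPtr result = helper->evaluate(columns);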
View File

@ -51,7 +51,6 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalModelsLoader.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
@ -551,8 +550,9 @@ static void sanityChecks(Server & server)
try
{
const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource";
if (readString(filename).find("tsc") == std::string::npos)
server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded. Check " + String(filename));
String clocksource = readString(filename);
if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos)
server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename));
}
catch (...)
{
@ -1157,7 +1157,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setExternalAuthenticatorsConfig(*config);
global_context->loadOrReloadDictionaries(*config);
global_context->loadOrReloadModels(*config);
global_context->loadOrReloadUserDefinedExecutableFunctions(*config);
global_context->setRemoteHostFilter(*config);
@ -1738,17 +1737,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
throw;
}
/// try to load models immediately, throw on error and die
try
{
global_context->loadOrReloadModels(config());
}
catch (...)
{
tryLogCurrentException(log, "Caught exception while loading dictionaries.");
throw;
}
/// try to load user defined executable functions, throw on error and die
try
{

View File

@ -278,6 +278,71 @@ public:
}
}
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final
{
std::unique_ptr<UInt8[]> final_null_flags = std::make_unique<UInt8[]>(row_end);
const size_t filter_column_num = number_of_arguments - 1;
if (is_nullable[filter_column_num])
{
const ColumnNullable * nullable_column = assert_cast<const ColumnNullable *>(columns[filter_column_num]);
const IColumn & filter_column = nullable_column->getNestedColumn();
const UInt8 * filter_null_map = nullable_column->getNullMapColumn().getData().data();
const UInt8 * filter_values = assert_cast<const ColumnUInt8 &>(filter_column).getData().data();
for (size_t i = row_begin; i < row_end; i++)
{
final_null_flags[i] = (null_is_skipped && filter_null_map[i]) || !filter_values[i];
}
}
else
{
const IColumn * filter_column = columns[filter_column_num];
const UInt8 * filter_values = assert_cast<const ColumnUInt8 *>(filter_column)->getData().data();
for (size_t i = row_begin; i < row_end; i++)
final_null_flags[i] = !filter_values[i];
}
const IColumn * nested_columns[number_of_arguments];
for (size_t arg = 0; arg < number_of_arguments; arg++)
{
if (is_nullable[arg])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[arg]);
if (null_is_skipped && (arg != filter_column_num))
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
const UInt8 * col_null_map = nullmap_column.getData().data();
for (size_t r = row_begin; r < row_end; r++)
{
final_null_flags[r] |= col_null_map[r];
}
}
nested_columns[arg] = &nullable_col.getNestedColumn();
}
else
nested_columns[arg] = columns[arg];
}
bool at_least_one = false;
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_null_flags[i])
{
at_least_one = true;
break;
}
}
if (at_least_one)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlaceNotNull(
row_begin, row_end, this->nestedPlace(place), nested_columns, final_null_flags.get(), arena, -1);
}
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override

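The batch path above folds the -If filter column and (when null_is_skipped) the null maps of all arguments into one skip-flag array, where 1 means "skip this row", so the nested function can consume a single fused filter. A standalone model of that folding:

#include <cstddef>
#include <cstdint>
#include <vector>

/// Standalone sketch of the flag folding in addBatchSinglePlace():
/// a row is skipped when the filter is 0 or, if NULLs are skipped,
/// when any nullable argument is NULL at that row.
std::vector<uint8_t> buildSkipFlags(
    const std::vector<uint8_t> & filter,                          /// 1 = row selected
    const std::vector<const std::vector<uint8_t> *> & null_maps,  /// 1 = value is NULL
    bool null_is_skipped)
{
    std::vector<uint8_t> skip(filter.size());
    for (size_t i = 0; i < filter.size(); ++i)
        skip[i] = !filter[i];
    if (null_is_skipped)
        for (const auto * null_map : null_maps)
            for (size_t i = 0; i < skip.size(); ++i)
                skip[i] |= (*null_map)[i];
    return skip;
}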
View File

@ -414,6 +414,109 @@ public:
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos) const final
{
/// We are going to merge all the flags into a single one to be able to call the nested batching functions
std::vector<const UInt8 *> nullable_filters;
const IColumn * nested_columns[number_of_arguments];
std::unique_ptr<UInt8[]> final_flags = nullptr;
const UInt8 * final_flags_ptr = nullptr;
if (if_argument_pos >= 0)
{
final_flags = std::make_unique<UInt8[]>(row_end);
final_flags_ptr = final_flags.get();
size_t included_elements = 0;
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; i++)
{
final_flags[i] = !flags.data()[i];
included_elements += !!flags.data()[i];
}
if (included_elements == 0)
return;
if (included_elements != (row_end - row_begin))
{
nullable_filters.push_back(final_flags_ptr);
}
}
for (size_t i = 0; i < number_of_arguments; ++i)
{
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
nested_columns[i] = &nullable_col.getNestedColumn();
if constexpr (null_is_skipped)
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
nullable_filters.push_back(nullmap_column.getData().data());
}
}
else
{
nested_columns[i] = columns[i];
}
}
bool found_one = false;
chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
if (nullable_filters.size() == 1)
{
/// We can avoid making copies of the only filter but we still need to check that there is data to be added
final_flags_ptr = nullable_filters[0];
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_flags_ptr[i])
{
found_one = true;
break;
}
}
}
else
{
if (!final_flags)
{
final_flags = std::make_unique<UInt8[]>(row_end);
final_flags_ptr = final_flags.get();
}
const size_t filter_start = nullable_filters[0] == final_flags_ptr ? 1 : 0;
for (size_t filter = filter_start; filter < nullable_filters.size(); filter++)
{
for (size_t i = row_begin; i < row_end; i++)
final_flags[i] |= nullable_filters[filter][i];
}
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_flags_ptr[i])
{
found_one = true;
break;
}
}
}
if (!found_one)
return; // Nothing to do and nothing to mark
this->setFlag(place);
this->nested_function->addBatchSinglePlaceNotNull(
row_begin, row_end, this->nestedPlace(place), nested_columns, final_flags_ptr, arena, -1);
}
#if USE_EMBEDDED_COMPILER

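A detail worth noting in the variant above: with exactly one nullable filter it reuses that filter's memory directly, and only allocates and OR-merges a combined array when there are two or more. A standalone sketch of that decision:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

/// Returns the effective skip-flags (1 = skip). With a single source filter
/// nothing is copied; 'merged' owns memory only in the multi-filter case.
const uint8_t * mergeFilters(
    const std::vector<const uint8_t *> & filters,
    size_t rows,
    std::unique_ptr<uint8_t[]> & merged)
{
    if (filters.size() == 1)
        return filters[0];                       /// zero-copy fast path
    merged = std::make_unique<uint8_t[]>(rows);  /// value-initialized to 0
    for (const uint8_t * filter : filters)
        for (size_t i = 0; i < rows; ++i)
            merged[i] |= filter[i];
    return merged.get();
}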
View File

@ -424,14 +424,30 @@ public:
alloc(new_size_degree);
for (size_t i = 0; i < m_size; ++i)
if (m_size <= 1)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
if (x == 0)
has_zero = true;
else
reinsertImpl(x);
for (size_t i = 0; i < m_size; ++i)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
if (x == 0)
has_zero = true;
else
reinsertImpl(x);
}
}
else
{
auto hs = std::make_unique<HashValue[]>(m_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), m_size * sizeof(HashValue));
for (size_t i = 0; i < m_size; ++i)
{
if (hs[i] == 0)
has_zero = true;
else
reinsertImpl(hs[i]);
}
}
}
@ -458,11 +474,24 @@ public:
resize(new_size_degree);
}
for (size_t i = 0; i < rhs_size; ++i)
if (rhs_size <= 1)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
insertHash(x);
for (size_t i = 0; i < rhs_size; ++i)
{
HashValue x = 0;
DB::readIntBinary(x, rb);
insertHash(x);
}
}
else
{
auto hs = std::make_unique<HashValue[]>(rhs_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), rhs_size * sizeof(HashValue));
for (size_t i = 0; i < rhs_size; ++i)
{
insertHash(hs[i]);
}
}
}

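Both hunks above replace m_size single-value reads with one bulk readStrict when more than one hash is stored; HashValue is a fixed-width POD, so the serialized byte layout is identical either way. A standalone model of the bulk path:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

using HashValue = uint64_t;

/// Standalone sketch: deserialize 'count' fixed-width hashes from a raw
/// buffer in one memcpy instead of 'count' individual reads.
std::vector<HashValue> readHashesBulk(const char * bytes, size_t count)
{
    std::vector<HashValue> hashes(count);
    std::memcpy(hashes.data(), bytes, count * sizeof(HashValue));
    return hashes;
}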
View File

@ -0,0 +1,194 @@
#include "CatBoostLibraryBridgeHelper.h"
#include <Columns/ColumnsNumber.h>
#include <Common/escapeForFileName.h>
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Net/HTTPRequest.h>
#include <random>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
CatBoostLibraryBridgeHelper::CatBoostLibraryBridgeHelper(
ContextPtr context_,
std::optional<String> model_path_,
std::optional<String> library_path_)
: LibraryBridgeHelper(context_->getGlobalContext())
, model_path(model_path_)
, library_path(library_path_)
{
}
Poco::URI CatBoostLibraryBridgeHelper::getPingURI() const
{
auto uri = createBaseURI();
uri.setPath(PING_HANDLER);
return uri;
}
Poco::URI CatBoostLibraryBridgeHelper::getMainURI() const
{
auto uri = createBaseURI();
uri.setPath(MAIN_HANDLER);
return uri;
}
Poco::URI CatBoostLibraryBridgeHelper::createRequestURI(const String & method) const
{
auto uri = getMainURI();
uri.addQueryParameter("version", std::to_string(LIBRARY_BRIDGE_PROTOCOL_VERSION));
uri.addQueryParameter("method", method);
return uri;
}
bool CatBoostLibraryBridgeHelper::bridgeHandShake()
{
String result;
try
{
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
readString(result, buf);
}
catch (...)
{
tryLogCurrentException(log);
return false;
}
if (result != "1")
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check that bridge and server have the same version.", result);
return true;
}
ExternalModelInfos CatBoostLibraryBridgeHelper::listModels()
{
startBridgeSync();
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_LIST_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[](std::ostream &) {},
http_timeouts, credentials);
ExternalModelInfos result;
UInt64 num_rows;
readIntBinary(num_rows, buf);
for (UInt64 i = 0; i < num_rows; ++i)
{
ExternalModelInfo info;
readStringBinary(info.model_path, buf);
readStringBinary(info.model_type, buf);
UInt64 t;
readIntBinary(t, buf);
info.loading_start_time = std::chrono::system_clock::from_time_t(t);
readIntBinary(t, buf);
info.loading_duration = std::chrono::milliseconds(t);
result.push_back(info);
}
return result;
}
void CatBoostLibraryBridgeHelper::removeModel()
{
startBridgeSync();
assert(model_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEMODEL_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
String result;
readStringBinary(result, buf);
assert(result == "1");
}
void CatBoostLibraryBridgeHelper::removeAllModels()
{
startBridgeSync();
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_REMOVEALLMODELS_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[](std::ostream &){},
http_timeouts, credentials);
String result;
readStringBinary(result, buf);
assert(result == "1");
}
size_t CatBoostLibraryBridgeHelper::getTreeCount()
{
startBridgeSync();
assert(model_path && library_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_GETTREECOUNT_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this](std::ostream & os)
{
os << "library_path=" << escapeForFileName(*library_path) << "&";
os << "model_path=" << escapeForFileName(*model_path);
},
http_timeouts, credentials);
size_t result;
readIntBinary(result, buf);
return result;
}
ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & columns)
{
startBridgeSync();
WriteBufferFromOwnString string_write_buf;
Block block(columns);
NativeWriter serializer(string_write_buf, /*client_revision*/ 0, block);
serializer.write(block);
assert(model_path);
ReadWriteBufferFromHTTP buf(
createRequestURI(CATBOOST_LIB_EVALUATE_METHOD),
Poco::Net::HTTPRequest::HTTP_POST,
[this, serialized = string_write_buf.str()](std::ostream & os)
{
os << "model_path=" << escapeForFileName(*model_path) << "&";
os << "data=" << escapeForFileName(serialized);
},
http_timeouts, credentials);
NativeReader deserializer(buf, /*server_revision*/ 0);
Block block_read = deserializer.read();
return block_read.getColumns()[0];
}
}

View File

@ -0,0 +1,53 @@
#pragma once
#include <BridgeHelper/LibraryBridgeHelper.h>
#include <Common/ExternalModelInfo.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <Interpreters/Context.h>
#include <Poco/URI.h>
#include <optional>
namespace DB
{
class CatBoostLibraryBridgeHelper : public LibraryBridgeHelper
{
public:
static constexpr inline auto PING_HANDLER = "/catboost_ping";
static constexpr inline auto MAIN_HANDLER = "/catboost_request";
explicit CatBoostLibraryBridgeHelper(
ContextPtr context_,
std::optional<String> model_path_ = std::nullopt,
std::optional<String> library_path_ = std::nullopt);
ExternalModelInfos listModels();
void removeModel(); /// requires model_path
void removeAllModels();
size_t getTreeCount(); /// requires model_path and library_path
ColumnPtr evaluate(const ColumnsWithTypeAndName & columns); /// requires model_path
protected:
Poco::URI getPingURI() const override;
Poco::URI getMainURI() const override;
bool bridgeHandShake() override;
private:
static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list";
static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel";
static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels";
static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount";
static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate";
Poco::URI createRequestURI(const String & method) const;
const std::optional<String> model_path;
const std::optional<String> library_path;
};
}

View File

@ -12,8 +12,8 @@
namespace DB
{
/// Common base class for XDBC and Library bridge helpers.
/// Contains helper methods to check/start bridge sync.
/// Base class for server-side bridge helpers, e.g. xdbc-bridge and library-bridge.
/// Contains helper methods to check/start bridge sync
class IBridgeHelper: protected WithContext
{

View File

@ -1080,6 +1080,20 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de
}
void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
{
if (!global_context->hasInsertionTable() && insert_query.table)
{
String table = insert_query.table->as<ASTIdentifier &>().shortName();
if (!table.empty())
{
String database = insert_query.database ? insert_query.database->as<ASTIdentifier &>().shortName() : "";
global_context->setInsertionTable(StorageID(database, table));
}
}
}
void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr parsed_query)
{
auto query = query_to_execute;
@ -1129,6 +1143,8 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
{
/// If structure was received (thus, server has not thrown an exception),
/// send our data with that structure.
setInsertionTable(parsed_insert_query);
sendData(sample, columns_description, parsed_query);
receiveEndOfQuery();
}

View File

@ -113,6 +113,8 @@ protected:
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) = 0;
void setInsertionTable(const ASTInsertQuery & insert_query);
private:
void receiveResult(ASTPtr parsed_query);

View File

@ -722,5 +722,3 @@ public:
return res;
}
};
template <typename Key, typename Payload> constexpr size_t ArrayCache<Key, Payload>::min_chunk_size;

View File

@ -176,10 +176,10 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
{
/// Under high memory pressure, any new allocation will definitelly lead
/// to MEMORY_LIMIT_EXCEEDED exception.
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///
/// And in this case the exception will not be logged, so let's block the
/// In this case the exception will not be logged, so let's block the
/// MemoryTracker until the exception is logged.
LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
@ -189,8 +189,8 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message)
{
/// Under high memory pressure, any new allocation will definitelly lead
/// to MEMORY_LIMIT_EXCEEDED exception.
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///
/// And in this case the exception will not be logged, so let's block the
/// MemoryTracker until the exception is logged.

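The rewritten comments describe a real trap: formatting a log line allocates, and under memory pressure that allocation itself can throw MEMORY_LIMIT_EXCEEDED, losing the original error. A hedged sketch of the guard in isolation (assumes ClickHouse's MemoryTracker machinery and logger_useful.h; 'logger' is a placeholder):

/// Sketch only: block memory-limit exceptions on this thread while logging,
/// so the log line can always allocate what it needs.
{
    LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global);
    LOG_ERROR(logger, "Query failed: {}", getCurrentExceptionMessage(/*with_stacktrace*/ true));
}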
View File

@ -0,0 +1,20 @@
#pragma once
#include <chrono>
#include <vector>
#include <base/types.h>
namespace DB
{
/// Details about an external machine learning model, used by clickhouse-server and clickhouse-library-bridge
struct ExternalModelInfo
{
String model_path;
String model_type;
std::chrono::system_clock::time_point loading_start_time; /// serialized as std::time_t
std::chrono::milliseconds loading_duration; /// serialized as UInt64
};
using ExternalModelInfos = std::vector<ExternalModelInfo>;
}

View File

@ -135,7 +135,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
struct kinfo_proc kp;
size_t len = sizeof(struct kinfo_proc);
if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0))
if (-1 == ::sysctl(mib, 4, &kp, &len, nullptr, 0))
throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR);
if (sizeof(struct kinfo_proc) != len)

View File

@ -130,16 +130,15 @@ void SpanHolder::finish() noexcept
try
{
auto log = current_thread_trace_context.span_log.lock();
if (!log)
/// The log might be disabled, check it before use
if (log)
{
// The log might be disabled.
return;
this->finish_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
log->add(OpenTelemetrySpanLogElement(*this));
}
this->finish_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
log->add(OpenTelemetrySpanLogElement(*this));
}
catch (...)
{

View File

@ -264,6 +264,18 @@ protected:
}
};
/// Schedule jobs/tasks on the global thread pool without implicitly passing the tracing context of the current thread to the underlying worker as the parent tracing context.
///
/// If you implement your own job/task scheduling on top of the global thread pool, or schedule a long-running job in an infinite loop,
/// you need to use this class, or ThreadFromGlobalPool below.
///
/// See the comments on ThreadPool below for how this works.
using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl<false>;
/// An alias for a thread that executes jobs/tasks on the global thread pool, implicitly passing the tracing context of the current thread to the underlying worker as the parent tracing context.
/// If jobs/tasks are scheduled directly through the APIs of this class, use this class or the class above.
using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
/// Recommended thread pool for the case when multiple thread pools are created and destroyed.
///
/// The template parameter of ThreadFromGlobalPool is set to false to disable tracing context propagation to underlying worker.
@ -274,9 +286,6 @@ protected:
/// which means the tracing context initialized at underlying worker level won't be delete for a very long time.
/// This would cause wrong context for further jobs scheduled in ThreadPool.
///
/// To make sure the tracing context are correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level.
/// To make sure the tracing context is correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level.
///
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolImpl<false>>;
/// An alias for user code to execute a job in the global thread pool
using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolNoTracingContextPropagation>;

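The aliases above encode a policy: short jobs that logically belong to the current query keep tracing-context propagation, while long-lived background loops must not, or they would pin a stale parent context. A hedged sketch of choosing between them (assumes the surrounding ClickHouse headers; the job bodies are placeholders):

/// Sketch only: a long-running background loop must not inherit the
/// scheduling thread's tracing context.
ThreadFromGlobalPoolNoTracingContextPropagation watchdog([]
{
    while (!shutdownRequested())   /// placeholder predicate
        doHousekeeping();          /// placeholder work
});

/// A short job tied to the current query keeps propagation enabled.
ThreadFromGlobalPool query_job([] { doWorkForCurrentQuery(); });  /// placeholder work

query_job.join();
watchdog.join();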
View File

@ -189,6 +189,9 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu
ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
if (ret->coordination_settings->quorum_reads)
LOG_WARNING(&Poco::Logger::get("KeeperConfigurationAndSettings"), "Setting 'quorum_reads' is deprecated. Please use 'read_mode'");
return ret;
}

View File

@ -26,10 +26,12 @@ struct Settings;
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry, 0, "How often the leader node checks if it still has a majority. Set it lower than or equal to election_timeout_lower_bound_ms to have linearizable reads.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
M(Milliseconds, startup_timeout, 180000, "How much time we will wait for RAFT to start.", 0) \
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
@ -37,11 +39,12 @@ struct Settings;
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, quorum_reads, false, "Deprecated - use read_mode. Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0)
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(String, read_mode, "nonlinear", "How should reads be processed. Valid values: 'nonlinear', 'fastlinear', 'quorum'. 'nonlinear' is the fastest option because there are no consistency requirements", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -1,4 +1,5 @@
#include <Coordination/KeeperDispatcher.h>
#include <libnuraft/async.hxx>
#include <Common/setThreadName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <future>
@ -6,6 +7,8 @@
#include <Poco/Path.h>
#include <Common/hex.h>
#include <filesystem>
#include <iterator>
#include <limits>
#include <Common/checkStackSize.h>
#include <Common/CurrentMetrics.h>
@ -30,22 +33,83 @@ namespace ErrorCodes
KeeperDispatcher::KeeperDispatcher()
: responses_queue(std::numeric_limits<size_t>::max())
, read_requests_queue(std::numeric_limits<size_t>::max())
, finalize_requests_queue(std::numeric_limits<size_t>::max())
, configuration_and_settings(std::make_shared<KeeperConfigurationAndSettings>())
, log(&Poco::Logger::get("KeeperDispatcher"))
{
}
/// ZooKeeper has 2 requirements:
/// - writes need to be linearizable
/// - all requests from a single session need to be processed in the order of their arrival
///
/// Because of that, we cannot process read and write requests from the SAME session at the same time.
/// To be able to process read and write requests in parallel we need to make sure that only 1 type
/// of request is being processed from a single session.
/// Multiple types from different sessions can be processed at the same time.
///
/// We do some in-session housekeeping to make sure that the multithreaded request processing is correct.
/// When a request is received from a client, we check if there are requests being processed from that same
/// session, and if yes, of what type. If the types are the same, and there are no requests of a different
/// type in between, we can instantly add it to the active request queue. Otherwise, we need to wait until
/// all requests of the other type are processed.
///
/// There are multiple threads used for processing the requests, each of them communicating with a queue.
/// Assumption: only one type of request is being processed from the same session at any point in time (read or write).
///
/// requestThread -> requests currently being processed
/// readRequestThread -> thread for processing read requests
/// finalizeRequestThread -> thread for finalizing requests:
/// - in-session housekeeping, add requests to the active request queue if there are any
///
/// If reads are linearizable without quorum, a request can possibly wait for a certain log to be committed.
/// In that case we add it to the waiting queue for that log.
/// When that log is committed, the committing thread will send that read request to readRequestThread so it can be processed.
///
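As a hypothetical illustration of the invariant described above (not the actual KeeperDispatcher data structures): per session, only one request type may be in flight, and a request of the other type waits until the in-flight requests drain.

#include <cstddef>
#include <deque>

struct Request { bool is_read = false; /* payload elided */ };

/// One session's bookkeeping: either reads or writes are in flight, never both.
struct SessionState
{
    bool in_flight_is_read = false;
    size_t in_flight = 0;
    std::deque<Request> waiting;  /// requests of the other type, in arrival order

    /// Returns true if the request may start now; otherwise it is queued.
    bool tryStart(const Request & request)
    {
        if (in_flight == 0 || (in_flight_is_read == request.is_read && waiting.empty()))
        {
            in_flight_is_read = request.is_read;
            ++in_flight;
            return true;
        }
        waiting.push_back(request);
        return false;
    }
};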
void KeeperDispatcher::requestThread()
{
setThreadName("KeeperReqT");
/// Result of requests batch from previous iteration
RaftAppendResult prev_result = nullptr;
/// Requests from previous iteration. We store them to be able
/// to send errors to the client.
KeeperStorage::RequestsForSessions prev_batch;
RaftResult prev_result = nullptr;
const auto previous_quorum_done = [&] { return !prev_result || prev_result->has_result() || prev_result->get_result_code() != nuraft::cmd_result_code::OK; };
const auto needs_quorum = [](const auto & coordination_settings, const auto & request)
{
return coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum" || !request.request->isReadRequest();
};
KeeperStorage::RequestsForSessions quorum_requests;
KeeperStorage::RequestsForSessions read_requests;
auto process_quorum_requests = [&, this]() mutable
{
/// Forcefully process all previous pending requests
if (prev_result)
forceWaitAndProcessResult(prev_result);
prev_result = server->putRequestBatch(quorum_requests);
if (prev_result)
{
prev_result->when_ready([&, requests_for_sessions = std::move(quorum_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> &) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result.get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
});
}
quorum_requests.clear();
};
/// ZooKeeper requires that the requests inside a single session are processed in a strict order
/// (we cannot process later requests before all the previous ones are processed)
/// By making sure that at this point we can either have just read requests or just write requests
/// from a single session, we can process them independently
while (!shutdown_called)
{
KeeperStorage::RequestForSession request;
@ -54,94 +118,67 @@ void KeeperDispatcher::requestThread()
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
/// The code below does a very simple thing: it batches all write (quorum) requests into a vector until
/// the previous write batch is finished or the max batch size is reached. The main complexity comes from
/// the ability to process read requests without a quorum (from local state). So when we are collecting
/// requests into a batch we must check that the new request is not a read request. Otherwise we have to
/// process all already accumulated write requests, wait for them synchronously, and only after that process
/// the read request. So reads are a kind of "separator" for writes.
try
{
if (requests_queue->tryPop(request, max_wait))
if (active_requests_queue->tryPop(request, max_wait))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
if (shutdown_called)
break;
KeeperStorage::RequestsForSessions current_batch;
if (needs_quorum(coordination_settings, request))
quorum_requests.emplace_back(request);
else
read_requests.emplace_back(request);
bool has_read_request = false;
/// If the new request is not a read request, or we must process it through quorum.
/// Otherwise we will process it locally.
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
/// Wait until the previous append succeeds, or the batch is big enough.
/// has_result == false && get_result_code == OK means that our request has not been processed yet.
/// Sometimes NuRaft sets the error code without setting the result, so we check both here.
while (true)
{
current_batch.emplace_back(request);
if (quorum_requests.size() > max_batch_size)
break;
/// Wait until the previous append succeeds, or the batch is big enough.
/// has_result == false && get_result_code == OK means that our request has not been processed yet.
/// Sometimes NuRaft sets the error code without setting the result, so we check both here.
while (prev_result && (!prev_result->has_result() && prev_result->get_result_code() == nuraft::cmd_result_code::OK) && current_batch.size() <= max_batch_size)
if (read_requests.size() > max_batch_size)
{
/// Trying to get batch requests as fast as possible
if (requests_queue->tryPop(request, 1))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
/// Don't append read request into batch, we have to process them separately
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
{
has_read_request = true;
break;
}
else
{
processReadRequests(coordination_settings, read_requests);
current_batch.emplace_back(request);
}
}
if (shutdown_called)
if (previous_quorum_done())
break;
}
/// Trying to get batch requests as fast as possible
if (active_requests_queue->tryPop(request, 1))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
if (needs_quorum(coordination_settings, request))
quorum_requests.emplace_back(request);
else
read_requests.emplace_back(request);
}
else
{
/// A batch of read requests can send at most one request,
/// so we don't care if the previous batch hasn't received a response
if (!read_requests.empty())
processReadRequests(coordination_settings, read_requests);
/// If we still haven't processed the previous batch, we can
/// grow our current batch even more
if (previous_quorum_done())
break;
}
if (shutdown_called)
break;
}
else
has_read_request = true;
if (shutdown_called)
break;
/// Forcefully process all previous pending requests
if (prev_result)
forceWaitAndProcessResult(prev_result, prev_batch);
if (!quorum_requests.empty())
process_quorum_requests();
/// Process collected write requests batch
if (!current_batch.empty())
{
auto result = server->putRequestBatch(current_batch);
if (result)
{
if (has_read_request) /// If we will execute a read request next, then we have to process the result now
forceWaitAndProcessResult(result, current_batch);
}
else
{
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
current_batch.clear();
}
prev_batch = std::move(current_batch);
prev_result = result;
}
/// Read request always goes after write batch (last request)
if (has_read_request)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request);
else
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
}
}
}
catch (...)
@ -151,6 +188,72 @@ void KeeperDispatcher::requestThread()
}
}
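The loop above interleaves two queues: writes accumulate into a quorum batch while an arriving read stops the batching and is served only after the writes are flushed. A minimal standalone sketch of that accumulation strategy, using hypothetical stand-ins (`Request`, `tryPop`, the commented flush/serve calls) rather than the real Keeper types:

    #include <cstddef>
    #include <deque>
    #include <optional>
    #include <vector>

    struct Request { bool is_read = false; };

    /// Hypothetical stand-in for RequestsQueue::tryPop.
    std::optional<Request> tryPop(std::deque<Request> & queue)
    {
        if (queue.empty())
            return std::nullopt;
        Request r = queue.front();
        queue.pop_front();
        return r;
    }

    /// Accumulate writes into a batch until a read arrives or the batch
    /// is full; the read acts as a "separator" and is served only after
    /// the collected write batch has been flushed through the quorum.
    void drainOnce(std::deque<Request> & incoming, size_t max_batch_size)
    {
        std::vector<Request> write_batch;
        bool has_read_request = false;

        while (auto request = tryPop(incoming))
        {
            if (request->is_read)
            {
                has_read_request = true; /// stop batching, serve after flush
                break;
            }
            write_batch.push_back(*request);
            if (write_batch.size() >= max_batch_size)
                break;
        }

        /// flushWriteBatch(write_batch);      // hypothetical: replicate via Raft
        /// if (has_read_request) serveRead(); // hypothetical: answered locally
    }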
void KeeperDispatcher::processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests)
{
if (coordination_settings->read_mode.toString() == "fastlinear")
{
// we just want to know the current latest committed log on the leader node
auto leader_info_result = server->getLeaderInfo();
if (leader_info_result)
{
leader_info_result->when_ready([&, requests_for_sessions = std::move(read_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> & exception) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
return;
}
if (result.get_result_code() != nuraft::cmd_result_code::OK)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
if (exception)
{
LOG_INFO(log, "Got exception while waiting for read results {}", exception->what());
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
auto & leader_info_ctx = result.get();
if (!leader_info_ctx)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
KeeperServer::NodeInfo leader_info;
leader_info.term = leader_info_ctx->get_ulong();
leader_info.last_committed_index = leader_info_ctx->get_ulong();
std::lock_guard lock(leader_waiter_mutex);
auto node_info = server->getNodeInfo();
/// we're behind, we need to wait
if (node_info.term < leader_info.term || node_info.last_committed_index < leader_info.last_committed_index)
{
auto & leader_waiter = leader_waiters[leader_info];
leader_waiter.insert(leader_waiter.end(), requests_for_sessions.begin(), requests_for_sessions.end());
LOG_TRACE(log, "waiting for term {}, idx {}", leader_info.term, leader_info.last_committed_index);
}
/// process it in background thread
else if (!read_requests_queue.push(std::move(requests_for_sessions)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
});
}
}
else
{
assert(coordination_settings->read_mode.toString() == "nonlinear");
if (!read_requests_queue.push(std::move(read_requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
read_requests.clear();
}
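In "fastlinear" mode the follower asks the leader for its latest committed (term, index) and serves the read locally only once it has caught up to that point, which is the classic Raft read-barrier construction. A condensed sketch of the comparison, assuming a simplified NodeInfo with the same two fields as in this diff:

    #include <cstdint>

    struct NodeInfo
    {
        uint64_t term = 0;
        uint64_t last_committed_index = 0;
    };

    /// True if the local replica has seen everything the leader had
    /// committed when the read arrived, i.e. a local read is linearizable.
    bool canServeReadLocally(const NodeInfo & local, const NodeInfo & leader)
    {
        return local.term >= leader.term
            && local.last_committed_index >= leader.last_committed_index;
    }

If this returns false, the requests are parked in leader_waiters keyed by the leader's (term, index) and woken later by onRequestCommit or onApplySnapshot.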
void KeeperDispatcher::responseThread()
{
setThreadName("KeeperRspT");
@ -200,6 +303,65 @@ void KeeperDispatcher::snapshotThread()
}
}
/// Background thread for processing read requests
void KeeperDispatcher::readRequestThread()
{
setThreadName("KeeperReadT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!read_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
for (const auto & request_info : requests)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request_info);
else
addErrorResponses({request_info}, Coordination::Error::ZCONNECTIONLOSS);
}
if (!finalize_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
/// We finalize requests every time we commit a single log with a request
/// or process a batch of read requests.
/// Because this can get heavy, we do it in a background thread.
void KeeperDispatcher::finalizeRequestsThread()
{
setThreadName("KeeperFinalT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!finalize_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
finalizeRequests(requests);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
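readRequestThread and finalizeRequestsThread share the same drain-until-finished shape. A generic sketch of that worker pattern, assuming a queue whose pop() blocks and returns false once finish() has been called:

    #include <atomic>

    /// Queue::pop(Item &) is assumed to block and return false once the
    /// queue is finished, which doubles as the thread's shutdown signal.
    template <typename Queue, typename Item, typename Process>
    void workerLoop(Queue & queue, std::atomic<bool> & shutdown_called, Process process)
    {
        while (!shutdown_called)
        {
            Item item;
            if (!queue.pop(item))
                break;
            if (shutdown_called)
                break;
            try
            {
                process(item);
            }
            catch (...)
            {
                /// swallow and keep the worker alive; the real code logs
                /// via tryLogCurrentException(__PRETTY_FUNCTION__)
            }
        }
    }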
void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{
std::lock_guard lock(session_to_response_callback_mutex);
@ -255,6 +417,30 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
request_info.session_id = session_id;
{
std::lock_guard lock{unprocessed_request_mutex};
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
{
auto & unprocessed_requests = unprocessed_requests_for_session[session_id];
unprocessed_requests.unprocessed_num = 1;
unprocessed_requests.is_read = request->isReadRequest();
}
else
{
auto & unprocessed_requests = unprocessed_requests_it->second;
/// if the queue is not empty or the request types don't match, put it in the waiting queue
if (!unprocessed_requests.request_queue.empty() || unprocessed_requests.is_read != request->isReadRequest())
{
unprocessed_requests.request_queue.push_back(std::move(request_info));
return true;
}
++unprocessed_requests.unprocessed_num;
}
}
std::lock_guard lock(push_request_mutex);
if (shutdown_called)
@ -263,10 +449,10 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
/// Put close requests without timeouts
if (request->getOpNum() == Coordination::OpNum::Close)
{
if (!requests_queue->push(std::move(request_info)))
if (!active_requests_queue->push(std::move(request_info)))
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
}
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
else if (!active_requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
{
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
}
@ -279,13 +465,23 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
LOG_DEBUG(log, "Initializing storage dispatcher");
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
active_requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
read_request_thread = ThreadFromGlobalPool([this] { readRequestThread(); });
finalize_requests_thread = ThreadFromGlobalPool([this] { finalizeRequestsThread(); });
server = std::make_unique<KeeperServer>(configuration_and_settings, config, responses_queue, snapshots_queue);
server = std::make_unique<KeeperServer>(
configuration_and_settings,
config,
responses_queue,
snapshots_queue,
[this](const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{ onRequestCommit(request_for_session, log_term, log_idx); },
[this](uint64_t term, uint64_t last_idx)
{ onApplySnapshot(term, last_idx); });
try
{
@ -333,9 +529,9 @@ void KeeperDispatcher::shutdown()
if (session_cleaner_thread.joinable())
session_cleaner_thread.join();
if (requests_queue)
if (active_requests_queue)
{
requests_queue->finish();
active_requests_queue->finish();
if (request_thread.joinable())
request_thread.join();
@ -349,18 +545,23 @@ void KeeperDispatcher::shutdown()
if (snapshot_thread.joinable())
snapshot_thread.join();
read_requests_queue.finish();
if (read_request_thread.joinable())
read_request_thread.join();
finalize_requests_queue.finish();
if (finalize_requests_thread.joinable())
finalize_requests_thread.join();
update_configuration_queue.finish();
if (update_configuration_thread.joinable())
update_configuration_thread.join();
}
if (server)
server->shutdown();
KeeperStorage::RequestForSession request_for_session;
/// Set session expired for all pending requests
while (requests_queue && requests_queue->tryPop(request_for_session))
while (active_requests_queue && active_requests_queue->tryPop(request_for_session))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
auto response = request_for_session.request->makeResponse();
@ -368,10 +569,58 @@ void KeeperDispatcher::shutdown()
setResponse(request_for_session.session_id, response);
}
/// Clear all registered sessions
std::lock_guard lock(session_to_response_callback_mutex);
session_to_response_callback.clear();
KeeperStorage::RequestsForSessions close_requests;
{
/// Clear all registered sessions
std::lock_guard lock(session_to_response_callback_mutex);
if (hasLeader())
{
close_requests.reserve(session_to_response_callback.size());
// send to leader CLOSE requests for active sessions
for (const auto & [session, response] : session_to_response_callback)
{
auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
request->xid = Coordination::CLOSE_XID;
using namespace std::chrono;
KeeperStorage::RequestForSession request_info
{
.session_id = session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
};
close_requests.push_back(std::move(request_info));
}
}
session_to_response_callback.clear();
}
// if there is no leader, there is no reason to do CLOSE because it's a write request
if (hasLeader() && !close_requests.empty())
{
LOG_INFO(log, "Trying to close {} session(s)", close_requests.size());
const auto raft_result = server->putRequestBatch(close_requests);
auto sessions_closing_done_promise = std::make_shared<std::promise<void>>();
auto sessions_closing_done = sessions_closing_done_promise->get_future();
raft_result->when_ready([sessions_closing_done_promise = std::move(sessions_closing_done_promise)](
nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & /*result*/,
nuraft::ptr<std::exception> & /*exception*/) { sessions_closing_done_promise->set_value(); });
auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds();
if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready)
LOG_WARNING(
log,
"Failed to close sessions in {}ms. If they are not closed, they will be closed after session timeout.",
session_shutdown_timeout);
}
if (server)
server->shutdown();
CurrentMetrics::set(CurrentMetrics::KeeperAliveConnections, 0);
}
catch (...)
{
@ -418,16 +667,18 @@ void KeeperDispatcher::sessionCleanerTask()
LOG_INFO(log, "Found dead session {}, will try to close it", dead_session);
/// Close session == send close request to raft server
Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
request->xid = Coordination::CLOSE_XID;
KeeperStorage::RequestForSession request_info;
request_info.request = request;
using namespace std::chrono;
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
request_info.session_id = dead_session;
KeeperStorage::RequestForSession request_info
{
.session_id = dead_session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
};
{
std::lock_guard lock(push_request_mutex);
if (!requests_queue->push(std::move(request_info)))
if (!active_requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
}
@ -477,19 +728,12 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession
}
}
void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions)
void KeeperDispatcher::forceWaitAndProcessResult(RaftResult & result)
{
if (!result->has_result())
result->get();
/// If we get some errors, than send them to clients
if (!result->get_accepted() || result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
result = nullptr;
requests_for_sessions.clear();
}
int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
@ -537,7 +781,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
/// Push new session request to queue
{
std::lock_guard lock(push_request_mutex);
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms))
if (!active_requests_queue->tryPush(std::move(request_info), session_timeout_ms))
throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
}
@ -610,6 +854,122 @@ void KeeperDispatcher::updateConfigurationThread()
}
}
// Used to update the state for a session based on the processed requests:
// - update the number of currently unprocessed requests for the session
// - if the number of unprocessed requests is 0, we can start adding the next type of requests
//   from the unprocessed requests queue to the active queue
void KeeperDispatcher::finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
std::unordered_map<int64_t, size_t> counts_for_session;
for (const auto & request_for_session : requests_for_sessions)
{
++counts_for_session[request_for_session.session_id];
}
std::lock_guard lock{unprocessed_request_mutex};
for (const auto [session_id, count] : counts_for_session)
{
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
continue;
auto & unprocessed_requests = unprocessed_requests_it->second;
unprocessed_requests.unprocessed_num -= count;
if (unprocessed_requests.unprocessed_num == 0)
{
if (!unprocessed_requests.request_queue.empty())
{
auto & unprocessed_requests_queue = unprocessed_requests.request_queue;
unprocessed_requests.is_read = !unprocessed_requests.is_read;
// start adding the next type of requests
while (!unprocessed_requests_queue.empty() && unprocessed_requests_queue.front().request->isReadRequest() == unprocessed_requests.is_read)
{
auto & front_request = unprocessed_requests_queue.front();
/// Put close requests without timeouts
if (front_request.request->getOpNum() == Coordination::OpNum::Close)
{
if (!active_requests_queue->push(std::move(front_request)))
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
}
else if (!active_requests_queue->tryPush(std::move(front_request), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
{
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
}
++unprocessed_requests.unprocessed_num;
unprocessed_requests_queue.pop_front();
}
}
else
{
unprocessed_requests_for_session.erase(unprocessed_requests_it);
}
}
}
}
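The effect of finalizeRequests is to alternate a session between runs of reads and runs of writes in arrival order. A self-contained sketch of the toggle, modelling each waiting request as just its is-read flag:

    #include <cstddef>
    #include <list>

    struct SessionState
    {
        size_t unprocessed_num = 0; /// requests of the active type in flight
        bool is_read = false;       /// type currently being processed
        std::list<bool> waiting;    /// queued requests: true = read
    };

    /// Called when `count` requests of the active type have finished.
    /// Once the active run drains, flip the type and release the longest
    /// prefix of waiting requests that matches the new type.
    void finalize(SessionState & state, size_t count)
    {
        state.unprocessed_num -= count;
        if (state.unprocessed_num != 0 || state.waiting.empty())
            return;

        state.is_read = !state.is_read;
        while (!state.waiting.empty() && state.waiting.front() == state.is_read)
        {
            /// pushToActiveQueue(...) in the real code
            ++state.unprocessed_num;
            state.waiting.pop_front();
        }
    }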
// Finalize request
// Process read requests that were waiting for this commit
void KeeperDispatcher::onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{
if (!finalize_requests_queue.push({request_for_session}))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
KeeperStorage::RequestsForSessions requests;
{
std::lock_guard lock(leader_waiter_mutex);
auto request_queue_it = leader_waiters.find(KeeperServer::NodeInfo{.term = log_term, .last_committed_index = log_idx});
if (request_queue_it != leader_waiters.end())
{
requests = std::move(request_queue_it->second);
leader_waiters.erase(request_queue_it);
}
}
if (requests.empty())
return;
if (!read_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
/// Process all read requests that are waiting for a lower or the currently last processed log index
void KeeperDispatcher::onApplySnapshot(uint64_t term, uint64_t last_idx)
{
KeeperServer::NodeInfo current_node_info{term, last_idx};
KeeperStorage::RequestsForSessions requests;
{
std::lock_guard lock(leader_waiter_mutex);
for (auto leader_waiter_it = leader_waiters.begin(); leader_waiter_it != leader_waiters.end();)
{
auto waiting_node_info = leader_waiter_it->first;
if (waiting_node_info.term <= current_node_info.term
&& waiting_node_info.last_committed_index <= current_node_info.last_committed_index)
{
for (auto & request : leader_waiter_it->second)
{
requests.push_back(std::move(request));
}
leader_waiter_it = leader_waiters.erase(leader_waiter_it);
}
else
{
++leader_waiter_it;
}
}
}
if (requests.empty())
return;
if (!read_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
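The two wake-up paths differ deliberately: a single commit wakes only the reads waiting for exactly that (term, index), while an applied snapshot wakes everything at or below it. A sketch of both, using an ordered map and plain ints for queued requests to stay self-contained:

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    /// key = (term, last_committed_index), value = parked read requests
    using Waiters = std::map<std::pair<uint64_t, uint64_t>, std::vector<int>>;

    /// Commit of one log: exact-match lookup, like onRequestCommit above.
    std::vector<int> wakeOnCommit(Waiters & waiters, uint64_t term, uint64_t idx)
    {
        std::vector<int> ready;
        if (auto it = waiters.find({term, idx}); it != waiters.end())
        {
            ready = std::move(it->second);
            waiters.erase(it);
        }
        return ready;
    }

    /// Snapshot apply: release every waiter at or below the applied point,
    /// like onApplySnapshot above.
    std::vector<int> wakeOnSnapshot(Waiters & waiters, uint64_t term, uint64_t idx)
    {
        std::vector<int> ready;
        for (auto it = waiters.begin(); it != waiters.end();)
        {
            if (it->first.first <= term && it->first.second <= idx)
            {
                ready.insert(ready.end(), it->second.begin(), it->second.end());
                it = waiters.erase(it);
            }
            else
                ++it;
        }
        return ready;
    }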
bool KeeperDispatcher::isServerActive() const
{
return checkInit() && hasLeader() && !server->isRecovering();
@ -674,7 +1034,7 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const
Keeper4LWInfo result = server->getPartiallyFilled4LWInfo();
{
std::lock_guard lock(push_request_mutex);
result.outstanding_requests_count = requests_queue->size();
result.outstanding_requests_count = active_requests_queue->size();
}
{
std::lock_guard lock(session_to_response_callback_mutex);

View File

@ -32,9 +32,12 @@ private:
using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>;
/// Size depends on coordination settings
std::unique_ptr<RequestsQueue> requests_queue;
/// Requests currently being processed
std::unique_ptr<RequestsQueue> active_requests_queue;
ResponsesQueue responses_queue;
SnapshotsQueue snapshots_queue{1};
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> read_requests_queue;
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> finalize_requests_queue;
/// More than 1k updates is definitely misconfiguration.
UpdateConfigurationQueue update_configuration_queue{1000};
@ -64,6 +67,8 @@ private:
ThreadFromGlobalPool snapshot_thread;
/// Apply or wait for configuration changes
ThreadFromGlobalPool update_configuration_thread;
ThreadFromGlobalPool read_request_thread;
ThreadFromGlobalPool finalize_requests_thread;
/// RAFT wrapper.
std::unique_ptr<KeeperServer> server;
@ -77,6 +82,34 @@ private:
/// Counter for new session_id requests.
std::atomic<int64_t> internal_session_id_counter{0};
/// A read request needs to see at least the log that was the last committed log on the leader
/// at the time the request was made.
/// If the node is stale, we need to wait until that log is committed before doing local read requests to achieve
/// linearizability.
std::unordered_map<KeeperServer::NodeInfo, KeeperStorage::RequestsForSessions> leader_waiters;
std::mutex leader_waiter_mutex;
/// We can be actively processing one type of request (either read or write) from a single session.
/// If we receive a request of a type that is not currently being processed, we put it in the waiting queue.
/// Also, we want to process requests in arrival order, so if we have a different type in the queue, we cannot process that request
/// but have to wait for all the previous requests to finish.
/// E.g. READ -> WRITE -> READ: the last READ will go to the waiting queue even though we are currently processing the first READ,
/// because there is a WRITE request before it that needs to be processed first.
struct UnprocessedRequests
{
/// how many requests are currently in the active request queue
size_t unprocessed_num{0};
/// whether the requests currently being processed are reads
bool is_read{false};
std::list<KeeperStorage::RequestForSession> request_queue;
};
// Called every time a batch of requests is processed.
void finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions);
std::unordered_map<int64_t, UnprocessedRequests> unprocessed_requests_for_session;
std::mutex unprocessed_request_mutex;
/// Thread that puts requests to Raft
void requestThread();
/// Thread that puts responses for subscribed sessions
@ -88,6 +121,12 @@ private:
/// Thread that applies or waits for configuration changes from the leader
void updateConfigurationThread();
void readRequestThread();
void finalizeRequestsThread();
void processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests);
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
/// Add error responses for requests to responses queue.
@ -96,7 +135,7 @@ private:
/// Forcefully wait for the result and set errors if something went wrong.
/// Clears both arguments
void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions);
static void forceWaitAndProcessResult(RaftResult & result);
public:
/// Just allocate some objects, real initialization is done by the `initialize` method
@ -116,6 +155,12 @@ public:
return server && server->checkInit();
}
/// Called when a single log with request is committed.
void onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx);
/// Called when a snapshot is applied
void onApplySnapshot(uint64_t term, uint64_t last_idx);
/// Is server accepting requests, i.e. connected to the cluster
/// and achieved quorum
bool isServerActive() const;

View File

@ -105,7 +105,9 @@ KeeperServer::KeeperServer(
const KeeperConfigurationAndSettingsPtr & configuration_and_settings_,
const Poco::Util::AbstractConfiguration & config,
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_)
SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback)
: server_id(configuration_and_settings_->server_id)
, coordination_settings(configuration_and_settings_->coordination_settings)
, log(&Poco::Logger::get("KeeperServer"))
@ -113,7 +115,7 @@ KeeperServer::KeeperServer(
, keeper_context{std::make_shared<KeeperContext>()}
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
{
if (coordination_settings->quorum_reads)
if (coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum")
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false);
@ -125,7 +127,9 @@ KeeperServer::KeeperServer(
configuration_and_settings_->snapshot_storage_path,
coordination_settings,
keeper_context,
checkAndGetSuperdigest(configuration_and_settings_->super_digest));
checkAndGetSuperdigest(configuration_and_settings_->super_digest),
std::move(commit_callback),
std::move(apply_snapshot_callback));
state_manager = nuraft::cs_new<KeeperStateManager>(
server_id,
@ -176,6 +180,13 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
reconfigure(new_config);
}
RaftResult getLeaderInfo()
{
nuraft::ptr<nuraft::req_msg> req
= nuraft::cs_new<nuraft::req_msg>(0ull, nuraft::msg_type::leader_status_request, 0, 0, 0ull, 0ull, 0ull);
return send_msg_to_leader(req);
}
void commit_in_bg() override
{
// For NuRaft, if any commit fails (uncaught exception) the whole server aborts as a safety
@ -269,6 +280,20 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(coordination_settings->leadership_expiry.totalMilliseconds(), "leadership_expiry", log);
if (coordination_settings->read_mode.toString() == "fastlinear")
{
if (params.leadership_expiry_ == 0)
params.leadership_expiry_ = params.election_timeout_lower_bound_;
else if (params.leadership_expiry_ > params.election_timeout_lower_bound_)
{
LOG_WARNING(log, "To use fast linearizable reads, leadership_expiry should be set to a value that is less than or equal to election_timeout_lower_bound_ms. "
"Based on the current settings, there are no guarantees for the linearizability of reads.");
}
}
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
@ -487,7 +512,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
state_machine->processReadRequest(request_for_session);
}
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
RaftResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
std::vector<nuraft::ptr<nuraft::buffer>> entries;
for (const auto & request_for_session : requests_for_sessions)
@ -713,6 +738,20 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
return state_machine->getDeadSessions();
}
RaftResult KeeperServer::getLeaderInfo()
{
std::lock_guard lock{server_write_mutex};
if (is_recovering)
return nullptr;
return raft_instance->getLeaderInfo();
}
KeeperServer::NodeInfo KeeperServer::getNodeInfo()
{
return { .term = raft_instance->get_term(), .last_committed_index = state_machine->last_commit_index() };
}
ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
{
auto diff = state_manager->getConfigurationDiff(config);

View File

@ -14,7 +14,7 @@
namespace DB
{
using RaftAppendResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
using RaftResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
class KeeperServer
{
@ -71,7 +71,9 @@ public:
const KeeperConfigurationAndSettingsPtr & settings_,
const Poco::Util::AbstractConfiguration & config_,
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_);
SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback);
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true);
@ -84,7 +86,7 @@ public:
/// Put batch of requests into Raft and get result of put. Responses will be set separately into
/// responses_queue.
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
RaftResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
/// Return set of the non-active sessions
std::vector<int64_t> getDeadSessions();
@ -119,6 +121,17 @@ public:
int getServerID() const { return server_id; }
struct NodeInfo
{
uint64_t term;
uint64_t last_committed_index;
bool operator==(const NodeInfo &) const = default;
};
RaftResult getLeaderInfo();
NodeInfo getNodeInfo();
/// Get configuration diff between current configuration in RAFT and in XML file
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config);
@ -126,10 +139,23 @@ public:
/// Synchronously check for update results with retries.
void applyConfigurationUpdate(const ConfigUpdateAction & task);
/// Wait configuration update for action. Used by followers.
/// Return true if update was successfully received.
bool waitConfigurationUpdate(const ConfigUpdateAction & task);
};
}
namespace std
{
template <>
struct hash<DB::KeeperServer::NodeInfo>
{
size_t operator()(const DB::KeeperServer::NodeInfo & info) const
{
SipHash hash_state;
hash_state.update(info.term);
hash_state.update(info.last_committed_index);
return hash_state.get64();
}
};
}
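The defaulted operator== plus this std::hash specialization are exactly what std::unordered_map needs in order to key leader_waiters by NodeInfo. A minimal compilable illustration, with a simple hash combiner standing in for SipHash:

    #include <cstdint>
    #include <functional>
    #include <unordered_map>
    #include <vector>

    struct NodeInfo
    {
        uint64_t term = 0;
        uint64_t last_committed_index = 0;
        bool operator==(const NodeInfo &) const = default;
    };

    template <>
    struct std::hash<NodeInfo>
    {
        size_t operator()(const NodeInfo & info) const
        {
            /// boost-style combiner; the real code feeds both fields to SipHash
            size_t h = std::hash<uint64_t>{}(info.term);
            h ^= std::hash<uint64_t>{}(info.last_committed_index) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
            return h;
        }
    };

    int main()
    {
        std::unordered_map<NodeInfo, std::vector<int>> leader_waiters;
        leader_waiters[NodeInfo{3, 42}].push_back(1); /// park a request
        return leader_waiters.count(NodeInfo{3, 42}) == 1 ? 0 : 1;
    }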

View File

@ -44,7 +44,9 @@ KeeperStateMachine::KeeperStateMachine(
const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_,
const std::string & superdigest_)
const std::string & superdigest_,
CommitCallback commit_callback_,
ApplySnapshotCallback apply_snapshot_callback_)
: coordination_settings(coordination_settings_)
, snapshot_manager(
snapshots_path_,
@ -58,6 +60,8 @@ KeeperStateMachine::KeeperStateMachine(
, last_committed_idx(0)
, log(&Poco::Logger::get("KeeperStateMachine"))
, superdigest(superdigest_)
, commit_callback(std::move(commit_callback_))
, apply_snapshot_callback(std::move(apply_snapshot_callback_))
, keeper_context(keeper_context_)
{
}
@ -223,11 +227,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
return true;
}
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit_ext(const ext_op_params & params)
{
auto request_for_session = parseRequest(data);
auto request_for_session = parseRequest(*params.data);
if (!request_for_session.zxid)
request_for_session.zxid = log_idx;
request_for_session.zxid = params.log_idx;
/// Special processing of session_id request
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
@ -272,8 +276,9 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true);
}
last_committed_idx = params.log_idx;
commit_callback(request_for_session, params.log_term, params.log_idx);
ProfileEvents::increment(ProfileEvents::KeeperCommits);
last_committed_idx = log_idx;
return nullptr;
}
@ -306,6 +311,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys);
last_committed_idx = s.get_last_log_idx();
apply_snapshot_callback(s.get_last_log_term(), s.get_last_log_idx());
return true;
}
@ -320,6 +326,10 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr
void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
{
auto request_for_session = parseRequest(data);
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return;
// If we received a log from an older node, use the log_idx as the zxid.
// log_idx will always be larger than or equal to the zxid, so we can safely do this
// (log_idx is increased for all logs, while zxid is only increased for requests).

View File

@ -20,13 +20,18 @@ using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
class KeeperStateMachine : public nuraft::state_machine
{
public:
using CommitCallback = std::function<void(const KeeperStorage::RequestForSession &, uint64_t, uint64_t)>;
using ApplySnapshotCallback = std::function<void(uint64_t, uint64_t)>;
KeeperStateMachine(
ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_,
const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_,
const std::string & superdigest_ = "");
const std::string & superdigest_ = "",
CommitCallback commit_callback_ = [](const KeeperStorage::RequestForSession &, uint64_t, uint64_t){},
ApplySnapshotCallback apply_snapshot_callback_ = [](uint64_t, uint64_t){});
/// Read state from the latest snapshot
void init();
@ -37,7 +42,7 @@ public:
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT
nuraft::ptr<nuraft::buffer> commit_ext(const ext_op_params & params) override; /// NOLINT
/// Save new cluster config to our snapshot (copy of the config stored in StateManager)
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT
@ -145,6 +150,11 @@ private:
/// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest;
/// called when a request is committed
const CommitCallback commit_callback;
/// called when a snapshot is applied
const ApplySnapshotCallback apply_snapshot_callback;
KeeperContextPtr keeper_context;
};

View File

@ -1330,8 +1330,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
changelog.append(entry);
changelog.end_of_append_batch(0, 0);
state_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
state_machine->commit(i, changelog.entry_at(i)->get_buf());
auto entry_buf = changelog.entry_at(i)->get_buf_ptr();
state_machine->pre_commit(i, *entry_buf);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry_buf});
bool snapshot_created = false;
if (i % settings->snapshot_distance == 0)
{
@ -1375,8 +1376,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i)
{
restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf());
restore_machine->commit(i, changelog.entry_at(i)->get_buf());
auto entry = changelog.entry_at(i)->get_buf_ptr();
restore_machine->pre_commit(i, *entry);
restore_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry});
}
auto & source_storage = state_machine->getStorage();
@ -1477,18 +1479,18 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove)
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
request_c->path = "/hello";
request_c->is_ephemeral = true;
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c);
state_machine->pre_commit(1, entry_c->get_buf());
state_machine->commit(1, entry_c->get_buf());
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c)->get_buf_ptr();
state_machine->pre_commit(1, *entry_c);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{1, entry_c});
const auto & storage = state_machine->getStorage();
EXPECT_EQ(storage.ephemerals.size(), 1);
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
request_d->path = "/hello";
/// Delete from other session
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d);
state_machine->pre_commit(2, entry_d->get_buf());
state_machine->commit(2, entry_d->get_buf());
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d)->get_buf_ptr();
state_machine->pre_commit(2, *entry_d);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{2, entry_d});
EXPECT_EQ(storage.ephemerals.size(), 0);
}

View File

@ -149,9 +149,9 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Met
threads.resize(size_);
for (auto & thread : threads)
thread = ThreadFromGlobalPool([this] { threadFunction(); });
thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { threadFunction(); });
delayed_thread = ThreadFromGlobalPool([this] { delayExecutionThreadFunction(); });
delayed_thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { delayExecutionThreadFunction(); });
}
@ -168,7 +168,7 @@ void BackgroundSchedulePool::increaseThreadsCount(size_t new_threads_count)
threads.resize(new_threads_count);
for (size_t i = old_threads_count; i < new_threads_count; ++i)
threads[i] = ThreadFromGlobalPool([this] { threadFunction(); });
threads[i] = ThreadFromGlobalPoolNoTracingContextPropagation([this] { threadFunction(); });
}

View File

@ -57,7 +57,9 @@ public:
~BackgroundSchedulePool();
private:
using Threads = std::vector<ThreadFromGlobalPool>;
/// BackgroundSchedulePool schedules tasks on its own task queue, so there's no need to construct/restore the tracing context at this level.
/// This is also how the ThreadPool class treats the tracing context. See ThreadPool for more information.
using Threads = std::vector<ThreadFromGlobalPoolNoTracingContextPropagation>;
void threadFunction();
void delayExecutionThreadFunction();
@ -83,7 +85,7 @@ private:
std::condition_variable delayed_tasks_cond_var;
std::mutex delayed_tasks_mutex;
/// Thread waiting for next delayed task.
ThreadFromGlobalPool delayed_thread;
ThreadFromGlobalPoolNoTracingContextPropagation delayed_thread;
/// Tasks ordered by scheduled time.
DelayedTasks delayed_tasks;
