mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into database_replicated
This commit is contained in:
commit
9e3fd3c170
2
.gitignore
vendored
2
.gitignore
vendored
@ -124,3 +124,5 @@ website/package-lock.json
|
||||
|
||||
# Toolchains
|
||||
/cmake/toolchain/*
|
||||
|
||||
*.iml
|
||||
|
10
.gitmodules
vendored
10
.gitmodules
vendored
@ -44,6 +44,7 @@
|
||||
[submodule "contrib/protobuf"]
|
||||
path = contrib/protobuf
|
||||
url = https://github.com/ClickHouse-Extras/protobuf.git
|
||||
branch = v3.13.0.1
|
||||
[submodule "contrib/boost"]
|
||||
path = contrib/boost
|
||||
url = https://github.com/ClickHouse-Extras/boost.git
|
||||
@ -107,6 +108,7 @@
|
||||
[submodule "contrib/grpc"]
|
||||
path = contrib/grpc
|
||||
url = https://github.com/ClickHouse-Extras/grpc.git
|
||||
branch = v1.33.2
|
||||
[submodule "contrib/aws"]
|
||||
path = contrib/aws
|
||||
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
|
||||
@ -198,5 +200,9 @@
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.14.5
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
[submodule "contrib/abseil-cpp"]
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
|
||||
branch = lts_2020_02_25
|
||||
|
@ -475,9 +475,6 @@ find_contrib_lib(cityhash)
|
||||
|
||||
find_contrib_lib(farmhash)
|
||||
|
||||
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
|
||||
find_contrib_lib(btrie)
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find/gtest.cmake)
|
||||
endif ()
|
||||
|
@ -1,6 +1,6 @@
|
||||
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
|
||||
|
||||
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
|
||||
## Useful Links
|
||||
|
||||
@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
|
||||
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020.
|
||||
* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020.
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
/// Macros for convenient usage of Poco logger.
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Message.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
19
base/glibc-compatibility/musl/accept4.c
Normal file
19
base/glibc-compatibility/musl/accept4.c
Normal file
@ -0,0 +1,19 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/socket.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg)
|
||||
{
|
||||
if (!flg) return accept(fd, addr, len);
|
||||
int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
|
||||
if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
|
||||
ret = accept(fd, addr, len);
|
||||
if (ret<0) return ret;
|
||||
if (flg & SOCK_CLOEXEC)
|
||||
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
|
||||
if (flg & SOCK_NONBLOCK)
|
||||
__syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK);
|
||||
return ret;
|
||||
}
|
37
base/glibc-compatibility/musl/epoll.c
Normal file
37
base/glibc-compatibility/musl/epoll.c
Normal file
@ -0,0 +1,37 @@
|
||||
#include <sys/epoll.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int epoll_create(int size)
|
||||
{
|
||||
return epoll_create1(0);
|
||||
}
|
||||
|
||||
int epoll_create1(int flags)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_create1, flags);
|
||||
#ifdef SYS_epoll_create
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
|
||||
{
|
||||
return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
|
||||
}
|
||||
|
||||
int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
|
||||
#ifdef SYS_epoll_wait
|
||||
if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
|
||||
{
|
||||
return epoll_pwait(fd, ev, cnt, to, 0);
|
||||
}
|
23
base/glibc-compatibility/musl/eventfd.c
Normal file
23
base/glibc-compatibility/musl/eventfd.c
Normal file
@ -0,0 +1,23 @@
|
||||
#include <sys/eventfd.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int eventfd(unsigned int count, int flags)
|
||||
{
|
||||
int r = __syscall(SYS_eventfd2, count, flags);
|
||||
#ifdef SYS_eventfd
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int eventfd_read(int fd, eventfd_t *value)
|
||||
{
|
||||
return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1;
|
||||
}
|
||||
|
||||
int eventfd_write(int fd, eventfd_t value)
|
||||
{
|
||||
return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1;
|
||||
}
|
45
base/glibc-compatibility/musl/getauxval.c
Normal file
45
base/glibc-compatibility/musl/getauxval.c
Normal file
@ -0,0 +1,45 @@
|
||||
#include <sys/auxv.h>
|
||||
#include <unistd.h> // __environ
|
||||
#include <errno.h>
|
||||
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
static unsigned long * __auxv = NULL;
|
||||
static unsigned long __auxv_secure = 0;
|
||||
|
||||
static size_t __find_auxv(unsigned long type)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __auxv[i]; i += 2)
|
||||
{
|
||||
if (__auxv[i] == type)
|
||||
return i + 1;
|
||||
}
|
||||
return (size_t) -1;
|
||||
}
|
||||
|
||||
__attribute__((constructor)) static void __auxv_init()
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __environ[i]; i++);
|
||||
__auxv = (unsigned long *) (__environ + i + 1);
|
||||
|
||||
size_t secure_idx = __find_auxv(AT_SECURE);
|
||||
if (secure_idx != ((size_t) -1))
|
||||
__auxv_secure = __auxv[secure_idx];
|
||||
}
|
||||
|
||||
unsigned long getauxval(unsigned long type)
|
||||
{
|
||||
if (type == AT_SECURE)
|
||||
return __auxv_secure;
|
||||
|
||||
if (__auxv)
|
||||
{
|
||||
size_t index = __find_auxv(type);
|
||||
if (index != ((size_t) -1))
|
||||
return __auxv[index];
|
||||
}
|
||||
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
@ -0,0 +1,8 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
char * secure_getenv(const char * name)
|
||||
{
|
||||
return getauxval(AT_SECURE) ? NULL : getenv(name);
|
||||
}
|
@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...);
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
void *__vdsosym(const char *, const char *);
|
||||
|
||||
#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
|
||||
|
||||
#define socketcall(...) __syscall_ret(__socketcall(__VA_ARGS__))
|
||||
|
||||
#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
|
||||
|
||||
#define socketcall_cp socketcall
|
||||
|
@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
|
||||
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
|
||||
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
|
||||
|
||||
extern char** environ;
|
||||
static Ehdr *eh = NULL;
|
||||
void *__vdsosym(const char *vername, const char *name);
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
__attribute__((constructor)) static void auxv_init()
|
||||
{
|
||||
size_t i, *auxv;
|
||||
for (i=0; environ[i]; i++);
|
||||
auxv = (void *)(environ+i+1);
|
||||
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
|
||||
if (!auxv[i]) return;
|
||||
if (!auxv[i+1]) return;
|
||||
eh = (void *)auxv[i+1];
|
||||
}
|
||||
|
||||
void *__vdsosym(const char *vername, const char *name)
|
||||
{
|
||||
size_t i;
|
||||
Ehdr * eh = (void *) getauxval(AT_SYSINFO_EHDR);
|
||||
if (!eh) return 0;
|
||||
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
|
||||
size_t *dynv=0, base=-1;
|
||||
|
@ -1,44 +0,0 @@
|
||||
# - Try to find btrie headers and libraries.
|
||||
#
|
||||
# Usage of this module as follows:
|
||||
#
|
||||
# find_package(btrie)
|
||||
#
|
||||
# Variables used by this module, they can change the default behaviour and need
|
||||
# to be set before calling find_package:
|
||||
#
|
||||
# BTRIE_ROOT_DIR Set this variable to the root installation of
|
||||
# btrie if the module has problems finding
|
||||
# the proper installation path.
|
||||
#
|
||||
# Variables defined by this module:
|
||||
#
|
||||
# BTRIE_FOUND System has btrie libs/headers
|
||||
# BTRIE_LIBRARIES The btrie library/libraries
|
||||
# BTRIE_INCLUDE_DIR The location of btrie headers
|
||||
|
||||
find_path(BTRIE_ROOT_DIR
|
||||
NAMES include/btrie.h
|
||||
)
|
||||
|
||||
find_library(BTRIE_LIBRARIES
|
||||
NAMES btrie
|
||||
PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
|
||||
)
|
||||
|
||||
find_path(BTRIE_INCLUDE_DIR
|
||||
NAMES btrie.h
|
||||
PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(btrie DEFAULT_MSG
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
||||
|
||||
mark_as_advanced(
|
||||
BTRIE_ROOT_DIR
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
@ -6,11 +6,9 @@ Defines the following variables:
|
||||
The include directories of the gRPC framework, including the include directories of the C++ wrapper.
|
||||
``gRPC_LIBRARIES``
|
||||
The libraries of the gRPC framework.
|
||||
``gRPC_UNSECURE_LIBRARIES``
|
||||
The libraries of the gRPC framework without SSL.
|
||||
``_gRPC_CPP_PLUGIN``
|
||||
``gRPC_CPP_PLUGIN``
|
||||
The plugin for generating gRPC client and server C++ stubs from `.proto` files
|
||||
``_gRPC_PYTHON_PLUGIN``
|
||||
``gRPC_PYTHON_PLUGIN``
|
||||
The plugin for generating gRPC client and server Python stubs from `.proto` files
|
||||
|
||||
The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
``grpc_cpp_plugin``
|
||||
``grpc_python_plugin``
|
||||
|
||||
Set the following variables to adjust the behaviour of this script:
|
||||
``gRPC_USE_UNSECURE_LIBRARIES``
|
||||
if set gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL)
|
||||
instead of the secure ones.
|
||||
``gRPC_DEBUG`
|
||||
if set the debug message will be printed.
|
||||
|
||||
Add custom commands to process ``.proto`` files to C++::
|
||||
protobuf_generate_grpc_cpp(<SRCS> <HDRS>
|
||||
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
|
||||
@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc)
|
||||
find_library(gRPC_CPP_LIBRARY NAMES grpc++)
|
||||
find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure)
|
||||
find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure)
|
||||
find_library(gRPC_CARES_LIBRARY NAMES cares)
|
||||
|
||||
set(gRPC_LIBRARIES)
|
||||
if(gRPC_USE_UNSECURE_LIBRARIES)
|
||||
@ -259,6 +265,7 @@ else()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY})
|
||||
|
||||
# Restore the original find library ordering.
|
||||
if(gRPC_USE_STATIC_LIBS)
|
||||
@ -278,11 +285,11 @@ else()
|
||||
endif()
|
||||
|
||||
# Get full path to plugin.
|
||||
find_program(_gRPC_CPP_PLUGIN
|
||||
find_program(gRPC_CPP_PLUGIN
|
||||
NAMES grpc_cpp_plugin
|
||||
DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files")
|
||||
|
||||
find_program(_gRPC_PYTHON_PLUGIN
|
||||
find_program(gRPC_PYTHON_PLUGIN
|
||||
NAMES grpc_python_plugin
|
||||
DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files")
|
||||
|
||||
@ -317,14 +324,14 @@ endif()
|
||||
|
||||
#include(FindPackageHandleStandardArgs.cmake)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN)
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN)
|
||||
|
||||
if(gRPC_FOUND)
|
||||
if(gRPC_DEBUG)
|
||||
message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}")
|
||||
message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -1,3 +1,4 @@
|
||||
# Needed when using Apache Avro serialization format
|
||||
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_AVRO)
|
||||
|
@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY)
|
||||
if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
elseif(NOT _gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin")
|
||||
elseif(NOT gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
else()
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
|
||||
@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
else()
|
||||
set(gRPC_LIBRARIES grpc grpc++)
|
||||
endif()
|
||||
set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(_gRPC_PROTOC_EXECUTABLE $<TARGET_FILE:protobuf::protoc>)
|
||||
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
|
||||
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
||||
@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
set(USE_GRPC 1)
|
||||
endif()
|
||||
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Needed when securely connecting to an external server, e.g.
|
||||
# clickhouse-client --host ... --secure
|
||||
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
|
||||
|
||||
if(NOT ENABLE_SSL)
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
|
||||
add_subdirectory (libfarmhash)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_BTRIE_LIBRARY)
|
||||
add_subdirectory (libbtrie)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
|
||||
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
|
||||
|
1
contrib/abseil-cpp
vendored
Submodule
1
contrib/abseil-cpp
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54
|
2
contrib/grpc
vendored
2
contrib/grpc
vendored
@ -1 +1 @@
|
||||
Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43
|
||||
Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93
|
@ -1,6 +1,7 @@
|
||||
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
|
||||
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
|
||||
|
||||
# Use re2 from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT RE2_INCLUDE_DIR)
|
||||
message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown")
|
||||
endif()
|
||||
@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}")
|
||||
set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}")
|
||||
|
||||
# Use zlib from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT ZLIB_INCLUDE_DIRS)
|
||||
message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown")
|
||||
endif()
|
||||
@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}")
|
||||
set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}")
|
||||
|
||||
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
|
||||
message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown")
|
||||
elseif (NOT Protobuf_PROTOC_EXECUTABLE)
|
||||
@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc")
|
||||
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")
|
||||
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}")
|
||||
|
||||
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
|
||||
set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES})
|
||||
|
||||
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
|
||||
message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
|
||||
|
||||
# Choose to build static or shared library for c-ares.
|
||||
if (MAKE_STATIC_LIBRARIES)
|
||||
set(CARES_STATIC ON CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
|
||||
else ()
|
||||
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED ON CACHE BOOL "" FORCE)
|
||||
endif ()
|
||||
|
||||
# We don't want to build C# extensions.
|
||||
set(gRPC_BUILD_CSHARP_EXT OFF)
|
||||
|
||||
# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off.
|
||||
set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING})
|
||||
set(BUILD_TESTING OFF)
|
||||
|
||||
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
|
||||
|
||||
set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING})
|
||||
|
||||
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
|
||||
# so we need to redefine it back.
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
@ -1,6 +0,0 @@
|
||||
add_library(btrie
|
||||
src/btrie.c
|
||||
include/btrie.h
|
||||
)
|
||||
|
||||
target_include_directories (btrie SYSTEM PUBLIC include)
|
@ -1,23 +0,0 @@
|
||||
Copyright (c) 2013, CobbLiu
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,160 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* In btrie, each leaf means one bit in ip tree.
|
||||
* Left means 0, and right means 1.
|
||||
*/
|
||||
|
||||
#define BTRIE_NULL (uintptr_t) -1
|
||||
|
||||
#if !defined(BTRIE_MAX_PAGES)
|
||||
/// 54 ip per page. 8 bytes memory per page when empty
|
||||
#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty
|
||||
// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
|
||||
#endif
|
||||
|
||||
typedef struct btrie_node_s btrie_node_t;
|
||||
|
||||
struct btrie_node_s {
|
||||
btrie_node_t *right;
|
||||
btrie_node_t *left;
|
||||
btrie_node_t *parent;
|
||||
uintptr_t value;
|
||||
};
|
||||
|
||||
|
||||
typedef struct btrie_s {
|
||||
btrie_node_t *root;
|
||||
|
||||
btrie_node_t *free; /* free list of btrie */
|
||||
char *start;
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* memory pool.
|
||||
* memory management(esp free) will be so easy by using this facility.
|
||||
*/
|
||||
char *pools[BTRIE_MAX_PAGES];
|
||||
size_t len;
|
||||
} btrie_t;
|
||||
|
||||
|
||||
/**
|
||||
* Create an empty btrie
|
||||
*
|
||||
* @Return:
|
||||
* An ip radix_tree created.
|
||||
* NULL if creation failed.
|
||||
*/
|
||||
|
||||
btrie_t *btrie_create();
|
||||
|
||||
/**
|
||||
* Destroy the ip radix_tree
|
||||
*
|
||||
* @Return:
|
||||
* OK if deletion succeed.
|
||||
* ERROR if error occurs while deleting.
|
||||
*/
|
||||
int btrie_destroy(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Count the nodes in the radix tree.
|
||||
*/
|
||||
size_t btrie_count(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Return the allocated number of bytes.
|
||||
*/
|
||||
size_t btrie_allocated(btrie_t *tree);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv4 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Delete an ipv4 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
|
||||
|
||||
|
||||
/**
|
||||
* Find an ipv4 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv6 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value);
|
||||
|
||||
/**
|
||||
* Delete an ipv6 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
|
||||
|
||||
/**
|
||||
* Find an ipv6 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
@ -1,460 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <btrie.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
static btrie_node_t *
|
||||
btrie_alloc(btrie_t *tree)
|
||||
{
|
||||
btrie_node_t *p;
|
||||
|
||||
if (tree->free) {
|
||||
p = tree->free;
|
||||
tree->free = tree->free->right;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (tree->size < sizeof(btrie_node_t)) {
|
||||
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
|
||||
if (tree->start == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->pools[tree->len++] = tree->start;
|
||||
tree->size = PAGE_SIZE;
|
||||
}
|
||||
|
||||
p = (btrie_node_t *) tree->start;
|
||||
|
||||
tree->start += sizeof(btrie_node_t);
|
||||
tree->size -= sizeof(btrie_node_t);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
btrie_t *
|
||||
btrie_create()
|
||||
{
|
||||
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
|
||||
if (tree == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->free = NULL;
|
||||
tree->start = NULL;
|
||||
tree->size = 0;
|
||||
memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
|
||||
tree->len = 0;
|
||||
|
||||
tree->root = btrie_alloc(tree);
|
||||
if (tree->root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->root->right = NULL;
|
||||
tree->root->left = NULL;
|
||||
tree->root->parent = NULL;
|
||||
tree->root->value = BTRIE_NULL;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static size_t
|
||||
subtree_weight(btrie_node_t *node)
|
||||
{
|
||||
size_t weight = 1;
|
||||
if (node->left) {
|
||||
weight += subtree_weight(node->left);
|
||||
}
|
||||
if (node->right) {
|
||||
weight += subtree_weight(node->right);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_count(btrie_t *tree)
|
||||
{
|
||||
if (tree->root == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return subtree_weight(tree->root);
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_allocated(btrie_t *tree)
|
||||
{
|
||||
return tree->len * PAGE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
bit = 0x80000000;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask) {
|
||||
if (key & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask)) {
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find(btrie_t *tree, uint32_t key)
|
||||
{
|
||||
uint32_t bit;
|
||||
uintptr_t value;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask[i]) {
|
||||
if (key[i] & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask[i]) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key[i] & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask[i])) {
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find_a6(btrie_t *tree, const uint8_t *key)
|
||||
{
|
||||
uint8_t bit;
|
||||
uintptr_t value;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
i++;
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_destroy(btrie_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
||||
/* free memory pools */
|
||||
for (i = 0; i < tree->len; i++) {
|
||||
free(tree->pools[i]);
|
||||
}
|
||||
|
||||
free(tree);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <btrie.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
btrie_t *it;
|
||||
int ret;
|
||||
|
||||
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
|
||||
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
|
||||
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
|
||||
|
||||
it = btrie_create();
|
||||
if (it == NULL) {
|
||||
printf("create error!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//add 101.45.69.50/16
|
||||
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 1 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.69.50/16
|
||||
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 2 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.79.50/16
|
||||
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
|
||||
if (ret == 0) {
|
||||
printf("insert 3 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 102.45.79.50/24
|
||||
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 4 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170741554);
|
||||
if (ret == 1) {
|
||||
printf("test case 1 passed\n");
|
||||
} else {
|
||||
printf("test case 1 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170786817);
|
||||
if (ret != 1) {
|
||||
printf("test case 2 passed\n");
|
||||
} else {
|
||||
printf("test case 2 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_delete(it, 1714245426, 0xffffff00);
|
||||
if (ret != 0) {
|
||||
printf("delete 1 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 1714245426);
|
||||
if (ret != 1) {
|
||||
printf("test case 3 passed\n");
|
||||
} else {
|
||||
printf("test case 3 error\n");
|
||||
}
|
||||
|
||||
//add dead:beef::/32
|
||||
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find_a6(it, ip_v6);
|
||||
if (ret == 1) {
|
||||
printf("test case 4 passed\n");
|
||||
} else {
|
||||
printf("test case 4 error\n");
|
||||
}
|
||||
|
||||
// insert 4m ips
|
||||
for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
|
||||
ret = btrie_insert(it, ip, 0xffffffff, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error (%d) (%zu) .\n", ret, ip);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrie_destroy(it);
|
||||
printf("test failed\n");
|
||||
return 1;
|
||||
}
|
@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st
|
||||
set(LIBUNWIND_ASM_SOURCES
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S)
|
||||
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
|
||||
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
|
||||
# Workaround these two issues by compiling as C.
|
||||
#
|
||||
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
|
||||
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
|
||||
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
else()
|
||||
enable_language(ASM)
|
||||
endif()
|
||||
|
||||
set(LIBUNWIND_SOURCES
|
||||
${LIBUNWIND_CXX_SOURCES}
|
||||
|
2
contrib/protobuf
vendored
2
contrib/protobuf
vendored
@ -1 +1 @@
|
||||
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3
|
||||
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee
|
96
debian/clickhouse-server.init
vendored
96
debian/clickhouse-server.init
vendored
@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
|
||||
fi
|
||||
|
||||
|
||||
is_running()
|
||||
{
|
||||
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
|
||||
}
|
||||
|
||||
|
||||
wait_for_done()
|
||||
{
|
||||
timeout=$1
|
||||
attempts=0
|
||||
while is_running; do
|
||||
attempts=$(($attempts + 1))
|
||||
if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
@ -105,49 +85,7 @@ check_config()
|
||||
|
||||
initdb()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
|
||||
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
|
||||
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
|
||||
fi
|
||||
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
else
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
|
||||
fi
|
||||
|
||||
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
|
||||
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
|
||||
return
|
||||
fi
|
||||
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
|
||||
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
|
||||
return
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
|
||||
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
|
||||
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
|
||||
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
|
||||
else
|
||||
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
|
||||
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
|
||||
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -171,17 +109,7 @@ restart()
|
||||
|
||||
forcestop()
|
||||
{
|
||||
local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
echo -n "Stop forcefully $PROGRAM service: "
|
||||
|
||||
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
|
||||
|
||||
wait_for_done
|
||||
|
||||
echo "DONE"
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -261,16 +189,16 @@ main()
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
is_running || service_or_func start
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
is_running && service_or_func stop
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
@ -293,17 +221,7 @@ main()
|
||||
|
||||
status()
|
||||
{
|
||||
if is_running; then
|
||||
echo "$PROGRAM service is running"
|
||||
exit 0
|
||||
else
|
||||
if is_cron_disabled; then
|
||||
echo "$PROGRAM service is stopped";
|
||||
else
|
||||
echo "$PROGRAM: process unexpectedly terminated"
|
||||
fi
|
||||
exit 3
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
|
@ -56,6 +56,7 @@ RUN apt-get update \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
protobuf-compiler-grpc \
|
||||
libc-ares-dev \
|
||||
rapidjson-dev \
|
||||
libsnappy-dev \
|
||||
libparquet-dev \
|
||||
|
@ -104,223 +104,248 @@ function start_server
|
||||
|
||||
function clone_root
|
||||
{
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
|
||||
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
|
||||
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
|
||||
git checkout FETCH_HEAD
|
||||
echo 'Clonned merge head'
|
||||
else
|
||||
git fetch
|
||||
git checkout "$COMMIT_SHA"
|
||||
echo 'Checked out to commit'
|
||||
fi
|
||||
else
|
||||
if [ -v COMMIT_SHA ]; then
|
||||
git checkout "$COMMIT_SHA"
|
||||
fi
|
||||
fi
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
|
||||
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
|
||||
git checkout FETCH_HEAD
|
||||
echo 'Clonned merge head'
|
||||
else
|
||||
git fetch
|
||||
git checkout "$COMMIT_SHA"
|
||||
echo 'Checked out to commit'
|
||||
fi
|
||||
else
|
||||
if [ -v COMMIT_SHA ]; then
|
||||
git checkout "$COMMIT_SHA"
|
||||
fi
|
||||
fi
|
||||
)
|
||||
}
|
||||
|
||||
function clone_submodules
|
||||
{
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
|
||||
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring contrib/miniselect contrib/xz)
|
||||
SUBMODULES_TO_UPDATE=(
|
||||
contrib/boost
|
||||
contrib/zlib-ng
|
||||
contrib/libxml2
|
||||
contrib/poco
|
||||
contrib/libunwind
|
||||
contrib/ryu
|
||||
contrib/fmtlib
|
||||
contrib/base64
|
||||
contrib/cctz
|
||||
contrib/libcpuid
|
||||
contrib/double-conversion
|
||||
contrib/libcxx
|
||||
contrib/libcxxabi
|
||||
contrib/libc-headers
|
||||
contrib/lz4
|
||||
contrib/zstd
|
||||
contrib/fastops
|
||||
contrib/rapidjson
|
||||
contrib/re2
|
||||
contrib/sparsehash-c11
|
||||
contrib/croaring
|
||||
contrib/miniselect
|
||||
contrib/xz
|
||||
)
|
||||
|
||||
git submodule sync
|
||||
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
|
||||
git submodule foreach git reset --hard
|
||||
git submodule foreach git checkout @ -f
|
||||
git submodule foreach git clean -xfd
|
||||
)
|
||||
git submodule sync
|
||||
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
|
||||
git submodule foreach git reset --hard
|
||||
git submodule foreach git checkout @ -f
|
||||
git submodule foreach git clean -xfd
|
||||
)
|
||||
}
|
||||
|
||||
function run_cmake
|
||||
{
|
||||
CMAKE_LIBS_CONFIG=(
|
||||
"-DENABLE_LIBRARIES=0"
|
||||
"-DENABLE_TESTS=0"
|
||||
"-DENABLE_UTILS=0"
|
||||
"-DENABLE_EMBEDDED_COMPILER=0"
|
||||
"-DENABLE_THINLTO=0"
|
||||
"-DUSE_UNWIND=1"
|
||||
)
|
||||
CMAKE_LIBS_CONFIG=(
|
||||
"-DENABLE_LIBRARIES=0"
|
||||
"-DENABLE_TESTS=0"
|
||||
"-DENABLE_UTILS=0"
|
||||
"-DENABLE_EMBEDDED_COMPILER=0"
|
||||
"-DENABLE_THINLTO=0"
|
||||
"-DUSE_UNWIND=1"
|
||||
)
|
||||
|
||||
# TODO remove this? we don't use ccache anyway. An option would be to download it
|
||||
# from S3 simultaneously with cloning.
|
||||
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
|
||||
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
|
||||
export CCACHE_NOHASHDIR=true
|
||||
export CCACHE_COMPILERCHECK=content
|
||||
export CCACHE_MAXSIZE=15G
|
||||
# TODO remove this? we don't use ccache anyway. An option would be to download it
|
||||
# from S3 simultaneously with cloning.
|
||||
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
|
||||
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
|
||||
export CCACHE_NOHASHDIR=true
|
||||
export CCACHE_COMPILERCHECK=content
|
||||
export CCACHE_MAXSIZE=15G
|
||||
|
||||
ccache --show-stats ||:
|
||||
ccache --zero-stats ||:
|
||||
ccache --show-stats ||:
|
||||
ccache --zero-stats ||:
|
||||
|
||||
mkdir "$FASTTEST_BUILD" ||:
|
||||
mkdir "$FASTTEST_BUILD" ||:
|
||||
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
|
||||
)
|
||||
}
|
||||
|
||||
function build
|
||||
{
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
|
||||
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
|
||||
fi
|
||||
ccache --show-stats ||:
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
|
||||
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
|
||||
fi
|
||||
ccache --show-stats ||:
|
||||
)
|
||||
}
|
||||
|
||||
function configure
|
||||
{
|
||||
clickhouse-client --version
|
||||
clickhouse-test --help
|
||||
clickhouse-client --version
|
||||
clickhouse-test --help
|
||||
|
||||
mkdir -p "$FASTTEST_DATA"{,/client-config}
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
|
||||
# doesn't support SSL
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
mkdir -p "$FASTTEST_DATA"{,/client-config}
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
|
||||
# doesn't support SSL
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
}
|
||||
|
||||
function run_tests
|
||||
{
|
||||
clickhouse-server --version
|
||||
clickhouse-test --help
|
||||
clickhouse-server --version
|
||||
clickhouse-test --help
|
||||
|
||||
# Kill the server in case we are running locally and not in docker
|
||||
stop_server ||:
|
||||
|
||||
start_server
|
||||
|
||||
TESTS_TO_SKIP=(
|
||||
00105_shard_collations
|
||||
00109_shard_totals_after_having
|
||||
00110_external_sort
|
||||
00302_http_compression
|
||||
00417_kill_query
|
||||
00436_convert_charset
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00652_replicated_mutations_zookeeper
|
||||
00682_empty_parts_merge
|
||||
00701_rollup
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
00911_tautological_compare
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01031_mutations_interpreter_and_context
|
||||
01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
|
||||
01083_expressions_in_engine_arguments
|
||||
01092_memory_profiler
|
||||
01098_msgpack_format
|
||||
01098_temporary_and_external_tables
|
||||
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- invonvenient when running locally
|
||||
01193_metadata_loading
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01251_dict_is_in_infinite_loop
|
||||
01259_dictionary_custom_settings_ddl
|
||||
01268_dictionary_direct_layout
|
||||
01280_ssd_complex_key_dictionary
|
||||
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01318_encrypt # Depends on OpenSSL
|
||||
01318_decrypt # Depends on OpenSSL
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
01292_create_user
|
||||
01294_lazy_database_concurrent
|
||||
01305_replica_create_drop_zookeeper
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
base64
|
||||
brotli
|
||||
capnproto
|
||||
client
|
||||
ddl_dictionaries
|
||||
h3
|
||||
hashing
|
||||
hdfs
|
||||
java_hash
|
||||
json
|
||||
limit_memory
|
||||
live_view
|
||||
memory_leak
|
||||
memory_limit
|
||||
mysql
|
||||
odbc
|
||||
parallel_alter
|
||||
parquet
|
||||
protobuf
|
||||
secure
|
||||
sha256
|
||||
xz
|
||||
|
||||
# Not sure why these two fail even in sequential mode. Disabled for now
|
||||
# to make some progress.
|
||||
00646_url_engine
|
||||
00974_query_profiler
|
||||
|
||||
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
||||
01504_rocksdb
|
||||
|
||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||
01460_DistributedFilesToInsert
|
||||
|
||||
01541_max_memory_usage_for_user
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
||||
# We will rerun sequentially any tests that have failed during parallel run.
|
||||
# They might have failed because there was some interference from other tests
|
||||
# running concurrently. If they fail even in seqential mode, we will report them.
|
||||
# FIXME All tests that require exclusive access to the server must be
|
||||
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
|
||||
# run them in a separate group after all other tests. This is faster and also
|
||||
# explicit instead of guessing.
|
||||
if [[ -n "${FAILED_TESTS[*]}" ]]
|
||||
then
|
||||
# Kill the server in case we are running locally and not in docker
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
|
||||
start_server
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
TESTS_TO_SKIP=(
|
||||
00105_shard_collations
|
||||
00109_shard_totals_after_having
|
||||
00110_external_sort
|
||||
00302_http_compression
|
||||
00417_kill_query
|
||||
00436_convert_charset
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00652_replicated_mutations_zookeeper
|
||||
00682_empty_parts_merge
|
||||
00701_rollup
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
00911_tautological_compare
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01031_mutations_interpreter_and_context
|
||||
01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
|
||||
01083_expressions_in_engine_arguments
|
||||
01092_memory_profiler
|
||||
01098_msgpack_format
|
||||
01098_temporary_and_external_tables
|
||||
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- invonvenient when running locally
|
||||
01193_metadata_loading
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01251_dict_is_in_infinite_loop
|
||||
01259_dictionary_custom_settings_ddl
|
||||
01268_dictionary_direct_layout
|
||||
01280_ssd_complex_key_dictionary
|
||||
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01318_encrypt # Depends on OpenSSL
|
||||
01318_decrypt # Depends on OpenSSL
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
01292_create_user
|
||||
01294_lazy_database_concurrent
|
||||
01305_replica_create_drop_zookeeper
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
base64
|
||||
brotli
|
||||
capnproto
|
||||
client
|
||||
ddl_dictionaries
|
||||
h3
|
||||
hashing
|
||||
hdfs
|
||||
java_hash
|
||||
json
|
||||
limit_memory
|
||||
live_view
|
||||
memory_leak
|
||||
memory_limit
|
||||
mysql
|
||||
odbc
|
||||
parallel_alter
|
||||
parquet
|
||||
protobuf
|
||||
secure
|
||||
sha256
|
||||
xz
|
||||
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
# Not sure why these two fail even in sequential mode. Disabled for now
|
||||
# to make some progress.
|
||||
00646_url_engine
|
||||
00974_query_profiler
|
||||
|
||||
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
||||
01504_rocksdb
|
||||
|
||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||
01460_DistributedFilesToInsert
|
||||
|
||||
01541_max_memory_usage_for_user
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
01561_mann_whitney_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
||||
# We will rerun sequentially any tests that have failed during parallel run.
|
||||
# They might have failed because there was some interference from other tests
|
||||
# running concurrently. If they fail even in seqential mode, we will report them.
|
||||
# FIXME All tests that require exclusive access to the server must be
|
||||
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
|
||||
# run them in a separate group after all other tests. This is faster and also
|
||||
# explicit instead of guessing.
|
||||
if [[ -n "${FAILED_TESTS[*]}" ]]
|
||||
then
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
|
||||
start_server
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
}
|
||||
|
||||
case "$stage" in
|
||||
|
@ -415,4 +415,4 @@ if not args.keep_created_tables and not args.use_existing_tables:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd('drop-2')
|
||||
|
@ -10,6 +10,11 @@ RUN apt-get update --yes \
|
||||
gpg-agent \
|
||||
debsig-verify \
|
||||
strace \
|
||||
protobuf-compiler \
|
||||
protobuf-compiler-grpc \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
libc-ares-dev \
|
||||
--yes --no-install-recommends
|
||||
|
||||
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
|
||||
@ -33,7 +38,8 @@ RUN set -x \
|
||||
&& dpkg -i "${PKG_VERSION}.deb"
|
||||
|
||||
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
|
||||
&& ninja re2_st clickhouse_grpc_protos \
|
||||
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
|
||||
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
|
||||
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log
|
||||
|
@ -13,9 +13,9 @@ cmake .. \
|
||||
-DENABLE_CLICKHOUSE_SERVER=ON \
|
||||
-DENABLE_CLICKHOUSE_CLIENT=ON \
|
||||
-DUSE_STATIC_LIBRARIES=OFF \
|
||||
-DCLICKHOUSE_SPLIT_BINARY=ON \
|
||||
-DSPLIT_SHARED_LIBRARIES=ON \
|
||||
-DENABLE_LIBRARIES=OFF \
|
||||
-DUSE_UNWIND=ON \
|
||||
-DENABLE_UTILS=OFF \
|
||||
-DENABLE_TESTS=OFF
|
||||
```
|
||||
|
@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -273,13 +273,15 @@ SELECT
|
||||
sum(Duration) AS Duration
|
||||
FROM UAct
|
||||
GROUP BY UserID
|
||||
```text
|
||||
```
|
||||
|
||||
``` text
|
||||
┌──────────────UserID─┬─PageViews─┬─Duration─┐
|
||||
│ 4324182021466249494 │ 6 │ 185 │
|
||||
└─────────────────────┴───────────┴──────────┘
|
||||
```
|
||||
|
||||
``` sqk
|
||||
``` sql
|
||||
select count() FROM UAct
|
||||
```
|
||||
|
||||
|
@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster:
|
||||
</zookeeper>
|
||||
```
|
||||
|
||||
ClickHouse also supports to store replicas meta information in the auxiliary ZooKeeper cluster by providing ZooKeeper cluster name and path as engine arguments.
|
||||
In other word, it supports to store the metadata of differnt tables in different ZooKeeper clusters.
|
||||
|
||||
Example of setting the addresses of the auxiliary ZooKeeper cluster:
|
||||
|
||||
``` xml
|
||||
<auxiliary_zookeepers>
|
||||
<zookeeper2>
|
||||
<node index="1">
|
||||
<host>example_2_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="2">
|
||||
<host>example_2_2</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="3">
|
||||
<host>example_2_3</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper2>
|
||||
<zookeeper3>
|
||||
<node index="1">
|
||||
<host>example_3_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper3>
|
||||
</auxiliary_zookeepers>
|
||||
```
|
||||
|
||||
To store table datameta in a auxiliary ZooKeeper cluster instead of default ZooKeeper cluster, we can use the SQL to create table with
|
||||
ReplicatedMergeTree engine as follow:
|
||||
|
||||
```
|
||||
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
|
||||
```
|
||||
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
|
||||
|
||||
If ZooKeeper isn’t set in the config file, you can’t create replicated tables, and any existing replicated tables will be read-only.
|
||||
@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co
|
||||
|
||||
```xml
|
||||
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_name>
|
||||
```
|
||||
|
||||
In this case, you can omit arguments when creating tables:
|
||||
|
@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse
|
||||
|
||||
## Single Node Setup {#single-node-setup}
|
||||
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
|
||||
|
||||
For example, you have chosen `deb` packages and executed:
|
||||
|
||||
|
@ -44,11 +44,10 @@ stages, such as query planning or distributed queries.
|
||||
|
||||
To be useful, the tracing information has to be exported to a monitoring system
|
||||
that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids
|
||||
a dependency on a particular monitoring system, instead only
|
||||
providing the tracing data conforming to the standard. A natural way to do so
|
||||
in an SQL RDBMS is a system table. OpenTelemetry trace span information
|
||||
a dependency on a particular monitoring system, instead only providing the
|
||||
tracing data through a system table. OpenTelemetry trace span information
|
||||
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
|
||||
is stored in the system table called `system.opentelemetry_span_log`.
|
||||
is stored in the `system.opentelemetry_span_log` table.
|
||||
|
||||
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
|
||||
element in the default config file `config.xml`. It is enabled by default.
|
||||
@ -67,3 +66,31 @@ The table has the following columns:
|
||||
|
||||
The tags or attributes are saved as two parallel arrays, containing the keys
|
||||
and values. Use `ARRAY JOIN` to work with them.
|
||||
|
||||
## Integration with monitoring systems
|
||||
|
||||
At the moment, there is no ready tool that can export the tracing data from
|
||||
ClickHouse to a monitoring system.
|
||||
|
||||
For testing, it is possible to setup the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format:
|
||||
|
||||
```sql
|
||||
CREATE MATERIALIZED VIEW default.zipkin_spans
|
||||
ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
|
||||
SETTINGS output_format_json_named_tuples_as_objects = 1,
|
||||
output_format_json_array_of_rows = 1 AS
|
||||
SELECT
|
||||
lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
|
||||
lower(hex(parent_span_id)) AS parentId,
|
||||
lower(hex(span_id)) AS id,
|
||||
operation_name AS name,
|
||||
start_time_us AS timestamp,
|
||||
finish_time_us - start_time_us AS duration,
|
||||
cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
|
||||
cast(tuple(
|
||||
attribute.values[indexOf(attribute.names, 'db.statement')]),
|
||||
'Tuple("db.statement" text)') AS tags
|
||||
FROM system.opentelemetry_span_log
|
||||
```
|
||||
|
||||
In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
|
||||
|
@ -4,4 +4,59 @@ toc_priority: 5
|
||||
|
||||
# avg {#agg_function-avg}
|
||||
|
||||
Calculates the average. Only works for numbers. The result is always Float64.
|
||||
Calculates the arithmetic mean.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
avgWeighted(x)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `x` — Values.
|
||||
|
||||
`x` must be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `NaN` if the supplied parameter is empty.
|
||||
- Mean otherwise.
|
||||
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ 2.5 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avg(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ nan │
|
||||
└────────┘
|
||||
```
|
||||
|
@ -14,17 +14,21 @@ avgWeighted(x, weight)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `x` — Values.
|
||||
- `weight` — Weights of the values.
|
||||
|
||||
Type of `x` and `weight` must be the same.
|
||||
`x` and `weight` must both be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md),
|
||||
but may have different types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Weighted mean.
|
||||
- `NaN`. If all the weights are equal to 0.
|
||||
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
|
||||
- Weighted mean otherwise.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -42,3 +46,54 @@ Result:
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avgWeighted(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -25,7 +25,7 @@ SELECT [DISTINCT] expr_list
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[UNION ALL ...]
|
||||
[UNION ...]
|
||||
[INTO OUTFILE filename]
|
||||
[FORMAT format]
|
||||
```
|
||||
@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
|
||||
- [SELECT clause](#select-clause)
|
||||
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
|
||||
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
|
||||
- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
|
||||
- [FORMAT clause](../../../sql-reference/statements/select/format.md)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
toc_title: UNION ALL
|
||||
toc_title: UNION
|
||||
---
|
||||
|
||||
# UNION ALL Clause {#union-all-clause}
|
||||
@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined
|
||||
|
||||
Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
|
||||
|
||||
## Limitations {#limitations}
|
||||
# UNION DISTINCT Clause {#union-distinct-clause}
|
||||
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` will do a distinct transform for union result, it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
# UNION Clause {#union-clause}
|
||||
By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify union mode by setting `union_default_mode`, values can be 'ALL', 'DISTINCT' or empty string. However, if you use `UNION` with setting `union_default_mode` to empty string, it will throw an exception.
|
||||
|
||||
Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
|
||||
Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.
|
||||
|
@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
|
||||
| Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
|
||||
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
|
||||
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
|
||||
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
|
||||
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h33 | [Apache Lic 2.0ense 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2. 1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -11,7 +11,6 @@
|
||||
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00"
|
||||
|
||||
# 示例数据集 {#example-datasets}
|
||||
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集,还可以使用示例查询。
|
||||
|
||||
对于某些数据集示例查询也可用。
|
||||
|
||||
- [脱敏的Yandex.Metrica数据集](metrica.md)
|
||||
- [星型基准测试](star-schema.md)
|
||||
- [维基访问数据](wikistat.md)
|
||||
- [Criteo TB级别点击日志](criteo.md)
|
||||
- [AMPLab大数据基准测试](amplab-benchmark.md)
|
||||
- [纽约出租车数据](nyc-taxi.md)
|
||||
- [航班飞行数据](ontime.md)
|
||||
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
|
||||
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
|
||||
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
|
||||
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
|
||||
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
|
||||
- [OnTime](../../getting-started/example-datasets/ontime.md)
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
|
||||
|
@ -1,17 +1,17 @@
|
||||
---
|
||||
toc_priority: 21
|
||||
toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
|
||||
toc_priority: 15
|
||||
toc_title: Yandex.Metrica Data
|
||||
---
|
||||
|
||||
# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
|
||||
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
|
||||
|
||||
Dataset由两个表组成,其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`)的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科。
|
||||
数据集由两个表组成,包含关于Yandex.Metrica的hits(`hits_v1`)和visit(`visits_v1`)的匿名数据。你可以阅读更多关于Yandex的信息。在[ClickHouse历史](../../introduction/history.md)的Metrica部分。
|
||||
|
||||
数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
|
||||
数据集由两个表组成,他们中的任何一个都可以下载作为一个压缩`tsv.xz`的文件或准备的分区。除此之外,一个扩展版的`hits`表包含1亿行TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz,准备分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz。
|
||||
|
||||
## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}
|
||||
|
||||
下载和导入点击表:
|
||||
下载和导入`hits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
|
||||
@ -21,7 +21,7 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
下载和导入访问:
|
||||
下载和导入`visits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
|
||||
@ -31,9 +31,9 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
```
|
||||
|
||||
## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
|
||||
从压缩的TSV文件下载并导入命中:
|
||||
从TSV压缩文件下载并导入`hits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
|
||||
@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
从压缩tsv文件下载和导入访问:
|
||||
从压缩tsv文件下载和导入`visits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
|
||||
@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
|
||||
## 查询示例 {#example-queries}
|
||||
|
||||
[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
|
||||
[使用教程](../../getting-started/tutorial.md)是以Yandex.Metrica数据集开始教程。
|
||||
|
||||
查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的(它们被命名为 `test.hists` 和 `test.visits` 那里)。
|
||||
可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到对这些表的查询的其他示例(它们被命名为`test.hists`和`test.visits`)。
|
||||
|
@ -1,3 +1,10 @@
|
||||
---
|
||||
machine_translated: true
|
||||
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
|
||||
toc_folder_title: "\u5BFC\u8A00"
|
||||
toc_priority: 2
|
||||
---
|
||||
|
||||
# 入门 {#ru-men}
|
||||
|
||||
如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md).
|
||||
|
@ -1,34 +1,46 @@
|
||||
---
|
||||
toc_priority: 11
|
||||
toc_title: 安装部署
|
||||
---
|
||||
|
||||
# 安装 {#clickhouse-an-zhuang}
|
||||
|
||||
## 系统要求 {#xi-tong-yao-qiu}
|
||||
|
||||
ClickHouse可以在任何具有x86_64,AArch64或PowerPC64LE CPU架构的Linux,FreeBSD或Mac OS X上运行。
|
||||
|
||||
虽然预构建的二进制文件通常是为x86 _64编译并利用SSE 4.2指令集,但除非另有说明,否则使用支持它的CPU将成为额外的系统要求。这是检查当前CPU是否支持SSE 4.2的命令:
|
||||
官方预构建的二进制文件通常针对x86_64进行编译,并利用`SSE 4.2`指令集,因此,除非另有说明,支持它的CPU使用将成为额外的系统需求。下面是检查当前CPU是否支持SSE 4.2的命令:
|
||||
|
||||
``` bash
|
||||
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
|
||||
```
|
||||
|
||||
要在不支持SSE 4.2或具有AArch64或PowerPC64LE体系结构的处理器上运行ClickHouse,您应该[通过源构建ClickHouse](#from-sources)进行适当的配置调整。
|
||||
要在不支持`SSE 4.2`或`AArch64`,`PowerPC64LE`架构的处理器上运行ClickHouse,您应该通过适当的配置调整从[源代码构建ClickHouse](#from-sources)。
|
||||
|
||||
## 可用的安装选项 {#install-from-deb-packages}
|
||||
## 可用安装包 {#install-from-deb-packages}
|
||||
|
||||
建议为Debian或Ubuntu使用官方的预编译`deb`软件包。 运行以下命令以安装软件包:
|
||||
### `DEB`安装包
|
||||
|
||||
然后运行:
|
||||
建议使用Debian或Ubuntu的官方预编译`deb`软件包。运行以下命令来安装包:
|
||||
|
||||
``` bash
|
||||
{% include 'install/deb.sh' %}
|
||||
```
|
||||
|
||||
你也可以从这里手动下载安装包:https://repo.clickhouse.tech/deb/stable/main/。
|
||||
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。
|
||||
|
||||
如果你想使用最新的测试版本,请使用`testing`替换`stable`。
|
||||
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/deb/stable/main/)。
|
||||
|
||||
### 来自RPM包 {#from-rpm-packages}
|
||||
安装包列表:
|
||||
|
||||
Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于CentOS,RedHat和所有其他基于rpm的Linux发行版。
|
||||
- `clickhouse-common-static` — ClickHouse编译的二进制文件。
|
||||
- `clickhouse-server` — 创建`clickhouse-server`软连接,并安装默认配置服务
|
||||
- `clickhouse-client` — 创建`clickhouse-client`客户端工具软连接,并安装客户端配置文件。
|
||||
- `clickhouse-common-static-dbg` — 带有调试信息的ClickHouse二进制文件。
|
||||
|
||||
### `RPM`安装包 {#from-rpm-packages}
|
||||
|
||||
推荐使用CentOS、RedHat和所有其他基于rpm的Linux发行版的官方预编译`rpm`包。
|
||||
|
||||
首先,您需要添加官方存储库:
|
||||
|
||||
@ -38,84 +50,120 @@ sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
|
||||
```
|
||||
|
||||
如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。
|
||||
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。
|
||||
|
||||
然后运行这些命令以实际安装包:
|
||||
然后运行命令安装:
|
||||
|
||||
``` bash
|
||||
sudo yum install clickhouse-server clickhouse-client
|
||||
```
|
||||
|
||||
您也可以从此处手动下载和安装软件包:https://repo.clickhouse.tech/rpm/stable/x86_64。
|
||||
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/rpm/stable/x86_64)。
|
||||
|
||||
### 来自Docker {#from-docker-image}
|
||||
### `Tgz`安装包 {#from-tgz-archives}
|
||||
|
||||
要在Docker中运行ClickHouse,请遵循[码头工人中心](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。那些图像使用官方的`deb`包。
|
||||
如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。
|
||||
|
||||
所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.tech/tgz/`下载。
|
||||
|
||||
下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新版本的安装示例:
|
||||
|
||||
``` bash
|
||||
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
|
||||
|
||||
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
|
||||
```
|
||||
|
||||
对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。
|
||||
|
||||
### `Docker`安装包 {#from-docker-image}
|
||||
|
||||
要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。它是官方的`deb`安装包。
|
||||
|
||||
### 其他环境安装包 {#from-other}
|
||||
|
||||
对于非linux操作系统和Arch64 CPU架构,ClickHouse将会以`master`分支的最新提交的进行编译提供(它将会有几小时的延迟)。
|
||||
|
||||
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
|
||||
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
|
||||
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
|
||||
|
||||
下载后,您可以使用`clickhouse client`连接服务,或者使用`clickhouse local`模式处理数据,不过您必须要额外在GitHub下载[server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml)和[users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml)配置文件。
|
||||
|
||||
不建议在生产环境中使用这些构建版本,因为它们没有经过充分的测试,但是您可以自行承担这样做的风险。它们只是ClickHouse功能的一个部分。
|
||||
|
||||
### 使用源码安装 {#from-sources}
|
||||
|
||||
具体编译方式可以参考build.md。
|
||||
要手动编译ClickHouse, 请遵循[Linux](../development/build.md)或[Mac OS X](../development/build-osx.md)说明。
|
||||
|
||||
你可以编译并安装它们。
|
||||
你也可以直接使用而不进行安装。
|
||||
您可以编译并安装它们,也可以使用不安装包的程序。通过手动构建,您可以禁用`SSE 4.2`或`AArch64 cpu`。
|
||||
|
||||
``` text
|
||||
Client: programs/clickhouse-client
|
||||
Server: programs/clickhouse-server
|
||||
```
|
||||
Client: programs/clickhouse-client
|
||||
Server: programs/clickhouse-server
|
||||
|
||||
在服务器中为数据创建如下目录:
|
||||
您需要创建一个数据和元数据文件夹,并为所需的用户`chown`授权。它们的路径可以在服务器配置(`src/programs/server/config.xml`)中改变,默认情况下它们是:
|
||||
|
||||
``` text
|
||||
/opt/clickhouse/data/default/
|
||||
/opt/clickhouse/metadata/default/
|
||||
```
|
||||
/opt/clickhouse/data/default/
|
||||
/opt/clickhouse/metadata/default/
|
||||
|
||||
(它们可以在server config中配置。)
|
||||
为需要的用户运行’chown’
|
||||
|
||||
日志的路径可以在server config (src/programs/server/config.xml)中配置。
|
||||
在Gentoo上,你可以使用`emerge clickhouse`从源代码安装ClickHouse。
|
||||
|
||||
## 启动 {#qi-dong}
|
||||
|
||||
可以运行如下命令在后台启动服务:
|
||||
如果没有`service`,可以运行如下命令在后台启动服务:
|
||||
|
||||
``` bash
|
||||
sudo service clickhouse-server start
|
||||
$ sudo /etc/init.d/clickhouse-server start
|
||||
```
|
||||
|
||||
可以在`/var/log/clickhouse-server/`目录中查看日志。
|
||||
日志文件将输出在`/var/log/clickhouse-server/`文件夹。
|
||||
|
||||
如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`。
|
||||
如果服务器没有启动,检查`/etc/clickhouse-server/config.xml`中的配置。
|
||||
|
||||
你也可以在控制台中直接启动服务:
|
||||
您也可以手动从控制台启动服务器:
|
||||
|
||||
``` bash
|
||||
clickhouse-server --config-file=/etc/clickhouse-server/config.xml
|
||||
```bash
|
||||
$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
|
||||
```
|
||||
|
||||
在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。
|
||||
如果配置文件在当前目录中,你可以不指定’–config-file’参数。它默认使用’./config.xml’。
|
||||
在这种情况下,日志将被打印到控制台,这在开发过程中很方便。
|
||||
|
||||
你可以使用命令行客户端连接到服务:
|
||||
如果配置文件在当前目录中,则不需要指定`——config-file`参数。默认情况下,它的路径为`./config.xml`。
|
||||
|
||||
ClickHouse支持访问限制设置。它们位于`users.xml`文件(与`config.xml`同级目录)。
|
||||
默认情况下,允许`default`用户从任何地方访问,不需要密码。可查看`user/default/networks`。
|
||||
更多信息,请参见[Configuration Files](../operations/configuration-files.md)。
|
||||
|
||||
启动服务后,您可以使用命令行客户端连接到它:
|
||||
|
||||
``` bash
|
||||
clickhouse-client
|
||||
$ clickhouse-client
|
||||
```
|
||||
|
||||
默认情况下它使用’default’用户无密码的与localhost:9000服务建立连接。
|
||||
客户端也可以用于连接远程服务,例如:
|
||||
默认情况下,使用`default`用户并不携带密码连接到`localhost:9000`。还可以使用`--host`参数连接到指定服务器。
|
||||
|
||||
终端必须使用UTF-8编码。
|
||||
更多信息,请参阅[Command-line client](../interfaces/cli.md)。
|
||||
|
||||
示例:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --host=example.com
|
||||
```
|
||||
|
||||
有关更多信息,请参考«Command-line client»部分。
|
||||
|
||||
检查系统是否工作:
|
||||
|
||||
``` bash
|
||||
milovidov@hostname:~/work/metrica/src/src/Client$ ./clickhouse-client
|
||||
$ ./clickhouse-client
|
||||
ClickHouse client version 0.0.18749.
|
||||
Connecting to localhost:9000.
|
||||
Connected to ClickHouse server version 0.0.18749.
|
||||
@ -135,6 +183,6 @@ SELECT 1
|
||||
|
||||
**恭喜,系统已经工作了!**
|
||||
|
||||
为了继续进行实验,你可以尝试下载测试数据集。
|
||||
为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.tech/tutorial.html)。
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/install/) <!--hide-->
|
||||
|
@ -1,19 +1,19 @@
|
||||
---
|
||||
toc_priority: 12
|
||||
toc_title: "\u6559\u7A0B"
|
||||
toc_title: 使用教程
|
||||
---
|
||||
|
||||
# 点击教程 {#clickhouse-tutorial}
|
||||
# ClickHouse教程 {#clickhouse-tutorial}
|
||||
|
||||
## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial}
|
||||
## 从本教程中可以获得什么? {#what-to-expect-from-this-tutorial}
|
||||
|
||||
通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
|
||||
通过学习本教程,您将了解如何设置一个简单的ClickHouse集群。它会很小,但是可以容错和扩展。然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
|
||||
|
||||
## 单节点设置 {#single-node-setup}
|
||||
|
||||
为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image) 。
|
||||
为了延迟演示分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。ClickHouse通常是从[deb](install.md#install-from-deb-packages)或[rpm](install.md#from-rpm-packages)包安装,但对于不支持它们的操作系统也有[其他方法](install.md#from-docker-image)。
|
||||
|
||||
例如,您选择了从 `deb` 包安装,执行:
|
||||
例如,您选择`deb`安装包,执行:
|
||||
|
||||
``` bash
|
||||
{% include 'install/deb.sh' %}
|
||||
@ -21,13 +21,13 @@ toc_title: "\u6559\u7A0B"
|
||||
|
||||
在我们安装的软件中包含这些包:
|
||||
|
||||
- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序,它是交互式ClickHouse控制台客户端。
|
||||
- `clickhouse-client` 包,包含[clickhouse-client](../interfaces/cli.md)客户端,它是交互式ClickHouse控制台客户端。
|
||||
- `clickhouse-common` 包,包含一个ClickHouse可执行文件。
|
||||
- `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。
|
||||
|
||||
服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`<path>` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件,作为config.xml文件的“补丁”,用以复写配置元素。
|
||||
服务器配置文件位于`/etc/clickhouse-server/`。在继续之前,请注意`config.xml`中的`<path>`元素。它决定了数据存储的位置,因此它应该位于磁盘容量的卷上;默认值是`/var/lib/clickhouse/`。如果你想调整配置,直接编辑config是不方便的。考虑到它可能会在将来的包更新中被重写。建议重写配置元素的方法是在配置中创建[config.d文件夹](../operations/configuration-files.md),作为config.xml的重写方式。
|
||||
|
||||
你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
|
||||
你可能已经注意到了,`clickhouse-server`安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
|
||||
|
||||
``` bash
|
||||
sudo service clickhouse-server start
|
||||
@ -39,9 +39,9 @@ sudo service clickhouse-server start
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
```
|
||||
|
||||
服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。
|
||||
服务端日志的默认位置是`/var/log/clickhouse-server/`。当服务端在日志中记录`Ready for connections`消息,即表示服务端已准备好处理客户端连接。
|
||||
|
||||
一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
|
||||
一旦`clickhouse-server`启动并运行,我们可以利用`clickhouse-client`连接到服务端,并运行一些测试查询,如`SELECT "Hello, world!";`.
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
|
||||
|
||||
## 导入示例数据集 {#import-sample-dataset}
|
||||
|
||||
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。有 [多种导入Yandex.Metrica数据集的的方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
|
||||
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。[多种导入Yandex.Metrica数据集方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
|
||||
|
||||
### 下载并提取表数据 {#download-and-extract-table-data}
|
||||
|
||||
@ -93,17 +93,17 @@ curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unx
|
||||
|
||||
### 创建表 {#create-tables}
|
||||
|
||||
与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个 `default` 数据库,但我们将创建一个新的数据库 `tutorial`:
|
||||
与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个`default`数据库,但我们将创建一个新的数据库`tutorial`:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
|
||||
```
|
||||
|
||||
与创建数据库相比,创建表的语法要复杂得多(请参阅 [参考资料](../sql-reference/statements/create.md). 一般 `CREATE TABLE` 声明必须指定三个关键的事情:
|
||||
与创建数据库相比,创建表的语法要复杂得多(请参阅[参考资料](../sql-reference/statements/create.md). 一般`CREATE TABLE`声明必须指定三个关键的事情:
|
||||
|
||||
1. 要创建的表的名称。
|
||||
2. 表结构,例如:列名和对应的[数据类型](../sql-reference/data-types/index.md)。
|
||||
3. [表引擎](../engines/table-engines/index.md) 及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
|
||||
3. [表引擎](../engines/table-engines/index.md)及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
|
||||
|
||||
Yandex.Metrica是一个网络分析服务,样本数据集不包括其全部功能,因此只有两个表可以创建:
|
||||
|
||||
@ -455,11 +455,11 @@ SETTINGS index_granularity = 8192
|
||||
|
||||
您可以使用`clickhouse-client`的交互模式执行这些查询(只需在终端中启动它,而不需要提前指定查询)。或者如果你愿意,可以尝试一些[替代接口](../interfaces/index.md)。
|
||||
|
||||
正如我们所看到的, `hits_v1` 使用 [基本的MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而 `visits_v1` 使用 [折叠树](../engines/table-engines/mergetree-family/collapsingmergetree.md) 变体。
|
||||
正如我们所看到的, `hits_v1`使用 [MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而`visits_v1`使用 [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md)引擎。
|
||||
|
||||
### 导入数据 {#import-data}
|
||||
|
||||
数据导入到ClickHouse是通过以下方式完成的 [INSERT INTO](../sql-reference/statements/insert-into.md) 查询像许多其他SQL数据库。 然而,数据通常是在一个提供 [支持的序列化格式](../interfaces/formats.md) 而不是 `VALUES` 子句(也支持)。
|
||||
数据导入到ClickHouse是通过[INSERT INTO](../sql-reference/statements/insert-into.md)方式完成的,查询类似许多SQL数据库。然而,数据通常是在一个提供[支持序列化格式](../interfaces/formats.md)而不是`VALUES`子句(也支持)。
|
||||
|
||||
我们之前下载的文件是以制表符分隔的格式,所以这里是如何通过控制台客户端导入它们:
|
||||
|
||||
@ -468,7 +468,7 @@ clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert
|
||||
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
|
||||
```
|
||||
|
||||
ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的 `--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询 `system.settings` 表:
|
||||
ClickHouse有很多[要调整的设置](../operations/settings/index.md)在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的`--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询`system.settings` 表:
|
||||
|
||||
``` sql
|
||||
SELECT name, value, changed, description
|
||||
@ -479,14 +479,14 @@ FORMAT TSV
|
||||
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
|
||||
```
|
||||
|
||||
您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
|
||||
您也可以[OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize)导入后的表。使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
|
||||
clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
|
||||
```
|
||||
|
||||
这些查询开始一个I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。
|
||||
这些查询开始I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。
|
||||
|
||||
现在我们可以检查表导入是否成功:
|
||||
|
||||
@ -524,9 +524,9 @@ ClickHouse集群是一个同质集群。 设置步骤:
|
||||
1. 在群集的所有机器上安装ClickHouse服务端
|
||||
2. 在配置文件中设置群集配置
|
||||
3. 在每个实例上创建本地表
|
||||
4. 创建一个 [分布式表](../engines/table-engines/special/distributed.md)
|
||||
4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)
|
||||
|
||||
[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 “视图”,映射到ClickHouse集群的本地表。 从分布式表中执行 **SELECT** 查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。
|
||||
[分布式表](../engines/table-engines/special/distributed.md)实际上是一种`view`,映射到ClickHouse集群的本地表。 从分布式表中执行**SELECT**查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。
|
||||
|
||||
具有三个分片,每个分片一个副本的集群的示例配置:
|
||||
|
||||
@ -555,7 +555,7 @@ ClickHouse集群是一个同质集群。 设置步骤:
|
||||
</remote_servers>
|
||||
```
|
||||
|
||||
为了进一步演示,让我们使用和创建 `hits_v1` 表相同的 `CREATE TABLE` 语句创建一个新的本地表,但表名不同:
|
||||
为了进一步演示,让我们使用和创建`hits_v1`表相同的`CREATE TABLE`语句创建一个新的本地表,但表名不同:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
|
||||
@ -568,9 +568,9 @@ CREATE TABLE tutorial.hits_all AS tutorial.hits_local
|
||||
ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
|
||||
```
|
||||
|
||||
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表 [远程](../sql-reference/table-functions/remote.md) 表功能。
|
||||
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表[远程](../sql-reference/table-functions/remote.md)表功能。
|
||||
|
||||
让我们运行 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
|
||||
让我们运行[INSERT SELECT](../sql-reference/statements/insert-into.md)将该表传播到多个服务器。
|
||||
|
||||
``` sql
|
||||
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
|
||||
@ -609,10 +609,10 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
|
||||
</remote_servers>
|
||||
```
|
||||
|
||||
启用本机复制 [Zookeeper](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
|
||||
启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
|
||||
|
||||
!!! note "注"
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
!!! note "注意"
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
|
||||
ZooKeeper位置在配置文件中指定:
|
||||
|
||||
@ -653,12 +653,12 @@ ENGINE = ReplcatedMergeTree(
|
||||
...
|
||||
```
|
||||
|
||||
在这里,我们使用 [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) 表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。
|
||||
在这里,我们使用[ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md)表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。
|
||||
|
||||
``` sql
|
||||
INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
|
||||
```
|
||||
|
||||
复制在多主机模式下运行。 数据可以加载到任何副本中,然后系统会自动将其与其他实例同步。 复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。 至少应有一个副本允许数据摄取。 其他人将同步数据和修复一致性,一旦他们将再次变得活跃。 请注意,这种方法允许最近插入的数据丢失的可能性很低。
|
||||
复制在多主机模式下运行。数据可以加载到任何副本中,然后系统自动将其与其他实例同步。复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。至少应该有一个副本允许数据摄入。另一些则会在重新激活后同步数据并修复一致性。请注意,这种方法允许最近插入的数据丢失的可能性很低。
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/tutorial/) <!--hide-->
|
||||
|
@ -4,53 +4,50 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)
|
||||
|
||||
在传统的行式数据库系统中,数据按如下顺序存储:
|
||||
|
||||
| row | watchID | JavaEnable | title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|------------|-----------|---------------------|
|
||||
| #0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | 任务 | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|--------------------|-----------|---------------------|
|
||||
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
|
||||
处于同一行中的数据总是被物理的存储在一起。
|
||||
|
||||
常见的行式数据库系统有: MySQL、Postgres和MS SQL Server。
|
||||
{: .灰色 }
|
||||
常见的行式数据库系统有:`MySQL`、`Postgres`和`MS SQL Server`。
|
||||
|
||||
在列式数据库系统中,数据按如下的顺序存储:
|
||||
|
||||
| row: | #0 | #1 | #2 | #N |
|
||||
| Row: | #0 | #1 | #2 | #N |
|
||||
|-------------|---------------------|---------------------|---------------------|-----|
|
||||
| watchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| JavaEnable: | 1 | 0 | 1 | … |
|
||||
| title: | 投资者关系 | 联系我们 | 任务 | … |
|
||||
| Title: | Investor Relations | Contact us | Mission | … |
|
||||
| GoodEvent: | 1 | 1 | 1 | … |
|
||||
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
|
||||
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
|
||||
|
||||
该示例中只展示了数据在列式数据库中数据的排列方式。
|
||||
对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。
|
||||
这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。
|
||||
|
||||
常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。
|
||||
{: .灰色 }
|
||||
|
||||
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
|
||||
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例;每种类型的查询(行、列和字节)读取多少数据;读取数据和更新之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
|
||||
|
||||
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
|
||||
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
|
||||
|
||||
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng}
|
||||
|
||||
- 大多数是读请求
|
||||
- 数据总是以相当大的批(\> 1000 rows)进行写入
|
||||
- 不修改已添加的数据
|
||||
- 每次查询都从数据库中读取大量的行,但是同时又仅需要少量的列
|
||||
- 绝大多数是读请求
|
||||
- 数据以相当大的批次(\> 1000行)更新,而不是单行更新;或者根本没有更新。
|
||||
- 已添加到数据库的数据不能修改。
|
||||
- 对于读取,从数据库中提取相当多的行,但只提取列的一小部分。
|
||||
- 宽表,即每个表包含着大量的列
|
||||
- 较少的查询(通常每台服务器每秒数百个查询或更少)
|
||||
- 查询相对较少(通常每台服务器每秒查询数百次或更少)
|
||||
- 对于简单查询,允许延迟大约50毫秒
|
||||
- 列中的数据相对较小: 数字和短字符串(例如,每个URL 60个字节)
|
||||
- 处理单个查询时需要高吞吐量(每个服务器每秒高达数十亿行)
|
||||
- 列中的数据相对较小:数字和短字符串(例如,每个URL 60个字节)
|
||||
- 处理单个查询时需要高吞吐量(每台服务器每秒可达数十亿行)
|
||||
- 事务不是必须的
|
||||
- 对数据一致性要求低
|
||||
- 每一个查询除了一个大表外都很小
|
||||
- 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中
|
||||
- 每个查询有一个大表。除了他意以外,其他的都很小。
|
||||
- 查询结果明显小于源数据。换句话说,数据经过过滤或聚合,因此结果适合于单个服务器的RAM中
|
||||
|
||||
很容易可以看出,OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景,并不是非常完美的适用方案。例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
toc_priority: 8
|
||||
toc_title: "\u91C7\u7528\u8005"
|
||||
toc_priority: 5
|
||||
toc_title: "ClickHouse用户"
|
||||
---
|
||||
|
||||
# ClickHouse用户 {#clickhouse-adopters}
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 2
|
||||
toc_title: ClickHouse的特性
|
||||
---
|
||||
|
||||
# ClickHouse的特性 {#clickhouse-de-te-xing}
|
||||
|
||||
## 真正的列式数据库管理系统 {#zhen-zheng-de-lie-shi-shu-ju-ku-guan-li-xi-tong}
|
||||
@ -12,9 +17,13 @@
|
||||
|
||||
在一些列式数据库管理系统中(例如:InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
|
||||
|
||||
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外,ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs),这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
|
||||
|
||||
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}
|
||||
|
||||
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果有可以使用SSD和内存,它也会合理的利用这些资源。
|
||||
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。
|
||||
|
||||
ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果可以使用SSD和内存,它也会合理的利用这些资源。
|
||||
|
||||
## 多核心并行处理 {#duo-he-xin-bing-xing-chu-li}
|
||||
|
||||
@ -27,9 +36,11 @@ ClickHouse会使用服务器上一切可用的资源,从而以最自然的方
|
||||
|
||||
## 支持SQL {#zhi-chi-sql}
|
||||
|
||||
ClickHouse支持基于SQL的声明式查询语言,该语言大部分情况下是与SQL标准兼容的。
|
||||
支持的查询包括 GROUP BY,ORDER BY,IN,JOIN以及非相关子查询。
|
||||
不支持窗口函数和相关子查询。
|
||||
ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同。
|
||||
|
||||
支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。
|
||||
|
||||
相关(依赖性)子查询和窗口函数暂不受支持,但将来会被实现。
|
||||
|
||||
## 向量引擎 {#xiang-liang-yin-qing}
|
||||
|
||||
@ -55,12 +66,20 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进
|
||||
2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。
|
||||
3. 不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。
|
||||
|
||||
## Adaptive Join Algorithm {#adaptive-join-algorithm}
|
||||
|
||||
ClickHouse支持自定义[JOIN](../sql-reference/statements/select/join.md)多个表,它更倾向于散列连接算法,如果有多个大表,则使用合并-连接算法
|
||||
|
||||
## 支持数据复制和数据完整性 {#zhi-chi-shu-ju-fu-zhi-he-shu-ju-wan-zheng-xing}
|
||||
|
||||
ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后,系统会在后台将数据分发给其他副本,以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复,在一些少数的复杂情况下需要手动恢复。
|
||||
|
||||
更多信息,参见 [数据复制](../engines/table-engines/mergetree-family/replication.md)。
|
||||
|
||||
## 角色的访问控制 {#role-based-access-control}
|
||||
|
||||
ClickHouse使用SQL查询实现用户帐户管理,并允许[角色的访问控制](../operations/access-rights.md),类似于ANSI SQL标准和流行的关系数据库管理系统。
|
||||
|
||||
# 限制 {#clickhouseke-xian-zhi}
|
||||
|
||||
1. 没有完整的事务支持。
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 4
|
||||
toc_title: ClickHouse历史
|
||||
---
|
||||
|
||||
# ClickHouse历史 {#clickhouseli-shi}
|
||||
|
||||
ClickHouse最初是为 [YandexMetrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 3
|
||||
toc_title: ClickHouse性能
|
||||
---
|
||||
|
||||
# 性能 {#performance}
|
||||
|
||||
根据Yandex的内部测试结果,ClickHouse表现出了比同类可比较产品更优的性能。你可以在 [这里](https://clickhouse.tech/benchmark/dbms/) 查看具体的测试结果。
|
||||
|
@ -3,18 +3,18 @@ toc_priority: 60
|
||||
toc_title: clickhouse-local
|
||||
---
|
||||
|
||||
# ツ环板-ョツ嘉ッツ偲 {#clickhouse-local}
|
||||
# ClickHouse Local {#clickhouse-local}
|
||||
|
||||
该 `clickhouse-local` 程序使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。
|
||||
`clickhouse-local`模式可以使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。
|
||||
|
||||
接受表示表的数据并使用以下方式查询它们 [ツ环板ECTョツ嘉ッツ偲](../../operations/utilities/clickhouse-local.md).
|
||||
[ClickHouse SQL语法](../../operations/utilities/clickhouse-local.md)支持对表格数据的查询.
|
||||
|
||||
`clickhouse-local` 使用与ClickHouse server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。
|
||||
`clickhouse-local`使用与ClickHouse Server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。
|
||||
|
||||
默认情况下 `clickhouse-local` 不能访问同一主机上的数据,但它支持使用以下方式加载服务器配置 `--config-file` 争论。
|
||||
默认情况下`clickhouse-local`不能访问同一主机上的数据,但它支持使用`--config-file`方式加载服务器配置。
|
||||
|
||||
!!! warning "警告"
|
||||
不建议将生产服务器配置加载到 `clickhouse-local` 因为数据可以在人为错误的情况下被损坏。
|
||||
不建议将生产服务器配置加载到`clickhouse-local`因为数据可以在人为错误的情况下被损坏。
|
||||
|
||||
## 用途 {#usage}
|
||||
|
||||
@ -26,21 +26,21 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
|
||||
|
||||
参数:
|
||||
|
||||
- `-S`, `--structure` — table structure for input data.
|
||||
- `-if`, `--input-format` — input format, `TSV` 默认情况下。
|
||||
- `-f`, `--file` — path to data, `stdin` 默认情况下。
|
||||
- `-q` `--query` — queries to execute with `;` 如delimeter。
|
||||
- `-N`, `--table` — table name where to put output data, `table` 默认情况下。
|
||||
- `-of`, `--format`, `--output-format` — output format, `TSV` 默认情况下。
|
||||
- `--stacktrace` — whether to dump debug output in case of exception.
|
||||
- `--verbose` — more details on query execution.
|
||||
- `-s` — disables `stderr` 记录。
|
||||
- `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty.
|
||||
- `--help` — arguments references for `clickhouse-local`.
|
||||
- `-S`, `--structure` — 输入数据的表结构。
|
||||
- `-if`, `--input-format` — 输入格式化类型, 默认是`TSV`。
|
||||
- `-f`, `--file` — 数据路径, 默认是`stdin`。
|
||||
- `-q` `--query` — 要查询的SQL语句使用`;`做分隔符。
|
||||
- `-N`, `--table` — 数据输出的表名,默认是`table`。
|
||||
- `-of`, `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。
|
||||
- `--stacktrace` — 是否在出现异常时输出栈信息。
|
||||
- `--verbose` — debug显示查询的详细信息。
|
||||
- `-s` — 禁用`stderr`输出信息。
|
||||
- `--config-file` — 与ClickHouse服务器格式相同配置文件的路径,默认情况下配置为空。
|
||||
- `--help` — `clickhouse-local`使用帮助信息。
|
||||
|
||||
还有每个ClickHouse配置变量的参数,这些变量更常用,而不是 `--config-file`.
|
||||
对于每个ClickHouse配置的参数,也可以单独使用,可以不使用`--config-file`指定。
|
||||
|
||||
## 例 {#examples}
|
||||
## 示例 {#examples}
|
||||
|
||||
``` bash
|
||||
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
|
||||
@ -49,7 +49,7 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
|
||||
3 4
|
||||
```
|
||||
|
||||
前面的例子是一样的:
|
||||
另一个示例,类似上一个使用示例:
|
||||
|
||||
``` bash
|
||||
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
|
||||
@ -58,7 +58,22 @@ Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
|
||||
3 4
|
||||
```
|
||||
|
||||
现在让我们为每个Unix用户输出内存用户:
|
||||
你可以使用`stdin`或`--file`参数, 打开任意数量的文件来使用多个文件[`file` table function](../../sql-reference/table-functions/file.md):
|
||||
|
||||
```bash
|
||||
$ echo 1 | tee 1.tsv
|
||||
1
|
||||
|
||||
$ echo 2 | tee 2.tsv
|
||||
2
|
||||
|
||||
$ clickhouse-local --query "
|
||||
select * from file('1.tsv', TSV, 'a int') t1
|
||||
cross join file('2.tsv', TSV, 'b int') t2"
|
||||
1 2
|
||||
```
|
||||
|
||||
现在让我们查询每个Unix用户使用内存:
|
||||
|
||||
``` bash
|
||||
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
|
||||
|
@ -43,13 +43,81 @@ else ()
|
||||
${ENABLE_CLICKHOUSE_ALL})
|
||||
endif ()
|
||||
|
||||
message(STATUS "ClickHouse modes:")
|
||||
|
||||
if (NOT ENABLE_CLICKHOUSE_SERVER)
|
||||
message(WARNING "ClickHouse server mode is not going to be built.")
|
||||
else()
|
||||
message(STATUS "Server mode: ON")
|
||||
endif()
|
||||
|
||||
if (NOT ENABLE_CLICKHOUSE_CLIENT)
|
||||
message(WARNING "ClickHouse client mode is not going to be built. You won't be able to connect to the server and run
|
||||
tests")
|
||||
else()
|
||||
message(STATUS "Client mode: ON")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_LOCAL)
|
||||
message(STATUS "Local mode: ON")
|
||||
else()
|
||||
message(STATUS "Local mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_BENCHMARK)
|
||||
message(STATUS "Benchmark mode: ON")
|
||||
else()
|
||||
message(STATUS "Benchmark mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
|
||||
message(STATUS "Extract from config mode: ON")
|
||||
else()
|
||||
message(STATUS "Extract from config mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_COMPRESSOR)
|
||||
message(STATUS "Compressor mode: ON")
|
||||
else()
|
||||
message(STATUS "Compressor mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_COPIER)
|
||||
message(STATUS "Copier mode: ON")
|
||||
else()
|
||||
message(STATUS "Copier mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_FORMAT)
|
||||
message(STATUS "Format mode: ON")
|
||||
else()
|
||||
message(STATUS "Format mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
|
||||
message(STATUS "Obfuscator mode: ON")
|
||||
else()
|
||||
message(STATUS "Obfuscator mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
|
||||
message(STATUS "ODBC bridge mode: ON")
|
||||
else()
|
||||
message(STATUS "ODBC bridge mode: OFF")
|
||||
endif()
|
||||
|
||||
if (ENABLE_CLICKHOUSE_INSTALL)
|
||||
message(STATUS "ClickHouse install: ON")
|
||||
else()
|
||||
message(STATUS "ClickHouse install: OFF")
|
||||
endif()
|
||||
|
||||
if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
|
||||
set(CLICKHOUSE_ONE_SHARED ON)
|
||||
endif()
|
||||
|
||||
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
|
||||
|
||||
|
||||
macro(clickhouse_target_link_split_lib target name)
|
||||
if(NOT CLICKHOUSE_ONE_SHARED)
|
||||
target_link_libraries(${target} PRIVATE clickhouse-${name}-lib)
|
||||
|
@ -2515,7 +2515,7 @@ public:
|
||||
{
|
||||
std::string traceparent = options["opentelemetry-traceparent"].as<std::string>();
|
||||
std::string error;
|
||||
if (!context.getClientInfo().parseTraceparentHeader(
|
||||
if (!context.getClientInfo().client_trace_context.parseTraceparentHeader(
|
||||
traceparent, error))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
@ -2526,7 +2526,7 @@ public:
|
||||
|
||||
if (options.count("opentelemetry-tracestate"))
|
||||
{
|
||||
context.getClientInfo().opentelemetry_tracestate =
|
||||
context.getClientInfo().client_trace_context.tracestate =
|
||||
options["opentelemetry-tracestate"].as<std::string>();
|
||||
}
|
||||
|
||||
|
@ -62,6 +62,9 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries)
|
||||
{
|
||||
std::exception_ptr exception;
|
||||
|
||||
if (max_tries == 0)
|
||||
throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
for (UInt64 try_number = 1; try_number <= max_tries; ++try_number)
|
||||
{
|
||||
try
|
||||
@ -605,7 +608,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
|
||||
settings_push.replication_alter_partitions_sync = 2;
|
||||
|
||||
query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) +
|
||||
" ATTACH PARTITION " + partition_name +
|
||||
((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name +
|
||||
" FROM " + getQuotedTable(helping_table);
|
||||
|
||||
LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string);
|
||||
@ -636,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
|
||||
if (!task_table.isReplicatedTable())
|
||||
{
|
||||
query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
|
||||
" PARTITION " + partition_name + " DEDUPLICATE;";
|
||||
((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;";
|
||||
|
||||
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);
|
||||
|
||||
@ -807,7 +810,7 @@ bool ClusterCopier::tryDropPartitionPiece(
|
||||
DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number));
|
||||
|
||||
String query = "ALTER TABLE " + getQuotedTable(helping_table);
|
||||
query += " DROP PARTITION " + task_partition.name + "";
|
||||
query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + "";
|
||||
|
||||
/// TODO: use this statement after servers will be updated up to 1.1.54310
|
||||
// query += " DROP PARTITION ID '" + task_partition.name + "'";
|
||||
@ -1567,7 +1570,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
|
||||
DatabaseAndTableName original_table = task_table.table_push;
|
||||
DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number));
|
||||
|
||||
String query = "ALTER TABLE " + getQuotedTable(helping_table) + " DROP PARTITION " + partition_name;
|
||||
String query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + partition_name;
|
||||
|
||||
const ClusterPtr & cluster_push = task_table.cluster_push;
|
||||
Settings settings_push = task_cluster->settings_push;
|
||||
@ -1670,14 +1673,24 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
|
||||
|
||||
std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard)
|
||||
{
|
||||
std::set<String> res;
|
||||
|
||||
createShardInternalTables(timeouts, task_shard, false);
|
||||
|
||||
TaskTable & task_table = task_shard.task_table;
|
||||
|
||||
const String & partition_name = queryToString(task_table.engine_push_partition_key_ast);
|
||||
|
||||
if (partition_name == "'all'")
|
||||
{
|
||||
res.emplace("'all'");
|
||||
return res;
|
||||
}
|
||||
|
||||
String query;
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM"
|
||||
wb << "SELECT DISTINCT " << partition_name << " AS partition FROM"
|
||||
<< " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
|
||||
query = wb.str();
|
||||
}
|
||||
@ -1692,7 +1705,6 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
|
||||
local_context.setSettings(task_cluster->settings_pull);
|
||||
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream());
|
||||
|
||||
std::set<String> res;
|
||||
if (block)
|
||||
{
|
||||
ColumnWithTypeAndName & column = block.getByPosition(0);
|
||||
@ -1803,7 +1815,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
|
||||
if (execution_mode == ClusterExecutionMode::ON_EACH_NODE)
|
||||
max_successful_executions_per_shard = 0;
|
||||
|
||||
std::atomic<size_t> origin_replicas_number;
|
||||
std::atomic<size_t> origin_replicas_number = 0;
|
||||
|
||||
/// We need to execute query on one replica at least
|
||||
auto do_for_shard = [&] (UInt64 shard_index, Settings shard_settings)
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/MMapReadBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -70,7 +71,7 @@ namespace po = boost::program_options;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
static auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
{
|
||||
auto sh = ShellCommand::execute(command);
|
||||
WriteBufferFromFileDescriptor wb_stdout(STDOUT_FILENO);
|
||||
@ -87,7 +88,7 @@ auto executeScript(const std::string & command, bool throw_on_error = false)
|
||||
return sh->tryWait();
|
||||
}
|
||||
|
||||
bool ask(std::string question)
|
||||
static bool ask(std::string question)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
@ -104,6 +105,16 @@ bool ask(std::string question)
|
||||
}
|
||||
}
|
||||
|
||||
static bool filesEqual(std::string path1, std::string path2)
|
||||
{
|
||||
MMapReadBufferFromFile in1(path1, 0);
|
||||
MMapReadBufferFromFile in2(path2, 0);
|
||||
|
||||
/// memcmp is faster than hashing and comparing hashes
|
||||
return in1.buffer().size() == in2.buffer().size()
|
||||
&& 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size());
|
||||
}
|
||||
|
||||
|
||||
int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
{
|
||||
@ -143,57 +154,89 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist",
|
||||
binary_self_path.string());
|
||||
|
||||
fs::path binary_self_canonical_path = fs::canonical(binary_self_path);
|
||||
|
||||
/// Copy binary to the destination directory.
|
||||
|
||||
/// TODO An option to link instead of copy - useful for developers.
|
||||
/// TODO Check if the binary is the same.
|
||||
|
||||
size_t binary_size = fs::file_size(binary_self_path);
|
||||
|
||||
fs::path prefix = fs::path(options["prefix"].as<std::string>());
|
||||
fs::path bin_dir = prefix / fs::path(options["binary-path"].as<std::string>());
|
||||
|
||||
size_t available_space = fs::space(bin_dir).available;
|
||||
if (available_space < binary_size)
|
||||
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
|
||||
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
|
||||
|
||||
fs::path main_bin_path = bin_dir / "clickhouse";
|
||||
fs::path main_bin_tmp_path = bin_dir / "clickhouse.new";
|
||||
fs::path main_bin_old_path = bin_dir / "clickhouse.old";
|
||||
|
||||
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
|
||||
size_t binary_size = fs::file_size(binary_self_path);
|
||||
|
||||
try
|
||||
bool old_binary_exists = fs::exists(main_bin_path);
|
||||
bool already_installed = false;
|
||||
|
||||
/// Check if the binary is the same file (already installed).
|
||||
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
|
||||
{
|
||||
ReadBufferFromFile in(binary_self_path.string());
|
||||
WriteBufferFromFile out(main_bin_tmp_path.string());
|
||||
copyData(in, out);
|
||||
out.sync();
|
||||
|
||||
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
out.finalize();
|
||||
already_installed = true;
|
||||
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
|
||||
}
|
||||
catch (const Exception & e)
|
||||
/// Check if binary has the same content.
|
||||
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
|
||||
{
|
||||
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
|
||||
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
|
||||
throw;
|
||||
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
|
||||
main_bin_path.string());
|
||||
|
||||
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
|
||||
{
|
||||
already_installed = true;
|
||||
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
|
||||
main_bin_path.string(), binary_self_canonical_path.string());
|
||||
}
|
||||
}
|
||||
|
||||
if (fs::exists(main_bin_path))
|
||||
if (already_installed)
|
||||
{
|
||||
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
|
||||
main_bin_path.string(), main_bin_old_path.string());
|
||||
|
||||
/// There is file exchange operation in Linux but it's not portable.
|
||||
fs::rename(main_bin_path, main_bin_old_path);
|
||||
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t available_space = fs::space(bin_dir).available;
|
||||
if (available_space < binary_size)
|
||||
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
|
||||
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
|
||||
|
||||
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
|
||||
fs::rename(main_bin_tmp_path, main_bin_path);
|
||||
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
|
||||
|
||||
try
|
||||
{
|
||||
ReadBufferFromFile in(binary_self_path.string());
|
||||
WriteBufferFromFile out(main_bin_tmp_path.string());
|
||||
copyData(in, out);
|
||||
out.sync();
|
||||
|
||||
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
|
||||
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
out.finalize();
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
|
||||
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
|
||||
throw;
|
||||
}
|
||||
|
||||
if (old_binary_exists)
|
||||
{
|
||||
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
|
||||
main_bin_path.string(), main_bin_old_path.string());
|
||||
|
||||
/// There is file exchange operation in Linux but it's not portable.
|
||||
fs::rename(main_bin_path, main_bin_old_path);
|
||||
}
|
||||
|
||||
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
|
||||
fs::rename(main_bin_tmp_path, main_bin_path);
|
||||
}
|
||||
|
||||
/// Create symlinks.
|
||||
|
||||
@ -401,8 +444,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (!configuration->getString("users.default.password", "").empty()
|
||||
|| configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
|| !configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| !configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
{
|
||||
has_password_for_default_user = true;
|
||||
}
|
||||
@ -576,7 +619,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
"/tmp/test_setcap.sh", main_bin_path.string());
|
||||
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
|
||||
fmt::print(" {}\n", command);
|
||||
executeScript(command);
|
||||
#endif
|
||||
@ -597,10 +640,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
}
|
||||
}
|
||||
|
||||
std::string maybe_sudo;
|
||||
if (getuid() != 0)
|
||||
maybe_sudo = "sudo ";
|
||||
|
||||
std::string maybe_password;
|
||||
if (has_password_for_default_user)
|
||||
maybe_password = " --password";
|
||||
@ -608,10 +647,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
fmt::print(
|
||||
"\nClickHouse has been successfully installed.\n"
|
||||
"\nStart clickhouse-server with:\n"
|
||||
" {}clickhouse start\n"
|
||||
" sudo clickhouse start\n"
|
||||
"\nStart clickhouse-client with:\n"
|
||||
" clickhouse-client{}\n\n",
|
||||
maybe_sudo, maybe_password);
|
||||
maybe_password);
|
||||
}
|
||||
catch (const fs::filesystem_error &)
|
||||
{
|
||||
std::cerr << getCurrentExceptionMessage(false) << '\n';
|
||||
|
||||
if (getuid() != 0)
|
||||
std::cerr << "\nRun with sudo.\n";
|
||||
|
||||
return getCurrentExceptionCode();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -783,17 +831,20 @@ namespace
|
||||
return pid;
|
||||
}
|
||||
|
||||
int stop(const fs::path & pid_file)
|
||||
int stop(const fs::path & pid_file, bool force)
|
||||
{
|
||||
UInt64 pid = isRunning(pid_file);
|
||||
|
||||
if (!pid)
|
||||
return 0;
|
||||
|
||||
if (0 == kill(pid, 15)) /// Terminate
|
||||
fmt::print("Sent termination signal.\n", pid);
|
||||
int signal = force ? SIGKILL : SIGTERM;
|
||||
const char * signal_name = force ? "kill" : "terminate";
|
||||
|
||||
if (0 == kill(pid, signal))
|
||||
fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid);
|
||||
else
|
||||
throwFromErrno("Cannot send termination signal", ErrorCodes::SYSTEM_ERROR);
|
||||
throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
size_t try_num = 0;
|
||||
constexpr size_t num_tries = 60;
|
||||
@ -869,6 +920,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
desc.add_options()
|
||||
("help,h", "produce help message")
|
||||
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
|
||||
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -887,7 +939,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
{
|
||||
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
|
||||
|
||||
return stop(pid_file);
|
||||
return stop(pid_file, options["force"].as<bool>());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -940,6 +992,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
("config-path", po::value<std::string>()->default_value("/etc/clickhouse-server"), "directory with configs")
|
||||
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
|
||||
("user", po::value<std::string>()->default_value("clickhouse"), "clickhouse user")
|
||||
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -962,7 +1015,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
fs::path config = fs::path(options["config-path"].as<std::string>()) / "config.xml";
|
||||
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
|
||||
|
||||
if (int res = stop(pid_file))
|
||||
if (int res = stop(pid_file, options["force"].as<bool>()))
|
||||
return res;
|
||||
return start(user, executable, config, pid_file);
|
||||
}
|
||||
|
@ -109,6 +109,14 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
.argument("err-log-path")
|
||||
.binding("logger.errorlog"));
|
||||
|
||||
options.addOption(Poco::Util::Option("stdout-path", "", "stdout log path, default console")
|
||||
.argument("stdout-path")
|
||||
.binding("logger.stdout"));
|
||||
|
||||
options.addOption(Poco::Util::Option("stderr-path", "", "stderr log path, default console")
|
||||
.argument("stderr-path")
|
||||
.binding("logger.stderr"));
|
||||
|
||||
using Me = std::decay_t<decltype(*this)>;
|
||||
options.addOption(Poco::Util::Option("help", "", "produce this help message")
|
||||
.binding("help")
|
||||
@ -127,6 +135,27 @@ void ODBCBridge::initialize(Application & self)
|
||||
|
||||
config().setString("logger", "ODBCBridge");
|
||||
|
||||
/// Redirect stdout, stderr to specified files.
|
||||
/// Some libraries and sanitizers write to stderr in case of errors.
|
||||
const auto stdout_path = config().getString("logger.stdout", "");
|
||||
if (!stdout_path.empty())
|
||||
{
|
||||
if (!freopen(stdout_path.c_str(), "a+", stdout))
|
||||
throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path);
|
||||
|
||||
/// Disable buffering for stdout.
|
||||
setbuf(stdout, nullptr);
|
||||
}
|
||||
const auto stderr_path = config().getString("logger.stderr", "");
|
||||
if (!stderr_path.empty())
|
||||
{
|
||||
if (!freopen(stderr_path.c_str(), "a+", stderr))
|
||||
throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path);
|
||||
|
||||
/// Disable buffering for stderr.
|
||||
setbuf(stderr, nullptr);
|
||||
}
|
||||
|
||||
buildLoggers(config(), logger(), self.commandName());
|
||||
|
||||
BaseDaemon::logRevision();
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Server/MySQLHandlerFactory.h>
|
||||
#include <Server/PostgreSQLHandlerFactory.h>
|
||||
#include <Server/ProtocolServerAdapter.h>
|
||||
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
@ -84,6 +85,11 @@
|
||||
# include <Poco/Net/SecureServerSocket.h>
|
||||
#endif
|
||||
|
||||
#if USE_GRPC
|
||||
# include <Server/GRPCServer.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric Revision;
|
||||
@ -806,7 +812,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
http_params->setTimeout(settings.http_receive_timeout);
|
||||
http_params->setKeepAliveTimeout(keep_alive_timeout);
|
||||
|
||||
std::vector<std::unique_ptr<Poco::Net::TCPServer>> servers;
|
||||
std::vector<ProtocolServerAdapter> servers;
|
||||
|
||||
std::vector<std::string> listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host");
|
||||
|
||||
@ -1035,6 +1041,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString());
|
||||
});
|
||||
|
||||
#if USE_GRPC
|
||||
create_server("grpc_port", [&](UInt16 port)
|
||||
{
|
||||
Poco::Net::SocketAddress server_address(listen_host, port);
|
||||
servers.emplace_back(std::make_unique<GRPCServer>(*this, make_socket_address(listen_host, port)));
|
||||
LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString());
|
||||
});
|
||||
#endif
|
||||
|
||||
/// Prometheus (if defined and not setup yet with http_port)
|
||||
create_server("prometheus.port", [&](UInt16 port)
|
||||
{
|
||||
@ -1056,7 +1071,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->enableNamedSessions();
|
||||
|
||||
for (auto & server : servers)
|
||||
server->start();
|
||||
server.start();
|
||||
|
||||
{
|
||||
String level_str = config().getString("text_log.level", "");
|
||||
@ -1088,8 +1103,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
int current_connections = 0;
|
||||
for (auto & server : servers)
|
||||
{
|
||||
server->stop();
|
||||
current_connections += server->currentConnections();
|
||||
server.stop();
|
||||
current_connections += server.currentConnections();
|
||||
}
|
||||
|
||||
if (current_connections)
|
||||
@ -1109,7 +1124,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
current_connections = 0;
|
||||
for (auto & server : servers)
|
||||
current_connections += server->currentConnections();
|
||||
current_connections += server.currentConnections();
|
||||
if (!current_connections)
|
||||
break;
|
||||
sleep_current_ms += sleep_one_ms;
|
||||
|
@ -134,6 +134,34 @@
|
||||
<max_connections>4096</max_connections>
|
||||
<keep_alive_timeout>3</keep_alive_timeout>
|
||||
|
||||
<!-- gRPC protocol (see src/Server/grpc_protos/clickhouse_grpc.proto for the API)
|
||||
<grpc_port>9100</grpc_port>
|
||||
<grpc>
|
||||
<enable_ssl>true</enable_ssl> -->
|
||||
|
||||
<!-- The following two files are used only if enable_ssl=1
|
||||
<ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
|
||||
<ssl_key_file>/path/to/ssl_key_file</ssl_key_file> -->
|
||||
|
||||
<!-- Whether server will request client for a certificate
|
||||
<ssl_require_client_auth>true</ssl_require_client_auth> -->
|
||||
|
||||
<!-- The following file is used only if ssl_require_client_auth=1
|
||||
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file> -->
|
||||
|
||||
<!-- Default compression algorithm (applied if client doesn't specify another algorithm).
|
||||
Supported algorithms: none, deflate, gzip, stream_gzip
|
||||
<compression>gzip</compression> -->
|
||||
|
||||
<!-- Default compression level (applied if client doesn't specify another level).
|
||||
Supported levels: none, low, medium, high
|
||||
<compression_level>high</compression_level> -->
|
||||
|
||||
<!-- Send/receive message size limits in bytes. -1 means unlimited
|
||||
<max_send_message_size>-1</max_send_message_size>
|
||||
<max_receive_message_size>4194304</max_receive_message_size>
|
||||
</grpc> -->
|
||||
|
||||
<!-- Maximum number of concurrent queries. -->
|
||||
<max_concurrent_queries>100</max_concurrent_queries>
|
||||
|
||||
@ -678,7 +706,7 @@
|
||||
|
||||
|
||||
<!-- Configuration of external dictionaries. See:
|
||||
https://clickhouse.yandex/docs/en/dicts/external_dicts/
|
||||
https://clickhouse.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
|
||||
-->
|
||||
<dictionaries_config>*_dictionary.xml</dictionaries_config>
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <memory>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionAvg.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
@ -13,43 +14,37 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct Avg
|
||||
bool allowType(const DataTypePtr& type) noexcept
|
||||
{
|
||||
using FieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
|
||||
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using AggregateFuncAvg = typename Avg<T>::Function;
|
||||
const WhichDataType t(type);
|
||||
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
DataTypePtr data_type = argument_types[0];
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFuncAvg>(*data_type, *data_type, argument_types));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFuncAvg>(*data_type, argument_types));
|
||||
const DataTypePtr& data_type = argument_types[0];
|
||||
|
||||
if (!allowType(data_type))
|
||||
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFunctionAvg>(
|
||||
*data_type, argument_types, getDecimalScale(*data_type)));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFunctionAvg>(*data_type, argument_types));
|
||||
|
||||
if (!res)
|
||||
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,78 +1,102 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include "Core/DecimalFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
}
|
||||
template <class T>
|
||||
using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
|
||||
template <typename T, typename Denominator>
|
||||
struct AggregateFunctionAvgData
|
||||
{
|
||||
using NumeratorType = T;
|
||||
using DenominatorType = Denominator;
|
||||
template <class T> constexpr bool DecimalOrExtendedInt =
|
||||
IsDecimalNumber<T>
|
||||
|| std::is_same_v<T, Int128>
|
||||
|| std::is_same_v<T, Int256>
|
||||
|| std::is_same_v<T, UInt128>
|
||||
|| std::is_same_v<T, UInt256>;
|
||||
|
||||
T numerator{0};
|
||||
/**
|
||||
* Helper class to encapsulate values conversion for avg and avgWeighted.
|
||||
*/
|
||||
template <class Numerator, class Denominator>
|
||||
struct AvgFraction
|
||||
{
|
||||
Numerator numerator{0};
|
||||
Denominator denominator{0};
|
||||
|
||||
template <typename ResultT>
|
||||
ResultT NO_SANITIZE_UNDEFINED result() const
|
||||
/// Allow division by zero as sometimes we need to return NaN.
|
||||
/// Invoked only is either Numerator or Denominator are Decimal.
|
||||
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale) const
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<ResultT>)
|
||||
if constexpr (std::numeric_limits<ResultT>::is_iec559)
|
||||
{
|
||||
if constexpr (is_big_int_v<Denominator>)
|
||||
return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
|
||||
else
|
||||
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
|
||||
}
|
||||
if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
|
||||
{
|
||||
// According to the docs, num(S1) / denom(S2) would have scale S1
|
||||
|
||||
if (denominator == static_cast<Denominator>(0))
|
||||
return static_cast<ResultT>(0);
|
||||
if constexpr (std::is_same_v<Numerator, Decimal256> && std::is_same_v<Denominator, Decimal128>)
|
||||
///Special case as Decimal256 / Decimal128 = compile error (as Decimal128 is not parametrized by a wide
|
||||
///int), but an __int128 instead
|
||||
return DecimalUtils::convertTo<Float64>(
|
||||
numerator / (denominator.template convertTo<Decimal256>()), num_scale);
|
||||
else
|
||||
return DecimalUtils::convertTo<Float64>(numerator / denominator, num_scale);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, Decimal256>)
|
||||
return static_cast<ResultT>(numerator / static_cast<T>(denominator));
|
||||
/// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64.
|
||||
Float64 num_converted;
|
||||
|
||||
if constexpr (IsDecimalNumber<Numerator>)
|
||||
num_converted = DecimalUtils::convertTo<Float64>(numerator, num_scale);
|
||||
else
|
||||
return static_cast<ResultT>(numerator / denominator);
|
||||
num_converted = static_cast<Float64>(numerator); /// all other types, including extended integral.
|
||||
|
||||
std::conditional_t<DecimalOrExtendedInt<Denominator>,
|
||||
Float64, Denominator> denom_converted;
|
||||
|
||||
if constexpr (IsDecimalNumber<Denominator>)
|
||||
denom_converted = DecimalUtils::convertTo<Float64>(denominator, denom_scale);
|
||||
else if constexpr (DecimalOrExtendedInt<Denominator>)
|
||||
/// no way to divide Float64 and extended integral type without an explicit cast.
|
||||
denom_converted = static_cast<Float64>(denominator);
|
||||
else
|
||||
denom_converted = denominator; /// can divide on float, no cast required.
|
||||
|
||||
return num_converted / denom_converted;
|
||||
}
|
||||
|
||||
Float64 NO_SANITIZE_UNDEFINED divide() const
|
||||
{
|
||||
if constexpr (DecimalOrExtendedInt<Denominator>) /// if extended int
|
||||
return static_cast<Float64>(numerator) / static_cast<Float64>(denominator);
|
||||
else
|
||||
return static_cast<Float64>(numerator) / denominator;
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates arithmetic mean of numbers.
|
||||
template <typename T, typename Data, typename Derived>
|
||||
class AggregateFunctionAvgBase : public IAggregateFunctionDataHelper<Data, Derived>
|
||||
|
||||
/**
|
||||
* @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
|
||||
* class Self : Agg<char, bool, bool, Self>.
|
||||
*/
|
||||
template <class Numerator, class Denominator, class Derived>
|
||||
class AggregateFunctionAvgBase : public
|
||||
IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
|
||||
{
|
||||
public:
|
||||
using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
|
||||
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<T>, DataTypeNumber<Float64>>;
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
|
||||
using Fraction = AvgFraction<Numerator, Denominator>;
|
||||
using Base = IAggregateFunctionDataHelper<Fraction, Derived>;
|
||||
|
||||
/// ctor for native types
|
||||
AggregateFunctionAvgBase(const DataTypes & argument_types_) : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(0) {}
|
||||
explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
|
||||
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
|
||||
: Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}
|
||||
|
||||
/// ctor for Decimals
|
||||
AggregateFunctionAvgBase(const IDataType & data_type, const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(getDecimalScale(data_type))
|
||||
{
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
if constexpr (IsDecimalNumber<T>)
|
||||
return std::make_shared<ResultDataType>(ResultDataType::maxPrecision(), scale);
|
||||
else
|
||||
return std::make_shared<ResultDataType>();
|
||||
}
|
||||
DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
@ -84,7 +108,7 @@ public:
|
||||
{
|
||||
writeBinary(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
writeVarUInt(this->data(place).denominator, buf);
|
||||
else /// Floating point denominator type can be used
|
||||
writeBinary(this->data(place).denominator, buf);
|
||||
@ -94,7 +118,7 @@ public:
|
||||
{
|
||||
readBinary(this->data(place).numerator, buf);
|
||||
|
||||
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
|
||||
if constexpr (std::is_unsigned_v<Denominator>)
|
||||
readVarUInt(this->data(place).denominator, buf);
|
||||
else /// Floating point denominator type can be used
|
||||
readBinary(this->data(place).denominator, buf);
|
||||
@ -102,29 +126,34 @@ public:
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & column = static_cast<ColVecResult &>(to);
|
||||
column.getData().push_back(this->data(place).template result<ResultType>());
|
||||
if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
|
||||
static_cast<ColumnVector<Float64> &>(to).getData().push_back(
|
||||
this->data(place).divideIfAnyDecimal(num_scale, denom_scale));
|
||||
else
|
||||
static_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
|
||||
}
|
||||
|
||||
protected:
|
||||
UInt32 scale;
|
||||
private:
|
||||
UInt32 num_scale;
|
||||
UInt32 denom_scale;
|
||||
};
|
||||
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>
|
||||
template <class T>
|
||||
using AvgFieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
|
||||
template <class T>
|
||||
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
|
||||
{
|
||||
public:
|
||||
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>::AggregateFunctionAvgBase;
|
||||
using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
|
||||
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final
|
||||
{
|
||||
const auto & column = static_cast<const ColVecType &>(*columns[0]);
|
||||
this->data(place).numerator += column.getData()[row_num];
|
||||
this->data(place).denominator += 1;
|
||||
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
|
||||
++this->data(place).denominator;
|
||||
}
|
||||
|
||||
String getName() const override { return "avg"; }
|
||||
String getName() const final { return "avg"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
@ -13,47 +15,91 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct AvgWeighted
|
||||
bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
|
||||
{
|
||||
using FieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
NearestFieldType<T>>;
|
||||
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
|
||||
using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
|
||||
};
|
||||
const WhichDataType l_dt(left), r_dt(right);
|
||||
|
||||
template <typename T>
|
||||
using AggregateFuncAvgWeighted = typename AvgWeighted<T>::Function;
|
||||
constexpr auto allow = [](WhichDataType t)
|
||||
{
|
||||
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
|
||||
};
|
||||
|
||||
return allow(l_dt) && allow(r_dt);
|
||||
}
|
||||
|
||||
#define AT_SWITCH(LINE) \
|
||||
switch (which.idx) \
|
||||
{ \
|
||||
LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
|
||||
LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
|
||||
LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
|
||||
LINE(Float32); LINE(Float64); \
|
||||
default: return nullptr; \
|
||||
}
|
||||
|
||||
template <class First, class ... TArgs>
|
||||
static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
|
||||
{
|
||||
const WhichDataType which(second_type);
|
||||
|
||||
#define LINE(Type) \
|
||||
case TypeIndex::Type: return new AggregateFunctionAvgWeighted<First, Type>(std::forward<TArgs>(args)...)
|
||||
AT_SWITCH(LINE)
|
||||
#undef LINE
|
||||
}
|
||||
|
||||
// Not using helper functions because there are no templates for binary decimal/numeric function.
|
||||
template <class... TArgs>
|
||||
static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
|
||||
{
|
||||
const WhichDataType which(first_type);
|
||||
|
||||
#define LINE(Type) \
|
||||
case TypeIndex::Type: return create<Type, TArgs...>(second_type, std::forward<TArgs>(args)...)
|
||||
AT_SWITCH(LINE)
|
||||
#undef LINE
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
const auto data_type = static_cast<const DataTypePtr>(argument_types[0]);
|
||||
const auto data_type_weight = static_cast<const DataTypePtr>(argument_types[1]);
|
||||
if (!data_type->equals(*data_type_weight))
|
||||
throw Exception("Different types " + data_type->getName() + " and " + data_type_weight->getName() + " of arguments for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
if (isDecimal(data_type))
|
||||
res.reset(createWithDecimalType<AggregateFuncAvgWeighted>(*data_type, *data_type, argument_types));
|
||||
|
||||
if (!allowTypes(data_type, data_type_weight))
|
||||
throw Exception(
|
||||
"Types " + data_type->getName() +
|
||||
" and " + data_type_weight->getName() +
|
||||
" are non-conforming as arguments for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
AggregateFunctionPtr ptr;
|
||||
|
||||
const bool left_decimal = isDecimal(data_type);
|
||||
const bool right_decimal = isDecimal(data_type_weight);
|
||||
|
||||
if (left_decimal && right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight,
|
||||
argument_types,
|
||||
getDecimalScale(*data_type), getDecimalScale(*data_type_weight)));
|
||||
else if (left_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
getDecimalScale(*data_type)));
|
||||
else if (right_decimal)
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types,
|
||||
// numerator is not decimal, so its scale is 0
|
||||
0, getDecimalScale(*data_type_weight)));
|
||||
else
|
||||
res.reset(createWithNumericType<AggregateFuncAvgWeighted>(*data_type, argument_types));
|
||||
ptr.reset(create(*data_type, *data_type_weight, argument_types));
|
||||
|
||||
if (!res)
|
||||
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
return res;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,26 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <AggregateFunctions/AggregateFunctionAvg.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T, typename Data>
|
||||
class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>
|
||||
template <class T>
|
||||
using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>,
|
||||
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
|
||||
std::conditional_t<DecimalOrExtendedInt<T>,
|
||||
Float64, // no way to do UInt128 * UInt128, better cast to Float64
|
||||
NearestFieldType<T>>>;
|
||||
|
||||
template <class T, class U>
|
||||
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
|
||||
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
|
||||
|
||||
template <class Value, class Weight>
|
||||
class AggregateFunctionAvgWeighted final :
|
||||
public AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
|
||||
{
|
||||
public:
|
||||
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>::AggregateFunctionAvgBase;
|
||||
using Base = AggregateFunctionAvgBase<
|
||||
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
|
||||
using Base::Base;
|
||||
|
||||
using ValueT = MaxFieldType<Value, Weight>;
|
||||
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
const auto & values = static_cast<const ColVecType &>(*columns[0]);
|
||||
const auto & weights = static_cast<const ColVecType &>(*columns[1]);
|
||||
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
|
||||
|
||||
this->data(place).numerator += static_cast<typename Data::NumeratorType>(values.getData()[row_num]) * weights.getData()[row_num];
|
||||
this->data(place).denominator += weights.getData()[row_num];
|
||||
this->data(place).numerator += static_cast<ValueT>(
|
||||
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
|
||||
static_cast<ValueT>(weights.getData()[row_num]);
|
||||
|
||||
this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
|
||||
}
|
||||
|
||||
String getName() const override { return "avgWeighted"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -21,7 +21,8 @@ class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
using DataTypes = std::vector<DataTypePtr>;
|
||||
|
||||
/** Creator have arguments: name of aggregate function, types of arguments, values of parameters.
|
||||
/**
|
||||
* The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
|
||||
* Parameters are for "parametric" aggregate functions.
|
||||
* For example, in quantileWeighted(0.9)(x, weight), 0.9 is "parameter" and x, weight are "arguments".
|
||||
*/
|
||||
@ -87,7 +88,6 @@ private:
|
||||
|
||||
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name) const;
|
||||
|
||||
private:
|
||||
using AggregateFunctions = std::unordered_map<String, Value>;
|
||||
|
||||
AggregateFunctions aggregate_functions;
|
||||
|
37
src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
Normal file
37
src/AggregateFunctions/AggregateFunctionMannWhitney.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionMannWhitney.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMannWhitneyUTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
return std::make_shared<AggregateFunctionMannWhitney>(argument_types, parameters);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("mannWhitneyUTest", createAggregateFunctionMannWhitneyUTest);
|
||||
}
|
||||
|
||||
}
|
246
src/AggregateFunctions/AggregateFunctionMannWhitney.h
Normal file
246
src/AggregateFunctions/AggregateFunctionMannWhitney.h
Normal file
@ -0,0 +1,246 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <common/types.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <iostream>
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
struct MannWhitneyData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
/*Since null hypothesis is "for randomly selected values X and Y from two populations,
|
||||
*the probability of X being greater than Y is equal to the probability of Y being greater than X".
|
||||
*Or "the distribution F of first sample equals to the distribution G of second sample".
|
||||
*Then alternative for this hypothesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */
|
||||
enum class Alternative
|
||||
{
|
||||
TwoSided,
|
||||
Less,
|
||||
Greater
|
||||
};
|
||||
|
||||
/// The behaviour equals to the similar function from scipy.
|
||||
/// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
|
||||
std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
|
||||
{
|
||||
ConcatenatedSamples both(this->x, this->y);
|
||||
RanksArray ranks;
|
||||
Float64 tie_correction;
|
||||
|
||||
/// Compute ranks according to both samples.
|
||||
std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);
|
||||
|
||||
const Float64 n1 = this->size_x;
|
||||
const Float64 n2 = this->size_y;
|
||||
|
||||
Float64 r1 = 0;
|
||||
for (size_t i = 0; i < n1; ++i)
|
||||
r1 += ranks[i];
|
||||
|
||||
const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
|
||||
const Float64 u2 = n1 * n2 - u1;
|
||||
|
||||
/// The distribution of U-statistic under null hypothesis H0 is symmetric with respect to meanrank.
|
||||
const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction;
|
||||
const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);
|
||||
|
||||
Float64 u = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
/// There is no difference which u_i to take as u, because z will be differ only in sign and we take std::abs() from it.
|
||||
u = std::max(u1, u2);
|
||||
else if (alternative == Alternative::Less)
|
||||
u = u1;
|
||||
else if (alternative == Alternative::Greater)
|
||||
u = u2;
|
||||
|
||||
Float64 z = (u - meanrank) / sd;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
z = std::abs(z);
|
||||
|
||||
/// In fact cdf is a probability function, so it is intergral of density from (-inf, z].
|
||||
/// But since standard normal distribution is symmetric, cdf(0) = 0.5 and we have to compute integral from [0, z].
|
||||
const Float64 cdf = integrateSimpson(0, z, [] (Float64 t) { return std::pow(M_E, -0.5 * t * t) / std::sqrt(2 * M_PI);});
|
||||
|
||||
Float64 p_value = 0;
|
||||
if (alternative == Alternative::TwoSided)
|
||||
p_value = 1 - 2 * cdf;
|
||||
else
|
||||
p_value = 0.5 - cdf;
|
||||
|
||||
return {u2, p_value};
|
||||
}
|
||||
|
||||
private:
|
||||
using Sample = typename StatisticalSample<Float64, Float64>::SampleX;
|
||||
|
||||
/// We need to compute ranks according to all samples. Use this class to avoid extra copy and memory allocation.
|
||||
class ConcatenatedSamples
|
||||
{
|
||||
public:
|
||||
ConcatenatedSamples(const Sample & first_, const Sample & second_)
|
||||
: first(first_), second(second_) {}
|
||||
|
||||
const Float64 & operator[](size_t ind) const
|
||||
{
|
||||
if (ind < first.size())
|
||||
return first[ind];
|
||||
return second[ind % first.size()];
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return first.size() + second.size();
|
||||
}
|
||||
|
||||
private:
|
||||
const Sample & first;
|
||||
const Sample & second;
|
||||
};
|
||||
};
|
||||
|
||||
class AggregateFunctionMannWhitney final:
|
||||
public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
|
||||
{
|
||||
private:
|
||||
using Alternative = typename MannWhitneyData::Alternative;
|
||||
Alternative alternative;
|
||||
bool continuity_correction{true};
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
|
||||
:IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {})
|
||||
{
|
||||
if (params.size() > 2)
|
||||
throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (params.empty())
|
||||
{
|
||||
alternative = Alternative::TwoSided;
|
||||
return;
|
||||
}
|
||||
|
||||
if (params[0].getType() != Field::Types::String)
|
||||
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
auto param = params[0].get<String>();
|
||||
if (param == "two-sided")
|
||||
alternative = Alternative::TwoSided;
|
||||
else if (param == "less")
|
||||
alternative = Alternative::Less;
|
||||
else if (param == "greater")
|
||||
alternative = Alternative::Greater;
|
||||
else
|
||||
throw Exception("Unknown parameter in aggregate function " + getName() +
|
||||
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (params.size() != 2)
|
||||
return;
|
||||
|
||||
if (params[1].getType() != Field::Types::UInt64)
|
||||
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "mannWhitneyUTest";
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"u_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
|
||||
if (is_second)
|
||||
this->data(place).addY(value, arena);
|
||||
else
|
||||
this->data(place).addX(value, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
auto & b = this->data(rhs);
|
||||
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||
throw Exception("Aggregate function " + getName() + " require both samples to be non empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(u_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
};
|
@ -21,23 +21,10 @@ AggregateFunctionPtr createAggregateFunctionRankCorrelation(const std::string &
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
AggregateFunctionPtr res;
|
||||
|
||||
if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
|
||||
{
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
else
|
||||
{
|
||||
res.reset(createWithTwoNumericTypes<AggregateFunctionRankCorrelation>(*argument_types[0], *argument_types[1], argument_types));
|
||||
}
|
||||
|
||||
if (!res)
|
||||
{
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
return res;
|
||||
return std::make_shared<AggregateFunctionRankCorrelation>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,73 +1,56 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <common/types.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <template <typename> class Comparator>
|
||||
struct ComparePairFirst final
|
||||
|
||||
struct RankCorrelationData : public StatisticalSample<Float64, Float64>
|
||||
{
|
||||
template <typename X, typename Y>
|
||||
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
|
||||
Float64 getResult()
|
||||
{
|
||||
return Comparator<X>{}(lhs.first, rhs.first);
|
||||
RanksArray ranks_x;
|
||||
std::tie(ranks_x, std::ignore) = computeRanksAndTieCorrection(this->x);
|
||||
|
||||
RanksArray ranks_y;
|
||||
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
|
||||
|
||||
/// In our case sizes of both samples are equal.
|
||||
const auto size = this->size_x;
|
||||
|
||||
/// Count d^2 sum
|
||||
Float64 answer = 0;
|
||||
for (size_t j = 0; j < size; ++j)
|
||||
answer += (ranks_x[j] - ranks_y[j]) * (ranks_x[j] - ranks_y[j]);
|
||||
|
||||
answer *= 6;
|
||||
answer /= size * (size * size - 1);
|
||||
answer = 1 - answer;
|
||||
return answer;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <template <typename> class Comparator>
|
||||
struct ComparePairSecond final
|
||||
{
|
||||
template <typename X, typename Y>
|
||||
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
|
||||
{
|
||||
return Comparator<Y>{}(lhs.second, rhs.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename X = Float64, typename Y = Float64>
|
||||
struct AggregateFunctionRankCorrelationData final
|
||||
{
|
||||
size_t size_x = 0;
|
||||
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<X, Y>), 4096>;
|
||||
using Array = PODArray<std::pair<X, Y>, 32, Allocator>;
|
||||
|
||||
Array values;
|
||||
};
|
||||
|
||||
template <typename X, typename Y>
|
||||
class AggregateFunctionRankCorrelation :
|
||||
public IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>, AggregateFunctionRankCorrelation<X, Y>>
|
||||
public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
|
||||
{
|
||||
using Data = AggregateFunctionRankCorrelationData<X, Y>;
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<Float64, Float64>), 4096>;
|
||||
using Array = PODArray<std::pair<Float64, Float64>, 32, Allocator>;
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionRankCorrelation(const DataTypes & arguments)
|
||||
:IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>,AggregateFunctionRankCorrelation<X, Y>> ({arguments}, {})
|
||||
:IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation> ({arguments}, {})
|
||||
{}
|
||||
|
||||
String getName() const override
|
||||
@ -80,24 +63,12 @@ public:
|
||||
return std::make_shared<DataTypeNumber<Float64>>();
|
||||
}
|
||||
|
||||
void insert(Data & a, const std::pair<X, Y> & x, Arena * arena) const
|
||||
{
|
||||
++a.size_x;
|
||||
a.values.push_back(x, arena);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & a = this->data(place);
|
||||
|
||||
auto new_x = assert_cast<const ColumnVector<X> &>(*columns[0]).getData()[row_num];
|
||||
auto new_y = assert_cast<const ColumnVector<Y> &>(*columns[1]).getData()[row_num];
|
||||
|
||||
auto new_arg = std::make_pair(new_x, new_y);
|
||||
|
||||
a.size_x += 1;
|
||||
|
||||
a.values.push_back(new_arg, arena);
|
||||
Float64 new_x = columns[0]->getFloat64(row_num);
|
||||
Float64 new_y = columns[1]->getFloat64(row_num);
|
||||
this->data(place).addX(new_x, arena);
|
||||
this->data(place).addY(new_y, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
@ -105,116 +76,22 @@ public:
|
||||
auto & a = this->data(place);
|
||||
auto & b = this->data(rhs);
|
||||
|
||||
if (b.size_x)
|
||||
for (size_t i = 0; i < b.size_x; ++i)
|
||||
insert(a, b.values[i], arena);
|
||||
a.merge(b, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
const auto & value = this->data(place).values;
|
||||
size_t size = this->data(place).size_x;
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
auto & value = this->data(place).values;
|
||||
|
||||
value.resize(size, arena);
|
||||
buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).read(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).values;
|
||||
size_t size = this->data(place).size_x;
|
||||
|
||||
// create a copy of values not to format data
|
||||
PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
|
||||
tmp_values.resize(size);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
|
||||
|
||||
// sort x_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
|
||||
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
size_t cur_start = j;
|
||||
|
||||
while (j < size - 1)
|
||||
{
|
||||
if (tmp_values[j].first == tmp_values[j + 1].first)
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// insert rank is calculated as average of ranks of equal values
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].first = insert_rank;
|
||||
++j;
|
||||
}
|
||||
|
||||
// sort y_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
|
||||
|
||||
// replace y_values with their ranks
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
size_t cur_start = j;
|
||||
|
||||
while (j < size - 1)
|
||||
{
|
||||
if (tmp_values[j].second == tmp_values[j + 1].second)
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// insert rank is calculated as average of ranks of equal values
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].second = insert_rank;
|
||||
++j;
|
||||
}
|
||||
|
||||
// count d^2 sum
|
||||
Float64 answer = static_cast<Float64>(0);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
|
||||
|
||||
answer *= 6;
|
||||
answer /= size * (size * size - 1);
|
||||
answer = 1 - answer;
|
||||
auto answer = this->data(place).getResult();
|
||||
|
||||
auto & column = static_cast<ColumnVector<Float64> &>(to);
|
||||
column.getData().push_back(answer);
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
@ -30,310 +31,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int DECIMAL_OVERFLOW;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculating univariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): var, stddev
|
||||
level 3: skewness
|
||||
level 4: kurtosis
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
https://en.wikipedia.org/wiki/Skewness
|
||||
https://en.wikipedia.org/wiki/Kurtosis
|
||||
*/
|
||||
template <typename T, size_t _level>
|
||||
struct VarMoments
|
||||
{
|
||||
T m[_level + 1]{};
|
||||
|
||||
void add(T x)
|
||||
{
|
||||
++m[0];
|
||||
m[1] += x;
|
||||
m[2] += x * x;
|
||||
if constexpr (_level >= 3) m[3] += x * x * x;
|
||||
if constexpr (_level >= 4) m[4] += x * x * x * x;
|
||||
}
|
||||
|
||||
void merge(const VarMoments & rhs)
|
||||
{
|
||||
m[0] += rhs.m[0];
|
||||
m[1] += rhs.m[1];
|
||||
m[2] += rhs.m[2];
|
||||
if constexpr (_level >= 3) m[3] += rhs.m[3];
|
||||
if constexpr (_level >= 4) m[4] += rhs.m[4];
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T getPopulation() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
|
||||
/// Due to numerical errors, the result can be slightly less than zero,
|
||||
/// but it should be impossible. Trim to zero.
|
||||
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
|
||||
}
|
||||
|
||||
T getSample() const
|
||||
{
|
||||
if (m[0] <= 1)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
|
||||
}
|
||||
|
||||
T getMoment3() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
return (m[3]
|
||||
- (3 * m[2]
|
||||
- 2 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
|
||||
T getMoment4() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
return (m[4]
|
||||
- (4 * m[3]
|
||||
- (6 * m[2]
|
||||
- 3 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t _level>
|
||||
class VarMomentsDecimal
|
||||
{
|
||||
public:
|
||||
using NativeType = typename T::NativeType;
|
||||
|
||||
void add(NativeType x)
|
||||
{
|
||||
++m0;
|
||||
getM(1) += x;
|
||||
|
||||
NativeType tmp;
|
||||
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void merge(const VarMomentsDecimal & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
getM(1) += rhs.getM(1);
|
||||
|
||||
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
|
||||
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
|
||||
|
||||
Float64 getPopulation(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
|
||||
}
|
||||
|
||||
Float64 getSample(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
if (m0 == 1)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
|
||||
}
|
||||
|
||||
Float64 getMoment3(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
Float64 getMoment4(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
private:
|
||||
UInt64 m0{};
|
||||
NativeType m[_level]{};
|
||||
|
||||
NativeType & getM(size_t i) { return m[i - 1]; }
|
||||
const NativeType & getM(size_t i) const { return m[i - 1]; }
|
||||
};
|
||||
|
||||
/**
|
||||
Calculating multivariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): covar
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
*/
|
||||
template <typename T>
|
||||
struct CovarMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
}
|
||||
|
||||
void merge(const CovarMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getPopulation() const
|
||||
{
|
||||
return (xy - x1 * y1 / m0) / m0;
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getSample() const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return (xy - x1 * y1 / m0) / (m0 - 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct CorrMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
x2 += x * x;
|
||||
y2 += y * y;
|
||||
}
|
||||
|
||||
void merge(const CorrMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get() const
|
||||
{
|
||||
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
enum class StatisticsFunctionKind
|
||||
{
|
||||
varPop, varSamp,
|
||||
|
77
src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
Normal file
77
src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionTTest.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Student T-test applies to two samples of independent random variables
|
||||
* that have normal distributions with equal (but unknown) variances.
|
||||
* It allows to answer the question whether means of the distributions differ.
|
||||
*
|
||||
* If variances are not considered equal, Welch T-test should be used instead.
|
||||
*/
|
||||
struct StudentTTestData : public TTestMoments<Float64>
|
||||
{
|
||||
static constexpr auto name = "studentTTest";
|
||||
|
||||
std::pair<Float64, Float64> getResult() const
|
||||
{
|
||||
Float64 mean_x = x1 / nx;
|
||||
Float64 mean_y = y1 / ny;
|
||||
|
||||
/// To estimate the variance we first estimate two means.
|
||||
/// That's why the number of degrees of freedom is the total number of values of both samples minus 2.
|
||||
Float64 degrees_of_freedom = nx + ny - 2;
|
||||
|
||||
/// Calculate s^2
|
||||
/// The original formulae looks like
|
||||
/// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
|
||||
/// But we made some mathematical transformations not to store original sequences.
|
||||
/// Also we dropped sqrt, because later it will be squared later.
|
||||
|
||||
Float64 all_x = x2 + nx * mean_x * mean_x - 2 * mean_x * x1;
|
||||
Float64 all_y = y2 + ny * mean_y * mean_y - 2 * mean_y * y1;
|
||||
|
||||
Float64 s2 = (all_x + all_y) / degrees_of_freedom;
|
||||
Float64 std_err2 = s2 * (1. / nx + 1. / ny);
|
||||
|
||||
/// t-statistic
|
||||
Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
|
||||
|
||||
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return std::make_shared<AggregateFunctionTTest<StudentTTestData>>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest);
|
||||
}
|
||||
|
||||
}
|
154
src/AggregateFunctions/AggregateFunctionTTest.h
Normal file
154
src/AggregateFunctions/AggregateFunctionTTest.h
Normal file
@ -0,0 +1,154 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Core/Types.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
/// This function is used in implementations of different T-Tests.
|
||||
/// On Darwin it's unavailable in math.h but actually exists in the library (can be linked successfully).
|
||||
#if defined(OS_DARWIN)
|
||||
extern "C"
|
||||
{
|
||||
double lgamma_r(double x, int * signgamp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
|
||||
/**
|
||||
* If you have a cumulative distribution function F, then calculating the p-value for given statistic T is simply 1−F(T)
|
||||
* In our case p-value is two-sided, so we multiply it by 2.
|
||||
* So cumulative distribution function F equals to
|
||||
* \[ F(t) = \int_{-\infty}^{t} f(u)du = 1 - \frac{1}{2} I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \]
|
||||
* where \[ x(t) = \frac{v}{t^2 + v} \]: https://en.wikipedia.org/wiki/Student%27s_t-distribution#Cumulative_distribution_function
|
||||
*
|
||||
* so our resulting \[ p-value = I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \].
|
||||
*
|
||||
* And I is regularized incomplete beta function: https://en.wikipedia.org/wiki/Beta_function#Incomplete_beta_function
|
||||
*
|
||||
* Keepenig in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \]
|
||||
* and
|
||||
* \[ \mathrm {B} (x,y)={\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}}=\
|
||||
* \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \]
|
||||
*
|
||||
* p-value can be calculated in terms of gamma functions and integrals more simply:
|
||||
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}r^{{\frac {\nu }{2}}-1}\,(1-r)^{-0.5}\,\mathrm {d} r}\
|
||||
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
|
||||
*
|
||||
* which simplifies to:
|
||||
*
|
||||
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}{\frac {r^{{\frac {\nu }{2}}-1}}{\sqrt {1-r}}}\,\mathrm {d} r}\
|
||||
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
|
||||
*
|
||||
* Read here for details https://rosettacode.org/wiki/Welch%27s_t-test#
|
||||
*
|
||||
* Both WelchTTest and StudentTTest have t-statistric with Student distribution but with different degrees of freedom.
|
||||
* So the procedure of computing p-value is the same.
|
||||
*/
|
||||
static inline Float64 getPValue(Float64 degrees_of_freedom, Float64 t_stat2)
|
||||
{
|
||||
Float64 numerator = integrateSimpson(0, degrees_of_freedom / (t_stat2 + degrees_of_freedom),
|
||||
[degrees_of_freedom](double x) { return std::pow(x, degrees_of_freedom / 2 - 1) / std::sqrt(1 - x); });
|
||||
|
||||
int unused;
|
||||
Float64 denominator = std::exp(
|
||||
lgamma_r(degrees_of_freedom / 2, &unused)
|
||||
+ lgamma_r(0.5, &unused)
|
||||
- lgamma_r(degrees_of_freedom / 2 + 0.5, &unused));
|
||||
|
||||
return std::min(1.0, std::max(0.0, numerator / denominator));
|
||||
}
|
||||
|
||||
|
||||
/// Returns tuple of (t-statistic, p-value)
|
||||
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
|
||||
template <typename Data>
|
||||
class AggregateFunctionTTest :
|
||||
public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
|
||||
{
|
||||
public:
|
||||
AggregateFunctionTTest(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>({arguments}, {})
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Data::name;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
DataTypes types
|
||||
{
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
std::make_shared<DataTypeNumber<Float64>>(),
|
||||
};
|
||||
|
||||
Strings names
|
||||
{
|
||||
"t_statistic",
|
||||
"p_value"
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
Float64 value = columns[0]->getFloat64(row_num);
|
||||
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||
|
||||
if (is_second)
|
||||
this->data(place).addY(value);
|
||||
else
|
||||
this->data(place).addX(value);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||
{
|
||||
this->data(place).write(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||
{
|
||||
this->data(place).read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto [t_statistic, p_value] = this->data(place).getResult();
|
||||
|
||||
/// Because p-value is a probability.
|
||||
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||
|
||||
column_stat.getData().push_back(t_statistic);
|
||||
column_value.getData().push_back(p_value);
|
||||
}
|
||||
};
|
||||
|
||||
};
|
@ -1,35 +0,0 @@
|
||||
#include "AggregateFunctionTimeSeriesGroupSum.h"
|
||||
#include "AggregateFunctionFactory.h"
|
||||
#include "FactoryHelpers.h"
|
||||
#include "Helpers.h"
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
namespace
|
||||
{
|
||||
template <bool rate>
|
||||
AggregateFunctionPtr createAggregateFunctionTimeSeriesGroupSum(const std::string & name, const DataTypes & arguments, const Array & params)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (arguments.size() < 3)
|
||||
throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
return std::make_shared<AggregateFunctionTimeSeriesGroupSum<rate>>(arguments);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum<false>);
|
||||
factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum<true>);
|
||||
}
|
||||
|
||||
}
|
@ -1,291 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <ext/range.h>
|
||||
#include "IAggregateFunction.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
template <bool rate>
|
||||
struct AggregateFunctionTimeSeriesGroupSumData
|
||||
{
|
||||
using DataPoint = std::pair<Int64, Float64>;
|
||||
struct Points
|
||||
{
|
||||
using Dps = std::queue<DataPoint>;
|
||||
Dps dps;
|
||||
void add(Int64 t, Float64 v)
|
||||
{
|
||||
dps.push(std::make_pair(t, v));
|
||||
if (dps.size() > 2)
|
||||
dps.pop();
|
||||
}
|
||||
Float64 getval(Int64 t)
|
||||
{
|
||||
Int64 t1, t2;
|
||||
Float64 v1, v2;
|
||||
if (rate)
|
||||
{
|
||||
if (dps.size() < 2)
|
||||
return 0;
|
||||
t1 = dps.back().first;
|
||||
t2 = dps.front().first;
|
||||
v1 = dps.back().second;
|
||||
v2 = dps.front().second;
|
||||
return (v1 - v2) / Float64(t1 - t2);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dps.size() == 1 && t == dps.front().first)
|
||||
return dps.front().second;
|
||||
t1 = dps.back().first;
|
||||
t2 = dps.front().first;
|
||||
v1 = dps.back().second;
|
||||
v2 = dps.front().second;
|
||||
return v2 + ((v1 - v2) * Float64(t - t2)) / Float64(t1 - t2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::map<UInt64, Points> Series;
|
||||
typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
|
||||
Series ss;
|
||||
AggSeries result;
|
||||
|
||||
void add(UInt64 uid, Int64 t, Float64 v)
|
||||
{ //suppose t is coming asc
|
||||
typename Series::iterator it_ss;
|
||||
if (ss.count(uid) == 0)
|
||||
{ //time series not exist, insert new one
|
||||
Points tmp;
|
||||
tmp.add(t, v);
|
||||
ss.emplace(uid, tmp);
|
||||
it_ss = ss.find(uid);
|
||||
}
|
||||
else
|
||||
{
|
||||
it_ss = ss.find(uid);
|
||||
it_ss->second.add(t, v);
|
||||
}
|
||||
if (result.size() > 0 && t < result.back().first)
|
||||
throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
|
||||
if (result.size() > 0 && t == result.back().first)
|
||||
{
|
||||
//do not add new point
|
||||
if (rate)
|
||||
result.back().second += it_ss->second.getval(t);
|
||||
else
|
||||
result.back().second += v;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rate)
|
||||
result.emplace_back(std::make_pair(t, it_ss->second.getval(t)));
|
||||
else
|
||||
result.emplace_back(std::make_pair(t, v));
|
||||
}
|
||||
ssize_t i = result.size() - 1;
|
||||
//reverse find out the index of timestamp that more than previous timestamp of t
|
||||
while (result[i].first > it_ss->second.dps.front().first && i >= 0)
|
||||
i--;
|
||||
|
||||
i++;
|
||||
while (i < ssize_t(result.size()) - 1)
|
||||
{
|
||||
result[i].second += it_ss->second.getval(result[i].first);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionTimeSeriesGroupSumData & other)
|
||||
{
|
||||
//if ts has overlap, then aggregate two series by interpolation;
|
||||
AggSeries tmp;
|
||||
tmp.reserve(other.result.size() + result.size());
|
||||
size_t i = 0, j = 0;
|
||||
Int64 t1, t2;
|
||||
Float64 v1, v2;
|
||||
while (i < result.size() && j < other.result.size())
|
||||
{
|
||||
if (result[i].first < other.result[j].first)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
tmp.emplace_back(result[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
t1 = other.result[j].first;
|
||||
t2 = other.result[j - 1].first;
|
||||
v1 = other.result[j].second;
|
||||
v2 = other.result[j - 1].second;
|
||||
Float64 value = result[i].second + v2 + (v1 - v2) * (Float64(result[i].first - t2)) / Float64(t1 - t2);
|
||||
tmp.emplace_back(std::make_pair(result[i].first, value));
|
||||
}
|
||||
i++;
|
||||
}
|
||||
else if (result[i].first > other.result[j].first)
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
tmp.emplace_back(other.result[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
t1 = result[i].first;
|
||||
t2 = result[i - 1].first;
|
||||
v1 = result[i].second;
|
||||
v2 = result[i - 1].second;
|
||||
Float64 value = other.result[j].second + v2 + (v1 - v2) * (Float64(other.result[j].first - t2)) / Float64(t1 - t2);
|
||||
tmp.emplace_back(std::make_pair(other.result[j].first, value));
|
||||
}
|
||||
j++;
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp.emplace_back(std::make_pair(result[i].first, result[i].second + other.result[j].second));
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
while (i < result.size())
|
||||
{
|
||||
tmp.emplace_back(result[i]);
|
||||
i++;
|
||||
}
|
||||
while (j < other.result.size())
|
||||
{
|
||||
tmp.push_back(other.result[j]);
|
||||
j++;
|
||||
}
|
||||
swap(result, tmp);
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
size_t size = result.size();
|
||||
writeVarUInt(size, buf);
|
||||
if (size > 0)
|
||||
{
|
||||
buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
result.resize(size);
|
||||
if (size > 0)
|
||||
{
|
||||
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
};
|
||||
template <bool rate>
|
||||
class AggregateFunctionTimeSeriesGroupSum final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>
|
||||
{
|
||||
private:
|
||||
public:
|
||||
String getName() const override { return rate ? "timeSeriesGroupRateSum" : "timeSeriesGroupSum"; }
|
||||
|
||||
AggregateFunctionTimeSeriesGroupSum(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>(arguments, {})
|
||||
{
|
||||
if (!WhichDataType(arguments[0].get()).isUInt64())
|
||||
throw Exception{"Illegal type " + arguments[0].get()->getName() + " of argument 1 of aggregate function " + getName()
|
||||
+ ", must be UInt64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[1].get()).isInt64())
|
||||
throw Exception{"Illegal type " + arguments[1].get()->getName() + " of argument 2 of aggregate function " + getName()
|
||||
+ ", must be Int64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[2].get()).isFloat64())
|
||||
throw Exception{"Illegal type " + arguments[2].get()->getName() + " of argument 3 of aggregate function " + getName()
|
||||
+ ", must be Float64",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
auto datatypes = std::vector<DataTypePtr>();
|
||||
datatypes.push_back(std::make_shared<DataTypeInt64>());
|
||||
datatypes.push_back(std::make_shared<DataTypeFloat64>());
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(datatypes));
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
||||
{
|
||||
auto uid = assert_cast<const ColumnVector<UInt64> *>(columns[0])->getData()[row_num];
|
||||
auto ts = assert_cast<const ColumnVector<Int64> *>(columns[1])->getData()[row_num];
|
||||
auto val = assert_cast<const ColumnVector<Float64> *>(columns[2])->getData()[row_num];
|
||||
if (uid && ts && val)
|
||||
{
|
||||
this->data(place).add(uid, ts, val);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
|
||||
|
||||
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); }
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); }
|
||||
|
||||
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & value = this->data(place).result;
|
||||
size_t size = value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
size_t old_size = offsets_to.back();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnInt64::Container & ts_to
|
||||
= assert_cast<ColumnInt64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(0)).getData();
|
||||
typename ColumnFloat64::Container & val_to
|
||||
= assert_cast<ColumnFloat64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(1)).getData();
|
||||
ts_to.reserve(old_size + size);
|
||||
val_to.reserve(old_size + size);
|
||||
size_t i = 0;
|
||||
while (i < this->data(place).result.size())
|
||||
{
|
||||
ts_to.push_back(this->data(place).result[i].first);
|
||||
val_to.push_back(this->data(place).result[i].second);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
}
|
74
src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
Normal file
74
src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionTTest.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Moments.h>
|
||||
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct WelchTTestData : public TTestMoments<Float64>
|
||||
{
|
||||
static constexpr auto name = "welchTTest";
|
||||
|
||||
std::pair<Float64, Float64> getResult() const
|
||||
{
|
||||
Float64 mean_x = x1 / nx;
|
||||
Float64 mean_y = y1 / ny;
|
||||
|
||||
/// s_x^2, s_y^2
|
||||
|
||||
/// The original formulae looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2}
|
||||
/// But we made some mathematical transformations not to store original sequences.
|
||||
/// Also we dropped sqrt, because later it will be squared later.
|
||||
|
||||
Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1);
|
||||
Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1);
|
||||
|
||||
/// t-statistic
|
||||
Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / nx + sy2 / ny);
|
||||
|
||||
/// degrees of freedom
|
||||
|
||||
Float64 numerator_sqrt = sx2 / nx + sy2 / ny;
|
||||
Float64 numerator = numerator_sqrt * numerator_sqrt;
|
||||
|
||||
Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1));
|
||||
Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1));
|
||||
|
||||
Float64 degrees_of_freedom = numerator / (denominator_x + denominator_y);
|
||||
|
||||
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
|
||||
}
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
|
||||
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return std::make_shared<AggregateFunctionTTest<WelchTTestData>>(argument_types);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest);
|
||||
}
|
||||
|
||||
}
|
@ -15,6 +15,7 @@
|
||||
M(Float32) \
|
||||
M(Float64)
|
||||
|
||||
// No UInt128 here because of the name conflict
|
||||
#define FOR_NUMERIC_TYPES(M) \
|
||||
M(UInt8) \
|
||||
M(UInt16) \
|
||||
|
361
src/AggregateFunctions/Moments.h
Normal file
361
src/AggregateFunctions/Moments.h
Normal file
@ -0,0 +1,361 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int DECIMAL_OVERFLOW;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculating univariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): var, stddev
|
||||
level 3: skewness
|
||||
level 4: kurtosis
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
https://en.wikipedia.org/wiki/Skewness
|
||||
https://en.wikipedia.org/wiki/Kurtosis
|
||||
*/
|
||||
template <typename T, size_t _level>
|
||||
struct VarMoments
|
||||
{
|
||||
T m[_level + 1]{};
|
||||
|
||||
void add(T x)
|
||||
{
|
||||
++m[0];
|
||||
m[1] += x;
|
||||
m[2] += x * x;
|
||||
if constexpr (_level >= 3) m[3] += x * x * x;
|
||||
if constexpr (_level >= 4) m[4] += x * x * x * x;
|
||||
}
|
||||
|
||||
void merge(const VarMoments & rhs)
|
||||
{
|
||||
m[0] += rhs.m[0];
|
||||
m[1] += rhs.m[1];
|
||||
m[2] += rhs.m[2];
|
||||
if constexpr (_level >= 3) m[3] += rhs.m[3];
|
||||
if constexpr (_level >= 4) m[4] += rhs.m[4];
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T getPopulation() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
|
||||
/// Due to numerical errors, the result can be slightly less than zero,
|
||||
/// but it should be impossible. Trim to zero.
|
||||
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
|
||||
}
|
||||
|
||||
T getSample() const
|
||||
{
|
||||
if (m[0] <= 1)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
|
||||
}
|
||||
|
||||
T getMoment3() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
/// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\]
|
||||
return (m[3]
|
||||
- (3 * m[2]
|
||||
- 2 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
|
||||
T getMoment4() const
|
||||
{
|
||||
if (m[0] == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
// to avoid accuracy problem
|
||||
if (m[0] == 1)
|
||||
return 0;
|
||||
/// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\]
|
||||
return (m[4]
|
||||
- (4 * m[3]
|
||||
- (6 * m[2]
|
||||
- 3 * m[1] * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) * m[1] / m[0]
|
||||
) / m[0];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, size_t _level>
|
||||
class VarMomentsDecimal
|
||||
{
|
||||
public:
|
||||
using NativeType = typename T::NativeType;
|
||||
|
||||
void add(NativeType x)
|
||||
{
|
||||
++m0;
|
||||
getM(1) += x;
|
||||
|
||||
NativeType tmp;
|
||||
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void merge(const VarMomentsDecimal & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
getM(1) += rhs.getM(1);
|
||||
|
||||
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
|
||||
if constexpr (_level >= 3)
|
||||
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
|
||||
if constexpr (_level >= 4)
|
||||
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
|
||||
|
||||
if (overflow)
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
|
||||
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
|
||||
|
||||
Float64 getPopulation(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
|
||||
}
|
||||
|
||||
Float64 getSample(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
if (m0 == 1)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
|
||||
}
|
||||
|
||||
Float64 getMoment3(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
Float64 getMoment4(UInt32 scale) const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<Float64>::infinity();
|
||||
|
||||
NativeType tmp;
|
||||
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
|
||||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
|
||||
common::mulOverflow(tmp, getM(1), tmp) ||
|
||||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
|
||||
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
|
||||
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
|
||||
}
|
||||
|
||||
private:
|
||||
UInt64 m0{};
|
||||
NativeType m[_level]{};
|
||||
|
||||
NativeType & getM(size_t i) { return m[i - 1]; }
|
||||
const NativeType & getM(size_t i) const { return m[i - 1]; }
|
||||
};
|
||||
|
||||
/**
|
||||
Calculating multivariate central moments
|
||||
Levels:
|
||||
level 2 (pop & samp): covar
|
||||
References:
|
||||
https://en.wikipedia.org/wiki/Moment_(mathematics)
|
||||
*/
|
||||
template <typename T>
|
||||
struct CovarMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
}
|
||||
|
||||
void merge(const CovarMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getPopulation() const
|
||||
{
|
||||
return (xy - x1 * y1 / m0) / m0;
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED getSample() const
|
||||
{
|
||||
if (m0 == 0)
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
return (xy - x1 * y1 / m0) / (m0 - 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct CorrMoments
|
||||
{
|
||||
T m0{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T xy{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void add(T x, T y)
|
||||
{
|
||||
++m0;
|
||||
x1 += x;
|
||||
y1 += y;
|
||||
xy += x * y;
|
||||
x2 += x * x;
|
||||
y2 += y * y;
|
||||
}
|
||||
|
||||
void merge(const CorrMoments & rhs)
|
||||
{
|
||||
m0 += rhs.m0;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
xy += rhs.xy;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get() const
|
||||
{
|
||||
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
|
||||
}
|
||||
};
|
||||
|
||||
/// Data for calculation of Student and Welch T-Tests.
|
||||
template <typename T>
|
||||
struct TTestMoments
|
||||
{
|
||||
T nx{};
|
||||
T ny{};
|
||||
T x1{};
|
||||
T y1{};
|
||||
T x2{};
|
||||
T y2{};
|
||||
|
||||
void addX(T value)
|
||||
{
|
||||
++nx;
|
||||
x1 += value;
|
||||
x2 += value * value;
|
||||
}
|
||||
|
||||
void addY(T value)
|
||||
{
|
||||
++ny;
|
||||
y1 += value;
|
||||
y2 += value * value;
|
||||
}
|
||||
|
||||
void merge(const TTestMoments & rhs)
|
||||
{
|
||||
nx += rhs.nx;
|
||||
ny += rhs.ny;
|
||||
x1 += rhs.x1;
|
||||
y1 += rhs.y1;
|
||||
x2 += rhs.x2;
|
||||
y2 += rhs.y2;
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writePODBinary(*this, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readPODBinary(*this, buf);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -114,7 +114,7 @@ class QuantileTDigest
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortFloatTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem.mean; }
|
||||
|
114
src/AggregateFunctions/StatCommon.h
Normal file
114
src/AggregateFunctions/StatCommon.h
Normal file
@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename F>
|
||||
static Float64 integrateSimpson(Float64 a, Float64 b, F && func)
|
||||
{
|
||||
const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b) - std::round(a)));
|
||||
const long double h = (b - a) / iterations;
|
||||
Float64 sum_odds = 0.0;
|
||||
for (size_t i = 1; i < iterations; i += 2)
|
||||
sum_odds += func(a + i * h);
|
||||
Float64 sum_evens = 0.0;
|
||||
for (size_t i = 2; i < iterations; i += 2)
|
||||
sum_evens += func(a + i * h);
|
||||
return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
|
||||
}
|
||||
|
||||
/// Because ranks are adjusted, we have to store each of them in Float type.
|
||||
using RanksArray = std::vector<Float64>;
|
||||
|
||||
template <typename Values>
|
||||
std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values)
|
||||
{
|
||||
const size_t size = values.size();
|
||||
/// Save initial positions, than sort indices according to the values.
|
||||
std::vector<size_t> indexes(size);
|
||||
std::iota(indexes.begin(), indexes.end(), 0);
|
||||
std::sort(indexes.begin(), indexes.end(),
|
||||
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
|
||||
|
||||
size_t left = 0;
|
||||
Float64 tie_numenator = 0;
|
||||
RanksArray out(size);
|
||||
while (left < size)
|
||||
{
|
||||
size_t right = left;
|
||||
while (right < size && values[indexes[left]] == values[indexes[right]])
|
||||
++right;
|
||||
auto adjusted = (left + right + 1.) / 2.;
|
||||
auto count_equal = right - left;
|
||||
tie_numenator += std::pow(count_equal, 3) - count_equal;
|
||||
for (size_t iter = left; iter < right; ++iter)
|
||||
out[indexes[iter]] = adjusted;
|
||||
left = right;
|
||||
}
|
||||
return {out, 1 - (tie_numenator / (std::pow(size, 3) - size))};
|
||||
}
|
||||
|
||||
|
||||
template <typename X, typename Y>
|
||||
struct StatisticalSample
|
||||
{
|
||||
using AllocatorXSample = MixedAlignedArenaAllocator<alignof(X), 4096>;
|
||||
using SampleX = PODArray<X, 32, AllocatorXSample>;
|
||||
|
||||
using AllocatorYSample = MixedAlignedArenaAllocator<alignof(Y), 4096>;
|
||||
using SampleY = PODArray<Y, 32, AllocatorYSample>;
|
||||
|
||||
SampleX x{};
|
||||
SampleY y{};
|
||||
size_t size_x{0};
|
||||
size_t size_y{0};
|
||||
|
||||
void addX(X value, Arena * arena)
|
||||
{
|
||||
++size_x;
|
||||
x.push_back(value, arena);
|
||||
}
|
||||
|
||||
void addY(Y value, Arena * arena)
|
||||
{
|
||||
++size_y;
|
||||
y.push_back(value, arena);
|
||||
}
|
||||
|
||||
void merge(const StatisticalSample & rhs, Arena * arena)
|
||||
{
|
||||
size_x += rhs.size_x;
|
||||
size_y += rhs.size_y;
|
||||
x.insert(rhs.x.begin(), rhs.x.end(), arena);
|
||||
y.insert(rhs.y.begin(), rhs.y.end(), arena);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeVarUInt(size_x, buf);
|
||||
writeVarUInt(size_y, buf);
|
||||
buf.write(reinterpret_cast<const char *>(x.data()), size_x * sizeof(x[0]));
|
||||
buf.write(reinterpret_cast<const char *>(y.data()), size_y * sizeof(y[0]));
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
readVarUInt(size_x, buf);
|
||||
readVarUInt(size_y, buf);
|
||||
x.resize(size_x, arena);
|
||||
y.resize(size_y, arena);
|
||||
buf.read(reinterpret_cast<char *>(x.data()), size_x * sizeof(x[0]));
|
||||
buf.read(reinterpret_cast<char *>(y.data()), size_y * sizeof(y[0]));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -32,16 +32,16 @@ void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionHistogram(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRetention(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMLMethod(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMoving(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionAggThrow(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
|
||||
|
||||
class AggregateFunctionCombinatorFactory;
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||
@ -86,7 +86,6 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionsMaxIntersections(factory);
|
||||
registerAggregateFunctionHistogram(factory);
|
||||
registerAggregateFunctionRetention(factory);
|
||||
registerAggregateFunctionTimeSeriesGroupSum(factory);
|
||||
registerAggregateFunctionMLMethod(factory);
|
||||
registerAggregateFunctionEntropy(factory);
|
||||
registerAggregateFunctionSimpleLinearRegression(factory);
|
||||
@ -94,6 +93,9 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionCategoricalIV(factory);
|
||||
registerAggregateFunctionAggThrow(factory);
|
||||
registerAggregateFunctionRankCorrelation(factory);
|
||||
registerAggregateFunctionMannWhitney(factory);
|
||||
registerAggregateFunctionWelchTTest(factory);
|
||||
registerAggregateFunctionStudentTTest(factory);
|
||||
}
|
||||
|
||||
{
|
||||
|
27
src/AggregateFunctions/tests/gtest_ranks.cpp
Normal file
27
src/AggregateFunctions/tests/gtest_ranks.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <AggregateFunctions/StatCommon.h>
|
||||
#include <iostream>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
|
||||
TEST(Ranks, Simple)
|
||||
{
|
||||
using namespace DB;
|
||||
RanksArray sample = {310, 195, 480, 530, 155, 530, 245, 385, 450, 450, 465, 545, 170, 180, 125, 180, 230, 170, 75, 430, 480, 495, 295};
|
||||
|
||||
RanksArray ranks;
|
||||
Float64 t = 0;
|
||||
std::tie(ranks, t) = computeRanksAndTieCorrection(sample);
|
||||
|
||||
RanksArray expected{12.0, 8.0, 18.5, 21.5, 3.0, 21.5, 10.0, 13.0, 15.5, 15.5, 17.0, 23.0, 4.5, 6.5, 2.0, 6.5, 9.0, 4.5, 1.0, 14.0, 18.5, 20.0, 11.0};
|
||||
|
||||
ASSERT_EQ(ranks.size(), expected.size());
|
||||
|
||||
for (size_t i = 0; i < ranks.size(); ++i)
|
||||
ASSERT_DOUBLE_EQ(ranks[i], expected[i]);
|
||||
|
||||
ASSERT_DOUBLE_EQ(t, 0.9975296442687747);
|
||||
}
|
@ -29,6 +29,7 @@ SRCS(
|
||||
AggregateFunctionHistogram.cpp
|
||||
AggregateFunctionIf.cpp
|
||||
AggregateFunctionMLMethod.cpp
|
||||
AggregateFunctionMannWhitney.cpp
|
||||
AggregateFunctionMaxIntersections.cpp
|
||||
AggregateFunctionMerge.cpp
|
||||
AggregateFunctionMinMaxAny.cpp
|
||||
@ -43,13 +44,14 @@ SRCS(
|
||||
AggregateFunctionState.cpp
|
||||
AggregateFunctionStatistics.cpp
|
||||
AggregateFunctionStatisticsSimple.cpp
|
||||
AggregateFunctionStudentTTest.cpp
|
||||
AggregateFunctionSum.cpp
|
||||
AggregateFunctionSumMap.cpp
|
||||
AggregateFunctionTimeSeriesGroupSum.cpp
|
||||
AggregateFunctionTopK.cpp
|
||||
AggregateFunctionUniq.cpp
|
||||
AggregateFunctionUniqCombined.cpp
|
||||
AggregateFunctionUniqUpTo.cpp
|
||||
AggregateFunctionWelchTTest.cpp
|
||||
AggregateFunctionWindowFunnel.cpp
|
||||
UniqCombinedBiasData.cpp
|
||||
UniqVariadicHash.cpp
|
||||
|
@ -223,7 +223,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
|
||||
Dictionaries/FlatDictionary.cpp
|
||||
Dictionaries/HashedDictionary.cpp
|
||||
Dictionaries/CacheDictionary.cpp
|
||||
Dictionaries/TrieDictionary.cpp
|
||||
Dictionaries/IPAddressDictionary.cpp
|
||||
Dictionaries/RangeHashedDictionary.cpp
|
||||
Dictionaries/ComplexKeyHashedDictionary.cpp
|
||||
Dictionaries/ComplexKeyCacheDictionary.cpp
|
||||
@ -305,7 +305,6 @@ endif()
|
||||
|
||||
dbms_target_link_libraries (
|
||||
PRIVATE
|
||||
${BTRIE_LIBRARIES}
|
||||
boost::filesystem
|
||||
boost::program_options
|
||||
clickhouse_common_config
|
||||
@ -382,6 +381,10 @@ if (USE_PROTOBUF)
|
||||
dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
||||
if (USE_GRPC)
|
||||
dbms_target_link_libraries (PUBLIC clickhouse_grpc_protos)
|
||||
endif()
|
||||
|
||||
if (USE_HDFS)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ${HDFS3_LIBRARY})
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
|
||||
|
@ -207,6 +207,12 @@ void Connection::receiveHello()
|
||||
/// Receive hello packet.
|
||||
UInt64 packet_type = 0;
|
||||
|
||||
/// Prevent read after eof in readVarUInt in case of reset connection
|
||||
/// (Poco should throw such exception while reading from socket but
|
||||
/// sometimes it doesn't for unknown reason)
|
||||
if (in->eof())
|
||||
throw Poco::Net::NetException("Connection reset by peer");
|
||||
|
||||
readVarUInt(packet_type, *in);
|
||||
if (packet_type == Protocol::Server::Hello)
|
||||
{
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
|
||||
bool isNumeric() const override { return false; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool isFixedAndContiguous() const override { return true; }
|
||||
bool isFixedAndContiguous() const final { return true; }
|
||||
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
|
||||
|
||||
size_t size() const override { return data.size(); }
|
||||
|
@ -523,7 +523,10 @@
|
||||
M(554, LZMA_STREAM_DECODER_FAILED) \
|
||||
M(555, ROCKSDB_ERROR) \
|
||||
M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\
|
||||
M(557, DATABASE_REPLICATION_FAILED) \
|
||||
M(557, UNKNOWN_UNION) \
|
||||
M(558, EXPECTED_ALL_OR_DISTINCT) \
|
||||
M(559, INVALID_GRPC_QUERY_INFO) \
|
||||
M(560, DATABASE_REPLICATION_FAILED) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -1,30 +1,35 @@
|
||||
#include "IPv6ToBinary.h"
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
|
||||
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
|
||||
{
|
||||
std::array<char, 16> res;
|
||||
|
||||
if (Poco::Net::IPAddress::IPv6 == address.family())
|
||||
{
|
||||
memcpy(res.data(), address.addr(), 16);
|
||||
memcpy(res, address.addr(), 16);
|
||||
}
|
||||
else if (Poco::Net::IPAddress::IPv4 == address.family())
|
||||
{
|
||||
/// Convert to IPv6-mapped address.
|
||||
memset(res.data(), 0, 10);
|
||||
memset(res, 0, 10);
|
||||
res[10] = '\xFF';
|
||||
res[11] = '\xFF';
|
||||
memcpy(&res[12], address.addr(), 4);
|
||||
}
|
||||
else
|
||||
memset(res.data(), 0, 16);
|
||||
memset(res, 0, 16);
|
||||
}
|
||||
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
|
||||
{
|
||||
std::array<char, 16> res;
|
||||
IPv6ToRawBinary(address, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1,11 +1,16 @@
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <common/types.h>
|
||||
|
||||
namespace Poco { namespace Net { class IPAddress; }}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
|
||||
/// Saves result into the first 16 bytes of `res`.
|
||||
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
|
||||
|
||||
/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);
|
||||
|
||||
|
21
src/Common/OpenTelemetryTraceContext.h
Normal file
21
src/Common/OpenTelemetryTraceContext.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
// The runtime info we need to create new OpenTelemetry spans.
|
||||
struct OpenTelemetryTraceContext
|
||||
{
|
||||
__uint128_t trace_id = 0;
|
||||
UInt64 span_id = 0;
|
||||
// The incoming tracestate header and the trace flags, we just pass them
|
||||
// downstream. See https://www.w3.org/TR/trace-context/
|
||||
String tracestate;
|
||||
__uint8_t trace_flags = 0;
|
||||
|
||||
// Parse/compose OpenTelemetry traceparent header.
|
||||
bool parseTraceparentHeader(const std::string & traceparent, std::string & error);
|
||||
std::string composeTraceparentHeader() const;
|
||||
};
|
||||
|
||||
}
|
@ -97,9 +97,6 @@
|
||||
M(DistributedConnectionStaleReplica, "") \
|
||||
M(DistributedConnectionFailAtAll, "Total count when distributed connection fails after all retries finished") \
|
||||
\
|
||||
M(CompileAttempt, "Number of times a compilation of generated C++ code was initiated.") \
|
||||
M(CompileSuccess, "Number of times a compilation of generated C++ code was successful.") \
|
||||
\
|
||||
M(CompileFunction, "Number of times a compilation of generated LLVM code (to create fused function for complex expressions) was initiated.") \
|
||||
M(CompiledFunctionExecute, "Number of times a compiled function was executed.") \
|
||||
M(CompileExpressionsMicroseconds, "Total time spent for compilation of expressions to LLVM code.") \
|
||||
|
@ -176,7 +176,7 @@ template class QueryProfilerBase<QueryProfilerReal>;
|
||||
template class QueryProfilerBase<QueryProfilerCpu>;
|
||||
|
||||
QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period)
|
||||
: QueryProfilerBase(thread_id, CLOCK_REALTIME, period, SIGUSR1)
|
||||
: QueryProfilerBase(thread_id, CLOCK_MONOTONIC, period, SIGUSR1)
|
||||
{}
|
||||
|
||||
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
|
||||
|
@ -35,16 +35,16 @@
|
||||
|
||||
/** Used as a template parameter. See below.
|
||||
*/
|
||||
struct RadixSortMallocAllocator
|
||||
struct RadixSortAllocator
|
||||
{
|
||||
void * allocate(size_t size)
|
||||
{
|
||||
return malloc(size);
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
void deallocate(void * ptr, size_t /*size*/)
|
||||
void deallocate(void * ptr, size_t size)
|
||||
{
|
||||
return free(ptr);
|
||||
::operator delete(ptr, size);
|
||||
}
|
||||
};
|
||||
|
||||
@ -100,7 +100,7 @@ struct RadixSortFloatTraits
|
||||
/// An object with the functions allocate and deallocate.
|
||||
/// Can be used, for example, to allocate memory for a temporary array on the stack.
|
||||
/// To do this, the allocator itself is created on the stack.
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
/// The function to get the key from an array element.
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
@ -139,7 +139,7 @@ struct RadixSortUIntTraits
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortIdentityTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
static Result & extractResult(Element & elem) { return elem; }
|
||||
@ -173,7 +173,7 @@ struct RadixSortIntTraits
|
||||
static constexpr size_t PART_SIZE_BITS = 8;
|
||||
|
||||
using Transform = RadixSortSignedTransform<KeyBits>;
|
||||
using Allocator = RadixSortMallocAllocator;
|
||||
using Allocator = RadixSortAllocator;
|
||||
|
||||
static Key & extractKey(Element & elem) { return elem; }
|
||||
static Result & extractResult(Element & elem) { return elem; }
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user