Merge branch 'master' into database_replicated

Alexander Tokmakov 2020-11-27 17:08:34 +03:00
commit 9e3fd3c170
364 changed files with 15844 additions and 5180 deletions

2
.gitignore vendored

@ -124,3 +124,5 @@ website/package-lock.json
# Toolchains
/cmake/toolchain/*
*.iml

10
.gitmodules vendored

@ -44,6 +44,7 @@
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/ClickHouse-Extras/protobuf.git
branch = v3.13.0.1
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse-Extras/boost.git
@ -107,6 +108,7 @@
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/ClickHouse-Extras/grpc.git
branch = v1.33.2
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
@ -198,5 +200,9 @@
url = https://github.com/facebook/rocksdb
branch = v6.14.5
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz
[submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
branch = lts_2020_02_25


@ -475,9 +475,6 @@ find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
find_contrib_lib(btrie)
if (ENABLE_TESTS)
include (cmake/find/gtest.cmake)
endif ()


@ -1,6 +1,6 @@
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
## Useful Links
@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020.
* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020.


@ -3,7 +3,6 @@
/// Macros for convenient usage of Poco logger.
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
#include <Common/CurrentThread.h>


@ -0,0 +1,19 @@
#define _GNU_SOURCE
#include <sys/socket.h>
#include <errno.h>
#include <fcntl.h>
#include "syscall.h"
int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg)
{
if (!flg) return accept(fd, addr, len);
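/* Try the real accept4 syscall first; if the kernel lacks it (ENOSYS) or rejects the flags (EINVAL), emulate it below with plain accept() plus fcntl(). */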
int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
ret = accept(fd, addr, len);
if (ret<0) return ret;
if (flg & SOCK_CLOEXEC)
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
if (flg & SOCK_NONBLOCK)
__syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK);
return ret;
}


@ -0,0 +1,37 @@
#include <sys/epoll.h>
#include <signal.h>
#include <errno.h>
#include "syscall.h"
int epoll_create(int size)
{
return epoll_create1(0);
}
int epoll_create1(int flags)
{
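/* Prefer epoll_create1; fall back to the legacy epoll_create syscall (which takes no flags) on old kernels. */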
int r = __syscall(SYS_epoll_create1, flags);
#ifdef SYS_epoll_create
if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1);
#endif
return __syscall_ret(r);
}
int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
{
return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
}
int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
{
int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
#ifdef SYS_epoll_wait
if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
#endif
return __syscall_ret(r);
}
int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
{
return epoll_pwait(fd, ev, cnt, to, 0);
}


@ -0,0 +1,23 @@
#include <sys/eventfd.h>
#include <unistd.h>
#include <errno.h>
#include "syscall.h"
int eventfd(unsigned int count, int flags)
{
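/* Prefer eventfd2; fall back to the legacy eventfd syscall, which accepts no flags. */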
int r = __syscall(SYS_eventfd2, count, flags);
#ifdef SYS_eventfd
if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count);
#endif
return __syscall_ret(r);
}
int eventfd_read(int fd, eventfd_t *value)
{
return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1;
}
int eventfd_write(int fd, eventfd_t value)
{
return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1;
}


@ -0,0 +1,45 @@
#include <sys/auxv.h>
#include <unistd.h> // __environ
#include <errno.h>
// We don't have libc struct available here. Compute aux vector manually.
static unsigned long * __auxv = NULL;
static unsigned long __auxv_secure = 0;
static size_t __find_auxv(unsigned long type)
{
size_t i;
for (i = 0; __auxv[i]; i += 2)
{
if (__auxv[i] == type)
return i + 1;
}
return (size_t) -1;
}
__attribute__((constructor)) static void __auxv_init()
{
size_t i;
for (i = 0; __environ[i]; i++);
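/* The auxiliary vector of (type, value) pairs starts right after the terminating NULL of environ. */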
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
}
unsigned long getauxval(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
if (__auxv)
{
size_t index = __find_auxv(type);
if (index != ((size_t) -1))
return __auxv[index];
}
errno = ENOENT;
return 0;
}


@ -0,0 +1,8 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/auxv.h>
char * secure_getenv(const char * name)
{
return getauxval(AT_SECURE) ? NULL : getenv(name);
}


@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...);
__attribute__((visibility("hidden")))
void *__vdsosym(const char *, const char *);
#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
#define socketcall(...) __syscall_ret(__socketcall(__VA_ARGS__))
#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
#define socketcall_cp socketcall


@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
extern char** environ;
static Ehdr *eh = NULL;
void *__vdsosym(const char *vername, const char *name);
// We don't have libc struct available here. Compute aux vector manually.
__attribute__((constructor)) static void auxv_init()
{
size_t i, *auxv;
for (i=0; environ[i]; i++);
auxv = (void *)(environ+i+1);
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
if (!auxv[i]) return;
if (!auxv[i+1]) return;
eh = (void *)auxv[i+1];
}
void *__vdsosym(const char *vername, const char *name)
{
size_t i;
Ehdr * eh = (void *) getauxval(AT_SYSINFO_EHDR);
if (!eh) return 0;
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
size_t *dynv=0, base=-1;


@ -1,44 +0,0 @@
# - Try to find btrie headers and libraries.
#
# Usage of this module as follows:
#
# find_package(btrie)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# BTRIE_ROOT_DIR Set this variable to the root installation of
# btrie if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# BTRIE_FOUND System has btrie libs/headers
# BTRIE_LIBRARIES The btrie library/libraries
# BTRIE_INCLUDE_DIR The location of btrie headers
find_path(BTRIE_ROOT_DIR
NAMES include/btrie.h
)
find_library(BTRIE_LIBRARIES
NAMES btrie
PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
)
find_path(BTRIE_INCLUDE_DIR
NAMES btrie.h
PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(btrie DEFAULT_MSG
BTRIE_LIBRARIES
BTRIE_INCLUDE_DIR
)
mark_as_advanced(
BTRIE_ROOT_DIR
BTRIE_LIBRARIES
BTRIE_INCLUDE_DIR
)


@ -6,11 +6,9 @@ Defines the following variables:
The include directories of the gRPC framework, including the include directories of the C++ wrapper.
``gRPC_LIBRARIES``
The libraries of the gRPC framework.
``gRPC_UNSECURE_LIBRARIES``
The libraries of the gRPC framework without SSL.
``_gRPC_CPP_PLUGIN``
``gRPC_CPP_PLUGIN``
The plugin for generating gRPC client and server C++ stubs from `.proto` files
``_gRPC_PYTHON_PLUGIN``
``gRPC_PYTHON_PLUGIN``
The plugin for generating gRPC client and server Python stubs from `.proto` files
The following :prop_tgt:`IMPORTED` targets are also defined:
@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined:
``grpc_cpp_plugin``
``grpc_python_plugin``
Set the following variables to adjust the behaviour of this script:
``gRPC_USE_UNSECURE_LIBRARIES``
if set, gRPC_LIBRARIES will be filled with the unsecure versions of the libraries (i.e. without SSL)
instead of the secure ones.
``gRPC_DEBUG``
if set, debug messages will be printed.
Add custom commands to process ``.proto`` files to C++::
protobuf_generate_grpc_cpp(<SRCS> <HDRS>
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc)
find_library(gRPC_CPP_LIBRARY NAMES grpc++)
find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure)
find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure)
find_library(gRPC_CARES_LIBRARY NAMES cares)
set(gRPC_LIBRARIES)
if(gRPC_USE_UNSECURE_LIBRARIES)
@ -259,6 +265,7 @@ else()
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY})
endif()
endif()
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY})
# Restore the original find library ordering.
if(gRPC_USE_STATIC_LIBS)
@ -278,11 +285,11 @@ else()
endif()
# Get full path to plugin.
find_program(_gRPC_CPP_PLUGIN
find_program(gRPC_CPP_PLUGIN
NAMES grpc_cpp_plugin
DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files")
find_program(_gRPC_PYTHON_PLUGIN
find_program(gRPC_PYTHON_PLUGIN
NAMES grpc_python_plugin
DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files")
@ -317,14 +324,14 @@ endif()
#include(FindPackageHandleStandardArgs.cmake)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN)
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN)
if(gRPC_FOUND)
if(gRPC_DEBUG)
message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}")
message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}")
message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}")
message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}")
message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}")
message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}")
endif()
endif()


@ -1,3 +1,4 @@
# Needed when using Apache Avro serialization format
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
if (NOT ENABLE_AVRO)


@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY)
if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
elseif(NOT _gRPC_CPP_PLUGIN)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin")
elseif(NOT gRPC_CPP_PLUGIN)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
else()
set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
else()
set(gRPC_LIBRARIES grpc grpc++)
endif()
set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
set(_gRPC_PROTOC_EXECUTABLE $<TARGET_FILE:protobuf::protoc>)
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
set(USE_GRPC 1)
endif()
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}")
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")


@ -1,3 +1,5 @@
# Needed when securely connecting to an external server, e.g.
# clickhouse-client --host ... --secure
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
if(NOT ENABLE_SSL)


@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
add_subdirectory (libfarmhash)
endif ()
if (USE_INTERNAL_BTRIE_LIBRARY)
add_subdirectory (libbtrie)
endif ()
if (USE_INTERNAL_ZLIB_LIBRARY)
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")

1
contrib/abseil-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43
Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93


@ -1,6 +1,7 @@
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
# Use re2 from ClickHouse contrib, not from gRPC third_party.
if(NOT RE2_INCLUDE_DIR)
message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown")
endif()
@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}")
set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}")
# Use zlib from ClickHouse contrib, not from gRPC third_party.
if(NOT ZLIB_INCLUDE_DIRS)
message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown")
endif()
@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}")
set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}")
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown")
elseif (NOT Protobuf_PROTOC_EXECUTABLE)
@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc")
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}")
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES})
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
# Choose to build static or shared library for c-ares.
if (MAKE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
# We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF)
# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off.
set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING})
set(BUILD_TESTING OFF)
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING})
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
# so we need to redefine it back.
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")


@ -1,6 +0,0 @@
add_library(btrie
src/btrie.c
include/btrie.h
)
target_include_directories (btrie SYSTEM PUBLIC include)


@ -1,23 +0,0 @@
Copyright (c) 2013, CobbLiu
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -1,160 +0,0 @@
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
/**
* In btrie, each leaf means one bit in ip tree.
* Left means 0, and right means 1.
*/
#define BTRIE_NULL (uintptr_t) -1
#if !defined(BTRIE_MAX_PAGES)
/// 54 ip per page. 8 bytes memory per page when empty
#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty
// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
#endif
typedef struct btrie_node_s btrie_node_t;
struct btrie_node_s {
btrie_node_t *right;
btrie_node_t *left;
btrie_node_t *parent;
uintptr_t value;
};
typedef struct btrie_s {
btrie_node_t *root;
btrie_node_t *free; /* free list of btrie */
char *start;
size_t size;
/*
* memory pool.
* memory management(esp free) will be so easy by using this facility.
*/
char *pools[BTRIE_MAX_PAGES];
size_t len;
} btrie_t;
/**
* Create an empty btrie
*
* @Return:
* An ip radix_tree created.
* NULL if creation failed.
*/
btrie_t *btrie_create();
/**
* Destroy the ip radix_tree
*
* @Return:
* OK if deletion succeed.
* ERROR if error occurs while deleting.
*/
int btrie_destroy(btrie_t *tree);
/**
* Count the nodes in the radix tree.
*/
size_t btrie_count(btrie_t *tree);
/**
* Return the allocated number of bytes.
*/
size_t btrie_allocated(btrie_t *tree);
/**
* Add an ipv4 into btrie
*
* @Args:
* key: ip address
* mask: key's mask
* value: value of this IP, may be NULL.
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
uintptr_t value);
/**
* Delete an ipv4 from btrie
*
* @Args:
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
/**
* Find an ipv4 from btrie
*
* @Args:
*
* @Return:
* Value if succeed.
* NULL if failed.
*/
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
/**
* Add an ipv6 into btrie
*
* @Args:
* key: ip address
* mask: key's mask
* value: value of this IP, may be NULL.
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value);
/**
* Delete an ipv6 from btrie
*
* @Args:
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
/**
* Find an ipv6 from btrie
*
* @Args:
*
* @Return:
* Value if succeed.
* NULL if failed.
*/
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
#if defined (__cplusplus)
}
#endif


@ -1,460 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <btrie.h>
#define PAGE_SIZE 4096
static btrie_node_t *
btrie_alloc(btrie_t *tree)
{
btrie_node_t *p;
if (tree->free) {
p = tree->free;
tree->free = tree->free->right;
return p;
}
if (tree->size < sizeof(btrie_node_t)) {
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
if (tree->start == NULL) {
return NULL;
}
tree->pools[tree->len++] = tree->start;
tree->size = PAGE_SIZE;
}
p = (btrie_node_t *) tree->start;
tree->start += sizeof(btrie_node_t);
tree->size -= sizeof(btrie_node_t);
return p;
}
btrie_t *
btrie_create()
{
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
if (tree == NULL) {
return NULL;
}
tree->free = NULL;
tree->start = NULL;
tree->size = 0;
memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
tree->len = 0;
tree->root = btrie_alloc(tree);
if (tree->root == NULL) {
return NULL;
}
tree->root->right = NULL;
tree->root->left = NULL;
tree->root->parent = NULL;
tree->root->value = BTRIE_NULL;
return tree;
}
static size_t
subtree_weight(btrie_node_t *node)
{
size_t weight = 1;
if (node->left) {
weight += subtree_weight(node->left);
}
if (node->right) {
weight += subtree_weight(node->right);
}
return weight;
}
size_t
btrie_count(btrie_t *tree)
{
if (tree->root == NULL) {
return 0;
}
return subtree_weight(tree->root);
}
size_t
btrie_allocated(btrie_t *tree)
{
return tree->len * PAGE_SIZE;
}
int
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
uintptr_t value)
{
uint32_t bit;
btrie_node_t *node, *next;
bit = 0x80000000;
node = tree->root;
next = tree->root;
while (bit & mask) {
if (key & bit) {
next = node->right;
} else {
next = node->left;
}
if (next == NULL) {
break;
}
bit >>= 1;
node = next;
}
if (next) {
if (node->value != BTRIE_NULL) {
return -1;
}
node->value = value;
return 0;
}
while (bit & mask) {
next = btrie_alloc(tree);
if (next == NULL) {
return -1;
}
next->right = NULL;
next->left = NULL;
next->parent = node;
next->value = BTRIE_NULL;
if (key & bit) {
node->right = next;
} else {
node->left = next;
}
bit >>= 1;
node = next;
}
node->value = value;
return 0;
}
int
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
{
uint32_t bit;
btrie_node_t *node;
bit = 0x80000000;
node = tree->root;
while (node && (bit & mask)) {
if (key & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
}
if (node == NULL) {
return -1;
}
if (node->right || node->left) {
if (node->value != BTRIE_NULL) {
node->value = BTRIE_NULL;
return 0;
}
return -1;
}
for ( ;; ) {
if (node->parent->right == node) {
node->parent->right = NULL;
} else {
node->parent->left = NULL;
}
node->right = tree->free;
tree->free = node;
node = node->parent;
if (node->right || node->left) {
break;
}
if (node->value != BTRIE_NULL) {
break;
}
if (node->parent == NULL) {
break;
}
}
return 0;
}
uintptr_t
btrie_find(btrie_t *tree, uint32_t key)
{
uint32_t bit;
uintptr_t value;
btrie_node_t *node;
bit = 0x80000000;
value = BTRIE_NULL;
node = tree->root;
while (node) {
if (node->value != BTRIE_NULL) {
value = node->value;
}
if (key & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
}
return value;
}
int
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value)
{
uint8_t bit;
unsigned int i;
btrie_node_t *node, *next;
i = 0;
bit = 0x80;
node = tree->root;
next = tree->root;
while (bit & mask[i]) {
if (key[i] & bit) {
next = node->right;
} else {
next = node->left;
}
if (next == NULL) {
break;
}
bit >>= 1;
node = next;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
if (next) {
if (node->value != BTRIE_NULL) {
return -1;
}
node->value = value;
return 0;
}
while (bit & mask[i]) {
next = btrie_alloc(tree);
if (next == NULL) {
return -1;
}
next->right = NULL;
next->left = NULL;
next->parent = node;
next->value = BTRIE_NULL;
if (key[i] & bit) {
node->right = next;
} else {
node->left = next;
}
bit >>= 1;
node = next;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
node->value = value;
return 0;
}
int
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
{
uint8_t bit;
unsigned int i;
btrie_node_t *node;
i = 0;
bit = 0x80;
node = tree->root;
while (node && (bit & mask[i])) {
if (key[i] & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
if (node == NULL) {
return -1;
}
if (node->right || node->left) {
if (node->value != BTRIE_NULL) {
node->value = BTRIE_NULL;
return 0;
}
return -1;
}
for ( ;; ) {
if (node->parent->right == node) {
node->parent->right = NULL;
} else {
node->parent->left = NULL;
}
node->right = tree->free;
tree->free = node;
node = node->parent;
if (node->right || node->left) {
break;
}
if (node->value != BTRIE_NULL) {
break;
}
if (node->parent == NULL) {
break;
}
}
return 0;
}
uintptr_t
btrie_find_a6(btrie_t *tree, const uint8_t *key)
{
uint8_t bit;
uintptr_t value;
unsigned int i;
btrie_node_t *node;
i = 0;
bit = 0x80;
value = BTRIE_NULL;
node = tree->root;
while (node) {
if (node->value != BTRIE_NULL) {
value = node->value;
}
if (key[i] & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
if (bit == 0) {
i++;
bit = 0x80;
}
}
return value;
}
int
btrie_destroy(btrie_t *tree)
{
size_t i;
/* free memory pools */
for (i = 0; i < tree->len; i++) {
free(tree->pools[i]);
}
free(tree);
return 0;
}


@ -1,103 +0,0 @@
#include <stdio.h>
#include <btrie.h>
int main()
{
btrie_t *it;
int ret;
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
it = btrie_create();
if (it == NULL) {
printf("create error!\n");
return 0;
}
//add 101.45.69.50/16
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
if (ret != 0) {
printf("insert 1 error.\n");
goto error;
}
//add 10.45.69.50/16
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
if (ret != 0) {
printf("insert 2 error.\n");
goto error;
}
//add 10.45.79.50/16
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
if (ret == 0) {
printf("insert 3 error.\n");
goto error;
}
//add 102.45.79.50/24
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
if (ret != 0) {
printf("insert 4 error.\n");
goto error;
}
ret = btrie_find(it, 170741554);
if (ret == 1) {
printf("test case 1 passed\n");
} else {
printf("test case 1 error\n");
}
ret = btrie_find(it, 170786817);
if (ret != 1) {
printf("test case 2 passed\n");
} else {
printf("test case 2 error\n");
}
ret = btrie_delete(it, 1714245426, 0xffffff00);
if (ret != 0) {
printf("delete 1 error\n");
goto error;
}
ret = btrie_find(it, 1714245426);
if (ret != 1) {
printf("test case 3 passed\n");
} else {
printf("test case 3 error\n");
}
//add dead:beef::/32
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
if (ret != 0) {
printf("insert 5 error\n");
goto error;
}
ret = btrie_find_a6(it, ip_v6);
if (ret == 1) {
printf("test case 4 passed\n");
} else {
printf("test case 4 error\n");
}
// insert 4m ips
for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
ret = btrie_insert(it, ip, 0xffffffff, 1);
if (ret != 0) {
printf("insert 5 error (%d) (%zu) .\n", ret, ip);
goto error;
}
}
return 0;
error:
btrie_destroy(it);
printf("test failed\n");
return 1;
}


@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st
set(LIBUNWIND_ASM_SOURCES
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S)
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
# Work around these issues by compiling as C.
#
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
else()
enable_language(ASM)
endif()
set(LIBUNWIND_SOURCES
${LIBUNWIND_CXX_SOURCES}

2
contrib/protobuf vendored

@ -1 +1 @@
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee


@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
fi
is_running()
{
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
}
wait_for_done()
{
timeout=$1
attempts=0
while is_running; do
attempts=$(($attempts + 1))
if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
return 1
fi
sleep 1
done
}
die()
{
echo $1 >&2
@ -105,49 +85,7 @@ check_config()
initdb()
{
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
fi
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
else
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
fi
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
return
fi
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
return
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
else
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
fi
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
@ -171,17 +109,7 @@ restart()
forcestop()
{
local EXIT_STATUS
EXIT_STATUS=0
echo -n "Stop forcefully $PROGRAM service: "
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
wait_for_done
echo "DONE"
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
}
@ -261,16 +189,16 @@ main()
service_or_func restart
;;
condstart)
is_running || service_or_func start
service_or_func start
;;
condstop)
is_running && service_or_func stop
service_or_func stop
;;
condrestart)
is_running && service_or_func restart
service_or_func restart
;;
condreload)
is_running && service_or_func restart
service_or_func restart
;;
initdb)
initdb
@ -293,17 +221,7 @@ main()
status()
{
if is_running; then
echo "$PROGRAM service is running"
exit 0
else
if is_cron_disabled; then
echo "$PROGRAM service is stopped";
else
echo "$PROGRAM: process unexpectedly terminated"
fi
exit 3
fi
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
}


@ -56,6 +56,7 @@ RUN apt-get update \
libprotoc-dev \
libgrpc++-dev \
protobuf-compiler-grpc \
libc-ares-dev \
rapidjson-dev \
libsnappy-dev \
libparquet-dev \


@ -104,223 +104,248 @@ function start_server
function clone_root
{
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
(
cd "$FASTTEST_SOURCE"
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
git checkout FETCH_HEAD
echo 'Cloned merge head'
else
git fetch
git checkout "$COMMIT_SHA"
echo 'Checked out to commit'
fi
else
if [ -v COMMIT_SHA ]; then
git checkout "$COMMIT_SHA"
fi
fi
)
}
function clone_submodules
{
(
cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(
contrib/boost
contrib/zlib-ng
contrib/libxml2
contrib/poco
contrib/libunwind
contrib/ryu
contrib/fmtlib
contrib/base64
contrib/cctz
contrib/libcpuid
contrib/double-conversion
contrib/libcxx
contrib/libcxxabi
contrib/libc-headers
contrib/lz4
contrib/zstd
contrib/fastops
contrib/rapidjson
contrib/re2
contrib/sparsehash-c11
contrib/croaring
contrib/miniselect
contrib/xz
)
git submodule sync
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
git submodule foreach git reset --hard
git submodule foreach git checkout @ -f
git submodule foreach git clean -xfd
)
}
function run_cmake
{
CMAKE_LIBS_CONFIG=(
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
)
# TODO remove this? we don't use ccache anyway. An option would be to download it
# from S3 simultaneously with cloning.
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
export CCACHE_MAXSIZE=15G
ccache --show-stats ||:
ccache --zero-stats ||:
mkdir "$FASTTEST_BUILD" ||:
(
cd "$FASTTEST_BUILD"
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
)
}
function build
{
(
cd "$FASTTEST_BUILD"
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
fi
ccache --show-stats ||:
)
}
function configure
{
clickhouse-client --version
clickhouse-test --help
mkdir -p "$FASTTEST_DATA"{,/client-config}
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
# doesn't support SSL
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
}
function run_tests
{
clickhouse-server --version
clickhouse-test --help
# Kill the server in case we are running locally and not in docker
stop_server ||:
start_server
TESTS_TO_SKIP=(
00105_shard_collations
00109_shard_totals_after_having
00110_external_sort
00302_http_compression
00417_kill_query
00436_convert_charset
00490_special_line_separators_and_characters_outside_of_bmp
00652_replicated_mutations_zookeeper
00682_empty_parts_merge
00701_rollup
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
00926_multimatch
00929_multi_match_edit_distance
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
01092_memory_profiler
01098_msgpack_format
01098_temporary_and_external_tables
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
01193_metadata_loading
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01251_dict_is_in_infinite_loop
01259_dictionary_custom_settings_ddl
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
01305_replica_create_drop_zookeeper
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro
base64
brotli
capnproto
client
ddl_dictionaries
h3
hashing
hdfs
java_hash
json
limit_memory
live_view
memory_leak
memory_limit
mysql
odbc
parallel_alter
parquet
protobuf
secure
sha256
xz
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
00646_url_engine
00974_query_profiler
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
01504_rocksdb
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert
01541_max_memory_usage_for_user
# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
01561_mann_whitney_scipy
01545_system_errors
# Checks system.errors
01563_distributed_query_finish
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
# running concurrently. If they fail even in sequential mode, we will report them.
# FIXME All tests that require exclusive access to the server must be
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
# run them in a separate group after all other tests. This is faster and also
# explicit instead of guessing.
if [[ -n "${FAILED_TESTS[*]}" ]]
then
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
start_server
echo "Going to run again: ${FAILED_TESTS[*]}"
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi
}
case "$stage" in


@ -415,4 +415,4 @@ if not args.keep_created_tables and not args.use_existing_tables:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
reportStageEnd('drop-2')


@ -10,6 +10,11 @@ RUN apt-get update --yes \
gpg-agent \
debsig-verify \
strace \
protobuf-compiler \
protobuf-compiler-grpc \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
--yes --no-install-recommends
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
@ -33,7 +38,8 @@ RUN set -x \
&& dpkg -i "${PKG_VERSION}.deb"
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
&& ninja re2_st clickhouse_grpc_protos \
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log


@ -13,9 +13,9 @@ cmake .. \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DCLICKHOUSE_SPLIT_BINARY=ON \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```


@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |


@ -273,13 +273,15 @@ SELECT
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
```text
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
``` sqk
``` sql
select count() FROM UAct
```


@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster:
</zookeeper>
```
ClickHouse also supports storing replicas' meta information in an auxiliary ZooKeeper cluster by providing the ZooKeeper cluster name and path as engine arguments.
In other words, it supports storing the metadata of different tables in different ZooKeeper clusters.
Example of setting the addresses of the auxiliary ZooKeeper cluster:
``` xml
<auxiliary_zookeepers>
<zookeeper2>
<node index="1">
<host>example_2_1</host>
<port>2181</port>
</node>
<node index="2">
<host>example_2_2</host>
<port>2181</port>
</node>
<node index="3">
<host>example_2_3</host>
<port>2181</port>
</node>
</zookeeper2>
<zookeeper3>
<node index="1">
<host>example_3_1</host>
<port>2181</port>
</node>
</zookeeper3>
</auxiliary_zookeepers>
```
To store table metadata in an auxiliary ZooKeeper cluster instead of the default ZooKeeper cluster, we can use SQL to create the table with
the ReplicatedMergeTree engine as follows:
``` sql
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
```
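For example (a sketch; the table definition and ZooKeeper path are illustrative), a table whose metadata is kept in the `zookeeper2` cluster configured above could be created like this:
``` sql
CREATE TABLE test_table (n UInt32)
ENGINE = ReplicatedMergeTree('zookeeper2:/clickhouse/tables/test_table', 'replica_1')
ORDER BY n
```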
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only.
@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co
```xml
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
```
In this case, you can omit arguments when creating tables:
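For instance (a sketch; the table definition is illustrative), the engine arguments can be dropped entirely and the configured defaults are substituted:
``` sql
CREATE TABLE table_name (x UInt32) ENGINE = ReplicatedMergeTree ORDER BY x
```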


@ -11,7 +11,7 @@ By going through this tutorial, you'll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, we'll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
To postpone the complexities of a distributed environment, we'll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
For example, you have chosen `deb` packages and executed:


@ -44,11 +44,10 @@ stages, such as query planning or distributed queries.
To be useful, the tracing information has to be exported to a monitoring system
that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids
a dependency on a particular monitoring system, instead only
providing the tracing data conforming to the standard. A natural way to do so
in an SQL RDBMS is a system table. OpenTelemetry trace span information
a dependency on a particular monitoring system, instead only providing the
tracing data through a system table. OpenTelemetry trace span information
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
is stored in the system table called `system.opentelemetry_span_log`.
is stored in the `system.opentelemetry_span_log` table.
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
element in the default config file `config.xml`. It is enabled by default.
@ -67,3 +66,31 @@ The table has the following columns:
The tags or attributes are saved as two parallel arrays, containing the keys
and values. Use `ARRAY JOIN` to work with them.
## Integration with monitoring systems
At the moment, there is no ready tool that can export the tracing data from
ClickHouse to a monitoring system.
For testing, it is possible to set up the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format:
```sql
CREATE MATERIALIZED VIEW default.zipkin_spans
ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
SETTINGS output_format_json_named_tuples_as_objects = 1,
output_format_json_array_of_rows = 1 AS
SELECT
lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
lower(hex(parent_span_id)) AS parentId,
lower(hex(span_id)) AS id,
operation_name AS name,
start_time_us AS timestamp,
finish_time_us - start_time_us AS duration,
cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
cast(tuple(
attribute.values[indexOf(attribute.names, 'db.statement')]),
'Tuple("db.statement" text)') AS tags
FROM system.opentelemetry_span_log
```
If an error occurs, the part of the log data affected by it is silently lost. Check the server log for error messages if the data does not arrive.


@ -4,4 +4,59 @@ toc_priority: 5
# avg {#agg_function-avg}
Calculates the average. Only works for numbers. The result is always Float64.
Calculates the arithmetic mean.
**Syntax**
``` sql
avg(x)
```
**Parameter**
- `x` — Values.
`x` must be
[Integer](../../../sql-reference/data-types/int-uint.md),
[floating-point](../../../sql-reference/data-types/float.md), or
[Decimal](../../../sql-reference/data-types/decimal.md).
**Returned value**
- `NaN` if the input is empty.
- The arithmetic mean otherwise.
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
```
Result:
``` text
┌─avg(x)─┐
│ 2.5 │
└────────┘
```
**Example**
Query:
``` sql
CREATE TABLE test (t UInt8) ENGINE = Memory;
SELECT avg(t) FROM test
```
Result:
``` text
┌─avg(t)─┐
│ nan │
└────────┘
```


@ -14,17 +14,21 @@ avgWeighted(x, weight)
**Parameters**
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
- `x` — Values.
- `weight` — Weights of the values.
Type of `x` and `weight` must be the same.
`x` and `weight` must both be
[Integer](../../../sql-reference/data-types/int-uint.md),
[floating-point](../../../sql-reference/data-types/float.md), or
[Decimal](../../../sql-reference/data-types/decimal.md),
but may have different types.
**Returned value**
- Weighted mean.
- `NaN`. If all the weights are equal to 0.
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
- Weighted mean otherwise.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
**Example**
@ -42,3 +46,54 @@ Result:
│ 8 │
└────────────────────────┘
```
**Example**
Query:
``` sql
SELECT avgWeighted(x, w)
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
```
Result:
``` text
┌─avgWeighted(x, weight)─┐
│ 8 │
└────────────────────────┘
```
**Example**
Query:
``` sql
SELECT avgWeighted(x, w)
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
```
Result:
``` text
┌─avgWeighted(x, weight)─┐
│ nan │
└────────────────────────┘
```
**Example**
Query:
``` sql
CREATE TABLE test (t UInt8) ENGINE = Memory;
SELECT avgWeighted(t, t) FROM test
```
Result:
``` text
┌─avgWeighted(x, weight)─┐
│ nan │
└────────────────────────┘
```


@ -25,7 +25,7 @@ SELECT [DISTINCT] expr_list
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[UNION ...]
[INTO OUTFILE filename]
[FORMAT format]
```
@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
- [SELECT clause](#select-clause)
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)


@ -1,5 +1,5 @@
---
toc_title: UNION ALL
toc_title: UNION
---
# UNION ALL Clause {#union-all-clause}
@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined
Queries that are parts of `UNION ALL` can't be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
## Limitations {#limitations}
# UNION DISTINCT Clause {#union-distinct-clause}
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` performs a distinct transform on the union result; it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
# UNION Clause {#union-clause}
By default, `UNION` behaves the same way as `UNION DISTINCT`, but you can specify the union mode with the `union_default_mode` setting; allowed values are `'ALL'`, `'DISTINCT'`, or an empty string. However, if you use `UNION` while `union_default_mode` is set to an empty string, an exception is thrown.
Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.
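As a minimal sketch of the three forms described above (the row counts follow from the semantics stated in this section, not from the original document):

``` sql
SELECT 1 AS x UNION ALL SELECT 1;      -- two rows: duplicates are kept
SELECT 1 AS x UNION DISTINCT SELECT 1; -- one row: duplicates are removed

SET union_default_mode = 'ALL';
SELECT 1 AS x UNION SELECT 1;          -- behaves as UNION ALL under this setting
```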
## Implementation Details {#implementation-details}
Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.

View File

@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
| Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -11,7 +11,6 @@
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
| 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00"
# 示例数据集 {#example-datasets}
本节介绍如何获取示例数据集并将其导入ClickHouse。
本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集还可以使用示例查询。
对于某些数据集示例查询也可用。
- [脱敏的Yandex.Metrica数据集](metrica.md)
- [星型基准测试](star-schema.md)
- [维基访问数据](wikistat.md)
- [Criteo TB级别点击日志](criteo.md)
- [AMPLab大数据基准测试](amplab-benchmark.md)
- [纽约出租车数据](nyc-taxi.md)
- [航班飞行数据](ontime.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -1,17 +1,17 @@
---
toc_priority: 21
toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
toc_priority: 15
toc_title: Yandex.Metrica Data
---
# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
Dataset由两个表组成其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科
数据集由两个表组成包含关于Yandex.Metrica的hits(`hits_v1`)和visit(`visits_v1`)的匿名数据。你可以阅读更多关于Yandex的信息。在[ClickHouse历史](../../introduction/history.md)的Metrica部分
数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
数据集由两个表组成,他们中的任何一个都可以下载作为一个压缩`tsv.xz`的文件或准备的分区。除此之外,一个扩展版的`hits`表包含1亿行TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz准备分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz。
## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}
下载和导入点击表:
下载和导入`hits`表:
``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
@ -21,7 +21,7 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
下载和导入访问:
下载和导入`visits`表:
``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
@ -31,9 +31,9 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
```
## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}
压缩的TSV文件下载并导入命中:
TSV压缩文件下载并导入`hits`:
``` bash
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
从压缩tsv文件下载和导入访问:
从压缩tsv文件下载和导入`visits`:
``` bash
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
## 查询示例 {#example-queries}
[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
[使用教程](../../getting-started/tutorial.md)是以Yandex.Metrica数据集开始教程。
查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的它们被命名为 `test.hists``test.visits` 那里)
可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到对这些表的查询的其他示例(它们被命名为`test.hits`和`test.visits`)。

View File

@ -1,3 +1,10 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5BFC\u8A00"
toc_priority: 2
---
# 入门 {#ru-men}
如果您是ClickHouse的新手并希望亲身体验它的性能首先您需要通过 [安装过程](install.md).

View File

@ -1,34 +1,46 @@
---
toc_priority: 11
toc_title: 安装部署
---
# 安装 {#clickhouse-an-zhuang}
## 系统要求 {#xi-tong-yao-qiu}
ClickHouse可以在任何具有x86_64AArch64或PowerPC64LE CPU架构的LinuxFreeBSD或Mac OS X上运行。
虽然预构建的二进制文件通常是为x86  _64编译并利用SSE 4.2指令集但除非另有说明否则使用支持它的CPU将成为额外的系统要求。这是检查当前CPU是否支持SSE 4.2的命令:
官方预构建的二进制文件通常针对x86_64进行编译并利用`SSE 4.2`指令集因此除非另有说明支持它的CPU使用将成为额外的系统需求。下面是检查当前CPU是否支持SSE 4.2的命令:
``` bash
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
```
要在不支持SSE 4.2或具有AArch64或PowerPC64LE体系结构的处理器上运行ClickHouse您应该[通过源构建ClickHouse](#from-sources)进行适当的配置调整
要在不支持`SSE 4.2`或`AArch64``PowerPC64LE`架构的处理器上运行ClickHouse您应该通过适当的配置调整从[源代码构建ClickHouse](#from-sources)
## 可用的安装选项 {#install-from-deb-packages}
## 可用安装包 {#install-from-deb-packages}
建议为Debian或Ubuntu使用官方的预编译`deb`软件包。 运行以下命令以安装软件包:
### `DEB`安装包
然后运行:
建议使用Debian或Ubuntu的官方预编译`deb`软件包。运行以下命令来安装包:
``` bash
{% include 'install/deb.sh' %}
```
你也可以从这里手动下载安装包https://repo.clickhouse.tech/deb/stable/main/
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)
如果你想使用最新的测试版本,请使用`testing`替换`stable`
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/deb/stable/main/)
### 来自RPM包 {#from-rpm-packages}
安装包列表:
Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包用于CentOSRedHat和所有其他基于rpm的Linux发行版。
- `clickhouse-common-static` — ClickHouse编译的二进制文件。
- `clickhouse-server` — 创建`clickhouse-server`软连接,并安装默认配置服务
- `clickhouse-client` — 创建`clickhouse-client`客户端工具软连接,并安装客户端配置文件。
- `clickhouse-common-static-dbg` — 带有调试信息的ClickHouse二进制文件。
### `RPM`安装包 {#from-rpm-packages}
推荐使用CentOS、RedHat和所有其他基于rpm的Linux发行版的官方预编译`rpm`包。
首先,您需要添加官方存储库:
@ -38,84 +50,120 @@ sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
```
如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用
然后运行这些命令以实际安装
然后运行命令安装:
``` bash
sudo yum install clickhouse-server clickhouse-client
```
您也可以从此处手动下载和安装软件包https://repo.clickhouse.tech/rpm/stable/x86_64
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/rpm/stable/x86_64)
### 来自Docker {#from-docker-image}
### `Tgz`安装包 {#from-tgz-archives}
要在Docker中运行ClickHouse请遵循[码头工人中心](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。那些图像使用官方的`deb`包。
如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。
所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.tech/tgz/`下载。
下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新版本的安装示例:
``` bash
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
```
对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它们,它们以`-stable`后缀标记。
### `Docker`安装包 {#from-docker-image}
要在Docker中运行ClickHouse请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。它是官方的`deb`安装包。
### 其他环境安装包 {#from-other}
对于非linux操作系统和Arch64 CPU架构ClickHouse将会以`master`分支的最新提交的进行编译提供(它将会有几小时的延迟)。
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
下载后,您可以使用`clickhouse client`连接服务,或者使用`clickhouse local`模式处理数据不过您必须要额外在GitHub下载[server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml)和[users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml)配置文件。
不建议在生产环境中使用这些构建版本因为它们没有经过充分的测试但是您可以自行承担这样做的风险。它们只是ClickHouse功能的一个部分。
### 使用源码安装 {#from-sources}
具体编译方式可以参考build.md。
要手动编译ClickHouse, 请遵循[Linux](../development/build.md)或[Mac OS X](../development/build-osx.md)说明
你可以编译并安装它们。
你也可以直接使用而不进行安装。
您可以编译并安装它们,也可以使用不安装包的程序。通过手动构建,您可以禁用`SSE 4.2`或`AArch64 cpu`。
``` text
Client: programs/clickhouse-client
Server: programs/clickhouse-server
```
Client: programs/clickhouse-client
Server: programs/clickhouse-server
在服务器中为数据创建如下目录:
您需要创建一个数据和元数据文件夹,并为所需的用户`chown`授权。它们的路径可以在服务器配置(`src/programs/server/config.xml`)中改变,默认情况下它们是:
``` text
/opt/clickhouse/data/default/
/opt/clickhouse/metadata/default/
```
/opt/clickhouse/data/default/
/opt/clickhouse/metadata/default/
(它们可以在server config中配置。)
为需要的用户运行chown
日志的路径可以在server config (src/programs/server/config.xml)中配置。
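一个简单的示例(假设使用上面的默认路径,并以假设的`clickhouse`系统用户运行,请按实际配置调整):

``` bash
sudo mkdir -p /opt/clickhouse/data/default/ /opt/clickhouse/metadata/default/
sudo chown -R clickhouse:clickhouse /opt/clickhouse/
```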
在Gentoo上你可以使用`emerge clickhouse`从源代码安装ClickHouse。
## 启动 {#qi-dong}
可以运行如下命令在后台启动服务:
如果没有`service`可以运行如下命令在后台启动服务:
``` bash
sudo service clickhouse-server start
$ sudo /etc/init.d/clickhouse-server start
```
可以在`/var/log/clickhouse-server/`目录中查看日志。
日志文件将输出在`/var/log/clickhouse-server/`文件夹
如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`
如果服务器没有启动,检查`/etc/clickhouse-server/config.xml`中的配置
你也可以在控制台中直接启动服务:
您也可以手动从控制台启动服务器:
``` bash
clickhouse-server --config-file=/etc/clickhouse-server/config.xml
```bash
$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
```
在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。
如果配置文件在当前目录中你可以不指定config-file参数。它默认使用./config.xml
在这种情况下,日志将被打印到控制台,这在开发过程中很方便。
你可以使用命令行客户端连接到服务:
如果配置文件在当前目录中,则不需要指定`——config-file`参数。默认情况下,它的路径为`./config.xml`。
ClickHouse支持访问限制设置。它们位于`users.xml`文件(与`config.xml`同级目录)。
默认情况下,允许`default`用户从任何地方访问,不需要密码。可查看`user/default/networks`。
更多信息,请参见[Configuration Files](../operations/configuration-files.md)。
启动服务后,您可以使用命令行客户端连接到它:
``` bash
clickhouse-client
$ clickhouse-client
```
默认情况下它使用default用户无密码的与localhost:9000服务建立连接。
客户端也可以用于连接远程服务,例如:
默认情况下,使用`default`用户并不携带密码连接到`localhost:9000`。还可以使用`--host`参数连接到指定服务器。
终端必须使用UTF-8编码。
更多信息,请参阅[Command-line client](../interfaces/cli.md)。
示例:
``` bash
clickhouse-client --host=example.com
```
有关更多信息请参考«Command-line client»部分。
检查系统是否工作:
``` bash
milovidov@hostname:~/work/metrica/src/src/Client$ ./clickhouse-client
$ ./clickhouse-client
ClickHouse client version 0.0.18749.
Connecting to localhost:9000.
Connected to ClickHouse server version 0.0.18749.
@ -135,6 +183,6 @@ SELECT 1
**恭喜,系统已经工作了!**
为了继续进行实验,你可以尝试下载测试数据集。
为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.tech/tutorial.html)
[原始文章](https://clickhouse.tech/docs/en/getting_started/install/) <!--hide-->

View File

@ -1,19 +1,19 @@
---
toc_priority: 12
toc_title: "\u6559\u7A0B"
toc_title: 使用教程
---
# 点击教程 {#clickhouse-tutorial}
# ClickHouse教程 {#clickhouse-tutorial}
## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial}
## 从本教程中可以获得什么? {#what-to-expect-from-this-tutorial}
通过本教程您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
通过学习本教程您将了解如何设置一个简单的ClickHouse集群。它会很小但是可以容错和扩展。然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
## 单节点设置 {#single-node-setup}
为了推迟分布式环境的复杂性我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image)
为了延迟演示分布式环境的复杂性我们将首先在单个服务器或虚拟机上部署ClickHouse。ClickHouse通常是从[deb](install.md#install-from-deb-packages)或[rpm](install.md#from-rpm-packages)包安装,但对于不支持它们的操作系统也有[其他方法](install.md#from-docker-image)
例如,您选择了从 `deb`安装,执行:
例如,您选择`deb`安装,执行:
``` bash
{% include 'install/deb.sh' %}
@ -21,13 +21,13 @@ toc_title: "\u6559\u7A0B"
在我们安装的软件中包含这些包:
- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序它是交互式ClickHouse控制台客户端。
- `clickhouse-client` 包,包含[clickhouse-client](../interfaces/cli.md)客户端它是交互式ClickHouse控制台客户端。
- `clickhouse-common`包含一个ClickHouse可执行文件。
- `clickhouse-server`包含要作为服务端运行的ClickHouse配置文件。
服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`<path>` 元素. Path决定了数据存储的位置因此该位置应该位于磁盘容量较大的卷上默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件作为config.xml文件的“补丁”用以复写配置元素
服务器配置文件位于`/etc/clickhouse-server/`。在继续之前,请注意`config.xml`中的`<path>`元素。它决定了数据存储的位置,因此它应该位于磁盘容量大的卷上;默认值是`/var/lib/clickhouse/`。如果你想调整配置,直接编辑`config.xml`并不方便,因为它可能会在将来的包更新中被重写。建议的方法是在[config.d文件夹](../operations/configuration-files.md)中创建文件,作为config.xml配置元素的重写方式。
你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
你可能已经注意到了,`clickhouse-server`安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
``` bash
sudo service clickhouse-server start
@ -39,9 +39,9 @@ sudo service clickhouse-server start
sudo /etc/init.d/clickhouse-server start
```
服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。
服务端日志的默认位置是`/var/log/clickhouse-server/`。当服务端在日志中记录`Ready for connections`消息,即表示服务端已准备好处理客户端连接。
一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
一旦`clickhouse-server`启动并运行,我们可以利用`clickhouse-client`连接到服务端,并运行一些测试查询,如`SELECT "Hello, world!";`.
<details markdown="1">
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## 导入示例数据集 {#import-sample-dataset}
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中我们将使用Yandex.Metrica的匿名数据它是在ClickHouse成为开源之前作为生产环境运行的第一个服务关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。[多种导入Yandex.Metrica数据集的的方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中我们将使用Yandex.Metrica的匿名数据它是在ClickHouse成为开源之前作为生产环境运行的第一个服务关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。[多种导入Yandex.Metrica数据集方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
### 下载并提取表数据 {#download-and-extract-table-data}
@ -93,17 +93,17 @@ curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unx
### 创建表 {#create-tables}
与大多数数据库管理系统一样ClickHouse在逻辑上将表分组为数据库。包含一个 `default` 数据库,但我们将创建一个新的数据库 `tutorial`:
与大多数数据库管理系统一样ClickHouse在逻辑上将表分组为数据库。包含一个`default`数据库,但我们将创建一个新的数据库`tutorial`:
``` bash
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
```
与创建数据库相比,创建表的语法要复杂得多(请参阅 [参考资料](../sql-reference/statements/create.md). 一般 `CREATE TABLE` 声明必须指定三个关键的事情:
与创建数据库相比,创建表的语法要复杂得多(请参阅[参考资料](../sql-reference/statements/create.md). 一般`CREATE TABLE`声明必须指定三个关键的事情:
1. 要创建的表的名称。
2. 表结构,例如:列名和对应的[数据类型](../sql-reference/data-types/index.md)。
3. [表引擎](../engines/table-engines/index.md) 及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
3. [表引擎](../engines/table-engines/index.md)及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
Yandex.Metrica是一个网络分析服务样本数据集不包括其全部功能因此只有两个表可以创建:
@ -455,11 +455,11 @@ SETTINGS index_granularity = 8192
您可以使用`clickhouse-client`的交互模式执行这些查询(只需在终端中启动它,而不需要提前指定查询)。或者如果你愿意,可以尝试一些[替代接口](../interfaces/index.md)。
正如我们所看到的, `hits_v1` 使用 [基本的MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而 `visits_v1` 使用 [折叠树](../engines/table-engines/mergetree-family/collapsingmergetree.md) 变体
正如我们所看到的, `hits_v1`使用 [MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而`visits_v1`使用 [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md)引擎
### 导入数据 {#import-data}
数据导入到ClickHouse是通过以下方式完成的 [INSERT INTO](../sql-reference/statements/insert-into.md) 查询像许多其他SQL数据库。 然而,数据通常是在一个提供 [支持的序列化格式](../interfaces/formats.md) 而不是 `VALUES` 子句(也支持)。
数据导入ClickHouse是通过[INSERT INTO](../sql-reference/statements/insert-into.md)查询完成的,与许多其他SQL数据库类似。然而,数据通常以某种[支持的序列化格式](../interfaces/formats.md)提供,而不是使用`VALUES`子句(虽然也支持)。
我们之前下载的文件是以制表符分隔的格式,所以这里是如何通过控制台客户端导入它们:
@ -468,7 +468,7 @@ clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
```
ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的 `--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询 `system.settings` 表:
ClickHouse有很多[要调整的设置](../operations/settings/index.md)在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的`--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询`system.settings` 表:
``` sql
SELECT name, value, changed, description
@ -479,14 +479,14 @@ FORMAT TSV
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
```
您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储或至少检查是否有意义。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
您也可以[OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize)导入后的表。使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储或至少检查是否有意义。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
```
这些查询开始一个I/O和CPU密集型操作所以如果表一直接收到新数据最好不要管它让合并在后台运行。
这些查询开始I/O和CPU密集型操作所以如果表一直接收到新数据最好不要管它让合并在后台运行。
现在我们可以检查表导入是否成功:
@ -524,9 +524,9 @@ ClickHouse集群是一个同质集群。 设置步骤:
1. 在群集的所有机器上安装ClickHouse服务端
2. 在配置文件中设置群集配置
3. 在每个实例上创建本地表
4. 创建一个 [分布式表](../engines/table-engines/special/distributed.md)
4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)
[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 “视图”映射到ClickHouse集群的本地表。 从分布式表中执行 **SELECT** 查询会使用集群所有分片的资源。 您可以为多个集群指定configs并创建多个分布式表为不同的集群提供视图。
[分布式表](../engines/table-engines/special/distributed.md)实际上是一种`view`映射到ClickHouse集群的本地表。 从分布式表中执行**SELECT**查询会使用集群所有分片的资源。 您可以为多个集群指定configs并创建多个分布式表为不同的集群提供视图。
具有三个分片,每个分片一个副本的集群的示例配置:
@ -555,7 +555,7 @@ ClickHouse集群是一个同质集群。 设置步骤:
</remote_servers>
```
为了进一步演示,让我们使用和创建 `hits_v1` 表相同的 `CREATE TABLE` 语句创建一个新的本地表,但表名不同:
为了进一步演示,让我们使用和创建`hits_v1`表相同的`CREATE TABLE`语句创建一个新的本地表,但表名不同:
``` sql
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
@ -568,9 +568,9 @@ CREATE TABLE tutorial.hits_all AS tutorial.hits_local
ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
```
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表 [远程](../sql-reference/table-functions/remote.md) 表功能。
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表[远程](../sql-reference/table-functions/remote.md)表功能。
让我们运行 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
让我们运行[INSERT SELECT](../sql-reference/statements/insert-into.md)将该表传播到多个服务器。
``` sql
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
@ -609,10 +609,10 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
</remote_servers>
```
启用本机复制 [Zookeeper](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上其中没有其他进程包括运行的ClickHouse
启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上其中没有其他进程包括运行的ClickHouse
!!! note "注"
ZooKeeper不是一个严格的要求在某些简单的情况下您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议的在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
!!! note "注"
ZooKeeper不是一个严格的要求在某些简单的情况下您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
ZooKeeper位置在配置文件中指定:
@ -653,12 +653,12 @@ ENGINE = ReplcatedMergeTree(
...
```
在这里,我们使用 [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) 表引擎。 在参数中我们指定包含分片和副本标识符的ZooKeeper路径。
在这里,我们使用[ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md)表引擎。 在参数中我们指定包含分片和副本标识符的ZooKeeper路径。
``` sql
INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
```
复制在多主机模式下运行。 数据可以加载到任何副本中,然后系统自动将其与其他实例同步。 复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。 至少应有一个副本允许数据摄取。 其他人将同步数据和修复一致性,一旦他们将再次变得活跃。 请注意,这种方法允许最近插入的数据丢失的可能性很低。
复制在多主机模式下运行。数据可以加载到任何副本中,然后系统自动将其与其他实例同步。复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。至少应该有一个副本允许数据摄入。另一些则会在重新激活后同步数据并修复一致性。请注意,这种方法允许最近插入的数据丢失的可能性很低。
[原始文章](https://clickhouse.tech/docs/en/getting_started/tutorial/) <!--hide-->

View File

@ -4,53 +4,50 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)
在传统的行式数据库系统中,数据按如下顺序存储:
| row | watchID | JavaEnable | title | GoodEvent | EventTime |
|-----|-------------|------------|------------|-----------|---------------------|
| #0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 |
| #1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 |
| #2 | 89953706054 | 1 | 任务 | 1 | 2016-05-18 07:38:00 |
| #N | … | … | … | … | … |
| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|-----|-------------|------------|--------------------|-----------|---------------------|
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
| #N | … | … | … | … | … |
处于同一行中的数据总是被物理的存储在一起。
常见的行式数据库系统有: MySQL、Postgres和MS SQL Server。
{: .灰色 }
常见的行式数据库系统有:`MySQL`、`Postgres`和`MS SQL Server`。
在列式数据库系统中,数据按如下的顺序存储:
| row: | #0 | #1 | #2 | #N |
| Row: | #0 | #1 | #2 | #N |
|-------------|---------------------|---------------------|---------------------|-----|
| watchID: | 89354350662 | 90329509958 | 89953706054 | … |
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
| JavaEnable: | 1 | 0 | 1 | … |
| title: | 投资者关系 | 联系我们 | 任务 | … |
| Title: | Investor Relations | Contact us | Mission | … |
| GoodEvent: | 1 | 1 | 1 | … |
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
该示例中只展示了数据在列式数据库中数据的排列方式。
对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。
这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。
常见的列式数据库有: Vertica、 Paraccel (Actian MatrixAmazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。
{: .灰色 }
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例;每种类型的查询(行、列和字节)读取多少数据;读取数据和更新之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng}
- 大多数是读请求
- 数据总是以相当大的批(\> 1000 rows)进行写入
- 不修改已添加的数据
- 每次查询都从数据库中读取大量的行,但是同时又仅需要少量的列
- 大多数是读请求
- 数据以相当大的批次(\> 1000行)更新,而不是单行更新;或者根本没有更新。
- 已添加到数据库的数据不能修改。
- 对于读取,从数据库中提取相当多的行,但只提取列的一小部分。
- 宽表,即每个表包含着大量的列
- 较少的查询(通常每台服务器每秒数百个查询或更少)
- 查询相对较少(通常每台服务器每秒查询数百次或更少)
- 对于简单查询允许延迟大约50毫秒
- 列中的数据相对较小: 数字和短字符串(例如每个URL 60个字节)
- 处理单个查询时需要高吞吐量(每个服务器每秒高达数十亿行)
- 列中的数据相对较小:数字和短字符串(例如每个URL 60个字节)
- 处理单个查询时需要高吞吐量(每台服务器每秒可达数十亿行)
- 事务不是必须的
- 对数据一致性要求低
- 每一个查询除了一个大表外都很小
- 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存
- 每个查询有一个大表。除了它以外,其他的都很小。
- 查询结果明显小于源数据。换句话说数据经过过滤或聚合因此结果适合于单个服务器的RAM
很容易可以看出OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景并不是非常完美的适用方案。例如使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。

View File

@ -1,6 +1,6 @@
---
toc_priority: 8
toc_title: "\u91C7\u7528\u8005"
toc_priority: 5
toc_title: "ClickHouse用户"
---
# ClickHouse用户 {#clickhouse-adopters}

View File

@ -1,3 +1,8 @@
---
toc_priority: 2
toc_title: ClickHouse的特性
---
# ClickHouse的特性 {#clickhouse-de-te-xing}
## 真正的列式数据库管理系统 {#zhen-zheng-de-lie-shi-shu-ju-ku-guan-li-xi-tong}
@ -12,9 +17,13 @@
在一些列式数据库管理系统中(例如InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs)这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
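下面是一个使用专用编解码器的简单示例(列类型与编解码器的选择仅作说明,并非来自原文;`DoubleDelta`和`Gorilla`是此类专用编解码器的例子):

``` sql
CREATE TABLE codec_example
(
    ts DateTime CODEC(DoubleDelta, LZ4),  -- 增量编码适合单调递增的时间戳
    value Float64 CODEC(Gorilla)          -- Gorilla编码适合变化缓慢的数值
)
ENGINE = MergeTree
ORDER BY ts
```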
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作这种方式会造成比实际更多的设备预算。ClickHouse被设计用于工作在传统磁盘上的系统它提供每GB更低的存储成本但如果有可以使用SSD和内存它也会合理的利用这些资源。
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。
ClickHouse被设计用于工作在传统磁盘上的系统它提供每GB更低的存储成本但如果可以使用SSD和内存它也会合理的利用这些资源。
## 多核心并行处理 {#duo-he-xin-bing-xing-chu-li}
@ -27,9 +36,11 @@ ClickHouse会使用服务器上一切可用的资源从而以最自然的方
## 支持SQL {#zhi-chi-sql}
ClickHouse支持基于SQL的声明式查询语言该语言大部分情况下是与SQL标准兼容的。
支持的查询包括 GROUP BYORDER BYINJOIN以及非相关子查询。
不支持窗口函数和相关子查询。
ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同。
支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。
相关(依赖性)子查询和窗口函数暂不受支持,但将来会被实现。
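下面是一个组合上述子句的简单示例(表名借用教程数据集,仅作说明):

``` sql
SELECT UserID, count() AS hits
FROM tutorial.hits_v1
WHERE CounterID IN (SELECT CounterID FROM tutorial.visits_v1)  -- 非相关子查询
GROUP BY UserID
ORDER BY hits DESC
LIMIT 10
```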
## 向量引擎 {#xiang-liang-yin-qing}
@ -55,12 +66,20 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进
2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。
3. 不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。下面给出前两种方法的简单示例。
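一个简单的示例(仅作说明,假设表在建表时通过`SAMPLE BY`声明了采样键):

``` sql
SELECT uniqCombined(UserID) AS approx_users  -- 近似去重计数
FROM tutorial.hits_v1
SAMPLE 0.1                                   -- 只读取约10%的数据
```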
## Adaptive Join Algorithm {#adaptive-join-algorithm}
ClickHouse支持多表[JOIN](../sql-reference/statements/select/join.md),并采用自适应连接算法:优先使用哈希连接算法,如果有多个大表,则回退到合并连接算法。
## 支持数据复制和数据完整性 {#zhi-chi-shu-ju-fu-zhi-he-shu-ju-wan-zheng-xing}
ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后系统会在后台将数据分发给其他副本以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复在一些少数的复杂情况下需要手动恢复。
更多信息,参见 [数据复制](../engines/table-engines/mergetree-family/replication.md)。
## 角色的访问控制 {#role-based-access-control}
ClickHouse使用SQL查询实现用户帐户管理并允许[角色的访问控制](../operations/access-rights.md)类似于ANSI SQL标准和流行的关系数据库管理系统。
# 限制 {#clickhouseke-xian-zhi}
1. 没有完整的事务支持。

View File

@ -1,3 +1,8 @@
---
toc_priority: 4
toc_title: ClickHouse历史
---
# ClickHouse历史 {#clickhouseli-shi}
ClickHouse最初是为 [YandexMetrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止该系统在ClickHouse中有超过13万亿条记录并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。

View File

@ -1,3 +1,8 @@
---
toc_priority: 3
toc_title: ClickHouse性能
---
# 性能 {#performance}
根据Yandex的内部测试结果ClickHouse表现出了比同类可比较产品更优的性能。你可以在 [这里](https://clickhouse.tech/benchmark/dbms/) 查看具体的测试结果。

View File

@ -3,18 +3,18 @@ toc_priority: 60
toc_title: clickhouse-local
---
# ツ环板-ョツ嘉ッツ偲 {#clickhouse-local}
# ClickHouse Local {#clickhouse-local}
`clickhouse-local` 程序使您能够对本地文件执行快速处理而无需部署和配置ClickHouse服务器。
`clickhouse-local`模式可以使您能够对本地文件执行快速处理而无需部署和配置ClickHouse服务器。
接受表示表的数据并使用以下方式查询它们 [ツ环板ECTョツ嘉ッツ偲](../../operations/utilities/clickhouse-local.md).
[ClickHouse SQL语法](../../operations/utilities/clickhouse-local.md)支持对表格数据的查询.
`clickhouse-local` 使用与ClickHouse server相同的核心因此它支持大多数功能以及相同的格式和表引擎。
`clickhouse-local`使用与ClickHouse Server相同的核心因此它支持大多数功能以及相同的格式和表引擎。
默认情况下 `clickhouse-local` 不能访问同一主机上的数据,但它支持使用以下方式加载服务器配置 `--config-file` 争论
默认情况下`clickhouse-local`不能访问同一主机上的数据,但它支持使用`--config-file`方式加载服务器配置
!!! warning "警告"
不建议将生产服务器配置加载到 `clickhouse-local` 因为数据可以在人为错误的情况下被损坏。
不建议将生产服务器配置加载到`clickhouse-local`因为数据可以在人为错误的情况下被损坏。
## 用途 {#usage}
@ -26,21 +26,21 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
参数:
- `-S`, `--structure`table structure for input data.
- `-if`, `--input-format`input format, `TSV` 默认情况下
- `-f`, `--file`path to data, `stdin` 默认情况下
- `-q` `--query`queries to execute with `;` 如delimeter
- `-N`, `--table`table name where to put output data, `table` 默认情况下
- `-of`, `--format`, `--output-format`output format, `TSV` 默认情况下
- `--stacktrace`whether to dump debug output in case of exception.
- `--verbose`more details on query execution.
- `-s`disables `stderr` 记录
- `--config-file`path to configuration file in same format as for ClickHouse server, by default the configuration empty.
- `--help`arguments references for `clickhouse-local`.
- `-S`, `--structure`输入数据的表结构。
- `-if`, `--input-format`输入格式化类型, 默认是`TSV`
- `-f`, `--file`数据路径, 默认是`stdin`
- `-q` `--query`要查询的SQL语句使用`;`做分隔符
- `-N`, `--table`数据输出的表名,默认是`table`
- `-of`, `--format`, `--output-format`输出格式化类型, 默认是`TSV`
- `--stacktrace`是否在出现异常时输出栈信息。
- `--verbose`debug显示查询的详细信息。
- `-s`禁用`stderr`输出信息
- `--config-file`与ClickHouse服务器格式相同配置文件的路径默认情况下配置为空。
- `--help``clickhouse-local`使用帮助信息。
还有每个ClickHouse配置变量的参数这些变量更常用而不是 `--config-file`.
对于每个ClickHouse配置的参数也可以单独使用可以不使用`--config-file`指定。
## 例 {#examples}
``` bash
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
@ -49,7 +49,7 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
3 4
```
前面的例子是一样的:
另一个示例,类似上一个使用示例:
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
@ -58,7 +58,22 @@ Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
3 4
```
现在让我们为每个Unix用户输出内存用户:
你可以使用`stdin`或`--file`参数, 打开任意数量的文件来使用多个文件[`file` table function](../../sql-reference/table-functions/file.md):
```bash
$ echo 1 | tee 1.tsv
1
$ echo 2 | tee 2.tsv
2
$ clickhouse-local --query "
select * from file('1.tsv', TSV, 'a int') t1
cross join file('2.tsv', TSV, 'b int') t2"
1 2
```
现在让我们查询每个Unix用户使用内存:
``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"

View File

@ -43,13 +43,81 @@ else ()
${ENABLE_CLICKHOUSE_ALL})
endif ()
message(STATUS "ClickHouse modes:")
if (NOT ENABLE_CLICKHOUSE_SERVER)
message(WARNING "ClickHouse server mode is not going to be built.")
else()
message(STATUS "Server mode: ON")
endif()
if (NOT ENABLE_CLICKHOUSE_CLIENT)
message(WARNING "ClickHouse client mode is not going to be built. You won't be able to connect to the server and run
tests")
else()
message(STATUS "Client mode: ON")
endif()
if (ENABLE_CLICKHOUSE_LOCAL)
message(STATUS "Local mode: ON")
else()
message(STATUS "Local mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_BENCHMARK)
message(STATUS "Benchmark mode: ON")
else()
message(STATUS "Benchmark mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
message(STATUS "Extract from config mode: ON")
else()
message(STATUS "Extract from config mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_COMPRESSOR)
message(STATUS "Compressor mode: ON")
else()
message(STATUS "Compressor mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_COPIER)
message(STATUS "Copier mode: ON")
else()
message(STATUS "Copier mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_FORMAT)
message(STATUS "Format mode: ON")
else()
message(STATUS "Format mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
message(STATUS "Obfuscator mode: ON")
else()
message(STATUS "Obfuscator mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
message(STATUS "ODBC bridge mode: ON")
else()
message(STATUS "ODBC bridge mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
message(STATUS "ClickHouse install: ON")
else()
message(STATUS "ClickHouse install: OFF")
endif()
if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
set(CLICKHOUSE_ONE_SHARED ON)
endif()
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
macro(clickhouse_target_link_split_lib target name)
if(NOT CLICKHOUSE_ONE_SHARED)
target_link_libraries(${target} PRIVATE clickhouse-${name}-lib)

View File

@ -2515,7 +2515,7 @@ public:
{
std::string traceparent = options["opentelemetry-traceparent"].as<std::string>();
std::string error;
if (!context.getClientInfo().parseTraceparentHeader(
if (!context.getClientInfo().client_trace_context.parseTraceparentHeader(
traceparent, error))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -2526,7 +2526,7 @@ public:
if (options.count("opentelemetry-tracestate"))
{
context.getClientInfo().opentelemetry_tracestate =
context.getClientInfo().client_trace_context.tracestate =
options["opentelemetry-tracestate"].as<std::string>();
}

View File

@ -62,6 +62,9 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries)
{
std::exception_ptr exception;
if (max_tries == 0)
throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR);
for (UInt64 try_number = 1; try_number <= max_tries; ++try_number)
{
try
@ -605,7 +608,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
settings_push.replication_alter_partitions_sync = 2;
query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) +
" ATTACH PARTITION " + partition_name +
((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name +
" FROM " + getQuotedTable(helping_table);
LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string);
@ -636,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
if (!task_table.isReplicatedTable())
{
query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
" PARTITION " + partition_name + " DEDUPLICATE;";
((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;";
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);
@ -807,7 +810,7 @@ bool ClusterCopier::tryDropPartitionPiece(
DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number));
String query = "ALTER TABLE " + getQuotedTable(helping_table);
query += " DROP PARTITION " + task_partition.name + "";
query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + "";
/// TODO: use this statement after servers will be updated up to 1.1.54310
// query += " DROP PARTITION ID '" + task_partition.name + "'";
@ -1567,7 +1570,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
DatabaseAndTableName original_table = task_table.table_push;
DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number));
String query = "ALTER TABLE " + getQuotedTable(helping_table) + " DROP PARTITION " + partition_name;
String query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + partition_name;
const ClusterPtr & cluster_push = task_table.cluster_push;
Settings settings_push = task_cluster->settings_push;
@ -1670,14 +1673,24 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard)
{
std::set<String> res;
createShardInternalTables(timeouts, task_shard, false);
TaskTable & task_table = task_shard.task_table;
const String & partition_name = queryToString(task_table.engine_push_partition_key_ast);
if (partition_name == "'all'")
{
res.emplace("'all'");
return res;
}
String query;
{
WriteBufferFromOwnString wb;
wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM"
wb << "SELECT DISTINCT " << partition_name << " AS partition FROM"
<< " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
query = wb.str();
}
@ -1692,7 +1705,6 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
local_context.setSettings(task_cluster->settings_pull);
Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream());
std::set<String> res;
if (block)
{
ColumnWithTypeAndName & column = block.getByPosition(0);
@ -1803,7 +1815,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
if (execution_mode == ClusterExecutionMode::ON_EACH_NODE)
max_successful_executions_per_shard = 0;
std::atomic<size_t> origin_replicas_number;
std::atomic<size_t> origin_replicas_number = 0;
/// We need to execute query on one replica at least
auto do_for_shard = [&] (UInt64 shard_index, Settings shard_settings)

View File

@ -21,6 +21,7 @@
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/MMapReadBufferFromFile.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/copyData.h>
#include <IO/Operators.h>
@ -70,7 +71,7 @@ namespace po = boost::program_options;
namespace fs = std::filesystem;
auto executeScript(const std::string & command, bool throw_on_error = false)
static auto executeScript(const std::string & command, bool throw_on_error = false)
{
auto sh = ShellCommand::execute(command);
WriteBufferFromFileDescriptor wb_stdout(STDOUT_FILENO);
@ -87,7 +88,7 @@ auto executeScript(const std::string & command, bool throw_on_error = false)
return sh->tryWait();
}
bool ask(std::string question)
static bool ask(std::string question)
{
while (true)
{
@ -104,6 +105,16 @@ bool ask(std::string question)
}
}
static bool filesEqual(std::string path1, std::string path2)
{
MMapReadBufferFromFile in1(path1, 0);
MMapReadBufferFromFile in2(path2, 0);
/// memcmp is faster than hashing and comparing hashes
return in1.buffer().size() == in2.buffer().size()
&& 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size());
}
int mainEntryClickHouseInstall(int argc, char ** argv)
{
@ -143,57 +154,89 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist",
binary_self_path.string());
fs::path binary_self_canonical_path = fs::canonical(binary_self_path);
/// Copy binary to the destination directory.
/// TODO An option to link instead of copy - useful for developers.
/// TODO Check if the binary is the same.
size_t binary_size = fs::file_size(binary_self_path);
fs::path prefix = fs::path(options["prefix"].as<std::string>());
fs::path bin_dir = prefix / fs::path(options["binary-path"].as<std::string>());
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fs::path main_bin_path = bin_dir / "clickhouse";
fs::path main_bin_tmp_path = bin_dir / "clickhouse.new";
fs::path main_bin_old_path = bin_dir / "clickhouse.old";
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
size_t binary_size = fs::file_size(binary_self_path);
try
bool old_binary_exists = fs::exists(main_bin_path);
bool already_installed = false;
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
catch (const Exception & e)
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
throw;
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
if (fs::exists(main_bin_path))
if (already_installed)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
}
else
{
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
throw;
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
/// Create symlinks.
@ -401,8 +444,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
if (!configuration->getString("users.default.password", "").empty()
|| configuration->getString("users.default.password_sha256_hex", "").empty()
|| configuration->getString("users.default.password_double_sha1_hex", "").empty())
|| !configuration->getString("users.default.password_sha256_hex", "").empty()
|| !configuration->getString("users.default.password_double_sha1_hex", "").empty())
{
has_password_for_default_user = true;
}
@ -576,7 +619,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
" This is optional. Taskstats accounting will be disabled."
" To enable taskstats accounting you may add the required capability later manually.\"",
"/tmp/test_setcap.sh", main_bin_path.string());
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
fmt::print(" {}\n", command);
executeScript(command);
#endif
@ -597,10 +640,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
}
}
std::string maybe_sudo;
if (getuid() != 0)
maybe_sudo = "sudo ";
std::string maybe_password;
if (has_password_for_default_user)
maybe_password = " --password";
@ -608,10 +647,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" {}clickhouse start\n"
" sudo clickhouse start\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_sudo, maybe_password);
maybe_password);
}
catch (const fs::filesystem_error &)
{
std::cerr << getCurrentExceptionMessage(false) << '\n';
if (getuid() != 0)
std::cerr << "\nRun with sudo.\n";
return getCurrentExceptionCode();
}
catch (...)
{
@ -783,17 +831,20 @@ namespace
return pid;
}
int stop(const fs::path & pid_file)
int stop(const fs::path & pid_file, bool force)
{
UInt64 pid = isRunning(pid_file);
if (!pid)
return 0;
if (0 == kill(pid, 15)) /// Terminate
fmt::print("Sent termination signal.\n", pid);
int signal = force ? SIGKILL : SIGTERM;
const char * signal_name = force ? "kill" : "terminate";
if (0 == kill(pid, signal))
fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid);
else
throwFromErrno("Cannot send termination signal", ErrorCodes::SYSTEM_ERROR);
throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR);
size_t try_num = 0;
constexpr size_t num_tries = 60;
@ -869,6 +920,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
desc.add_options()
("help,h", "produce help message")
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
;
po::variables_map options;
@ -887,7 +939,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
{
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
return stop(pid_file);
return stop(pid_file, options["force"].as<bool>());
}
catch (...)
{
@ -940,6 +992,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
("config-path", po::value<std::string>()->default_value("/etc/clickhouse-server"), "directory with configs")
("pid-path", po::value<std::string>()->default_value("/var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value("clickhouse"), "clickhouse user")
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
;
po::variables_map options;
@ -962,7 +1015,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
fs::path config = fs::path(options["config-path"].as<std::string>()) / "config.xml";
fs::path pid_file = fs::path(options["pid-path"].as<std::string>()) / "clickhouse-server.pid";
if (int res = stop(pid_file))
if (int res = stop(pid_file, options["force"].as<bool>()))
return res;
return start(user, executable, config, pid_file);
}
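A hedged usage sketch for the new flag (the binary name and install layout are assumptions; `--force` is declared as a boolean value rather than a switch, so it is passed explicitly):

``` bash
sudo clickhouse stop --force=1      # send SIGKILL instead of SIGTERM
sudo clickhouse restart --force=1   # forceful stop, then a normal start
```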

View File

@ -109,6 +109,14 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options)
.argument("err-log-path")
.binding("logger.errorlog"));
options.addOption(Poco::Util::Option("stdout-path", "", "stdout log path, default console")
.argument("stdout-path")
.binding("logger.stdout"));
options.addOption(Poco::Util::Option("stderr-path", "", "stderr log path, default console")
.argument("stderr-path")
.binding("logger.stderr"));
using Me = std::decay_t<decltype(*this)>;
options.addOption(Poco::Util::Option("help", "", "produce this help message")
.binding("help")
@ -127,6 +135,27 @@ void ODBCBridge::initialize(Application & self)
config().setString("logger", "ODBCBridge");
/// Redirect stdout, stderr to specified files.
/// Some libraries and sanitizers write to stderr in case of errors.
const auto stdout_path = config().getString("logger.stdout", "");
if (!stdout_path.empty())
{
if (!freopen(stdout_path.c_str(), "a+", stdout))
throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path);
/// Disable buffering for stdout.
setbuf(stdout, nullptr);
}
const auto stderr_path = config().getString("logger.stderr", "");
if (!stderr_path.empty())
{
if (!freopen(stderr_path.c_str(), "a+", stderr))
throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path);
/// Disable buffering for stderr.
setbuf(stderr, nullptr);
}
buildLoggers(config(), logger(), self.commandName());
BaseDaemon::logRevision();
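A sketch of how the new logging options might be passed on the command line (the port and file paths here are assumptions, for illustration only):

``` bash
clickhouse-odbc-bridge --http-port 9018 \
    --stdout-path /var/log/clickhouse-server/odbc-bridge.stdout \
    --stderr-path /var/log/clickhouse-server/odbc-bridge.stderr
```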

View File

@ -64,6 +64,7 @@
#include <Common/ThreadFuzzer.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
#include <Server/ProtocolServerAdapter.h>
#if !defined(ARCADIA_BUILD)
@ -84,6 +85,11 @@
# include <Poco/Net/SecureServerSocket.h>
#endif
#if USE_GRPC
# include <Server/GRPCServer.h>
#endif
namespace CurrentMetrics
{
extern const Metric Revision;
@ -806,7 +812,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
http_params->setTimeout(settings.http_receive_timeout);
http_params->setKeepAliveTimeout(keep_alive_timeout);
std::vector<std::unique_ptr<Poco::Net::TCPServer>> servers;
std::vector<ProtocolServerAdapter> servers;
std::vector<std::string> listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host");
@ -1035,6 +1041,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString());
});
#if USE_GRPC
create_server("grpc_port", [&](UInt16 port)
{
Poco::Net::SocketAddress server_address(listen_host, port);
servers.emplace_back(std::make_unique<GRPCServer>(*this, make_socket_address(listen_host, port)));
LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString());
});
#endif
/// Prometheus (if defined and not setup yet with http_port)
create_server("prometheus.port", [&](UInt16 port)
{
@ -1056,7 +1071,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->enableNamedSessions();
for (auto & server : servers)
server->start();
server.start();
{
String level_str = config().getString("text_log.level", "");
@ -1088,8 +1103,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
int current_connections = 0;
for (auto & server : servers)
{
server->stop();
current_connections += server->currentConnections();
server.stop();
current_connections += server.currentConnections();
}
if (current_connections)
@ -1109,7 +1124,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
current_connections = 0;
for (auto & server : servers)
current_connections += server->currentConnections();
current_connections += server.currentConnections();
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;

View File

@ -134,6 +134,34 @@
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<!-- gRPC protocol (see src/Server/grpc_protos/clickhouse_grpc.proto for the API)
<grpc_port>9100</grpc_port>
<grpc>
<enable_ssl>true</enable_ssl> -->
<!-- The following two files are used only if enable_ssl=1
<ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
<ssl_key_file>/path/to/ssl_key_file</ssl_key_file> -->
<!-- Whether server will request client for a certificate
<ssl_require_client_auth>true</ssl_require_client_auth> -->
<!-- The following file is used only if ssl_require_client_auth=1
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file> -->
<!-- Default compression algorithm (applied if client doesn't specify another algorithm).
Supported algorithms: none, deflate, gzip, stream_gzip
<compression>gzip</compression> -->
<!-- Default compression level (applied if client doesn't specify another level).
Supported levels: none, low, medium, high
<compression_level>high</compression_level> -->
<!-- Send/receive message size limits in bytes. -1 means unlimited
<max_send_message_size>-1</max_send_message_size>
<max_receive_message_size>4194304</max_receive_message_size>
</grpc> -->
<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>100</max_concurrent_queries>
@ -678,7 +706,7 @@
<!-- Configuration of external dictionaries. See:
https://clickhouse.yandex/docs/en/dicts/external_dicts/
https://clickhouse.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
-->
<dictionaries_config>*_dictionary.xml</dictionaries_config>

View File

@ -1,3 +1,4 @@
#include <memory>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAvg.h>
#include <AggregateFunctions/Helpers.h>
@ -13,43 +14,37 @@ namespace ErrorCodes
namespace
{
template <typename T>
struct Avg
bool allowType(const DataTypePtr& type) noexcept
{
using FieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
};
template <typename T>
using AggregateFuncAvg = typename Avg<T>::Function;
const WhichDataType t(type);
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
}
AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
if (isDecimal(data_type))
res.reset(createWithDecimalType<AggregateFuncAvg>(*data_type, *data_type, argument_types));
else
res.reset(createWithNumericType<AggregateFuncAvg>(*data_type, argument_types));
const DataTypePtr& data_type = argument_types[0];
if (!allowType(data_type))
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
AggregateFunctionPtr res;
if (isDecimal(data_type))
res.reset(createWithDecimalType<AggregateFunctionAvg>(
*data_type, argument_types, getDecimalScale(*data_type)));
else
res.reset(createWithNumericType<AggregateFunctionAvg>(*data_type, argument_types));
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
}
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
{
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -1,78 +1,102 @@
#pragma once
#include <type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include "Core/DecimalFunctions.h"
namespace DB
{
namespace ErrorCodes
{
}
template <class T>
using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
template <typename T, typename Denominator>
struct AggregateFunctionAvgData
{
using NumeratorType = T;
using DenominatorType = Denominator;
template <class T> constexpr bool DecimalOrExtendedInt =
IsDecimalNumber<T>
|| std::is_same_v<T, Int128>
|| std::is_same_v<T, Int256>
|| std::is_same_v<T, UInt128>
|| std::is_same_v<T, UInt256>;
T numerator{0};
/**
* Helper class to encapsulate values conversion for avg and avgWeighted.
*/
template <class Numerator, class Denominator>
struct AvgFraction
{
Numerator numerator{0};
Denominator denominator{0};
template <typename ResultT>
ResultT NO_SANITIZE_UNDEFINED result() const
/// Allow division by zero as sometimes we need to return NaN.
/// Invoked only if either Numerator or Denominator is Decimal.
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale) const
{
if constexpr (std::is_floating_point_v<ResultT>)
if constexpr (std::numeric_limits<ResultT>::is_iec559)
{
if constexpr (is_big_int_v<Denominator>)
return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
else
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
}
if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
{
// According to the docs, num(S1) / denom(S2) would have scale S1
if (denominator == static_cast<Denominator>(0))
return static_cast<ResultT>(0);
if constexpr (std::is_same_v<Numerator, Decimal256> && std::is_same_v<Denominator, Decimal128>)
/// Special case: Decimal256 / Decimal128 is a compile error, because Decimal128 is parametrized
/// by __int128 rather than a wide integer, so convert the denominator to Decimal256 first.
return DecimalUtils::convertTo<Float64>(
numerator / (denominator.template convertTo<Decimal256>()), num_scale);
else
return DecimalUtils::convertTo<Float64>(numerator / denominator, num_scale);
}
if constexpr (std::is_same_v<T, Decimal256>)
return static_cast<ResultT>(numerator / static_cast<T>(denominator));
/// Numerator is always cast to Float64 to divide correctly if the denominator is not Float64.
Float64 num_converted;
if constexpr (IsDecimalNumber<Numerator>)
num_converted = DecimalUtils::convertTo<Float64>(numerator, num_scale);
else
return static_cast<ResultT>(numerator / denominator);
num_converted = static_cast<Float64>(numerator); /// all other types, including extended integral.
std::conditional_t<DecimalOrExtendedInt<Denominator>,
Float64, Denominator> denom_converted;
if constexpr (IsDecimalNumber<Denominator>)
denom_converted = DecimalUtils::convertTo<Float64>(denominator, denom_scale);
else if constexpr (DecimalOrExtendedInt<Denominator>)
/// No way to divide Float64 by an extended integral type without an explicit cast.
denom_converted = static_cast<Float64>(denominator);
else
denom_converted = denominator; /// Plain floating-point denominator, no cast required.
return num_converted / denom_converted;
}
Float64 NO_SANITIZE_UNDEFINED divide() const
{
if constexpr (DecimalOrExtendedInt<Denominator>) /// if extended int
return static_cast<Float64>(numerator) / static_cast<Float64>(denominator);
else
return static_cast<Float64>(numerator) / denominator;
}
};
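Two details above are easy to miss: division by a zero denominator is deliberately allowed (IEEE-754 Float64 yields inf or NaN rather than trapping, hence NO_SANITIZE_UNDEFINED), and a Decimal with scale S stores value * 10^S as an integer, so converting it to Float64 amounts to dividing by 10^S. A standalone sketch, independent of ClickHouse types:

#include <cmath>
#include <cstdio>

int main()
{
    /// IEEE-754 Float64: finite / 0.0 yields inf and 0.0 / 0.0 yields NaN,
    /// which is why divide() may be called with a zero denominator.
    double one = 1.0, zero = 0.0;
    printf("%f %f\n", one / zero, zero / zero); /// inf nan

    /// A hypothetical Decimal64 raw value with scale 2: the integer 12345
    /// represents 123.45, so conversion to Float64 divides by 10^2.
    long long raw = 12345;
    unsigned scale = 2;
    printf("%f\n", raw / std::pow(10.0, scale)); /// 123.450000
}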
/// Calculates arithmetic mean of numbers.
template <typename T, typename Data, typename Derived>
class AggregateFunctionAvgBase : public IAggregateFunctionDataHelper<Data, Derived>
/**
* @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
* class Self : Agg<char, bool, bool, Self>.
*/
template <class Numerator, class Denominator, class Derived>
class AggregateFunctionAvgBase : public
IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
{
public:
using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<T>, DataTypeNumber<Float64>>;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
using Fraction = AvgFraction<Numerator, Denominator>;
using Base = IAggregateFunctionDataHelper<Fraction, Derived>;
/// ctor for native types
AggregateFunctionAvgBase(const DataTypes & argument_types_) : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(0) {}
explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
: Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}
/// ctor for Decimals
AggregateFunctionAvgBase(const IDataType & data_type, const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(getDecimalScale(data_type))
{
}
DataTypePtr getReturnType() const override
{
if constexpr (IsDecimalNumber<T>)
return std::make_shared<ResultDataType>(ResultDataType::maxPrecision(), scale);
else
return std::make_shared<ResultDataType>();
}
DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
@ -84,7 +108,7 @@ public:
{
writeBinary(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
if constexpr (std::is_unsigned_v<Denominator>)
writeVarUInt(this->data(place).denominator, buf);
else /// Floating point denominator type can be used
writeBinary(this->data(place).denominator, buf);
@ -94,7 +118,7 @@ public:
{
readBinary(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<typename Data::DenominatorType>)
if constexpr (std::is_unsigned_v<Denominator>)
readVarUInt(this->data(place).denominator, buf);
else /// Floating point denominator type can be used
readBinary(this->data(place).denominator, buf);
@ -102,29 +126,34 @@ public:
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
auto & column = static_cast<ColVecResult &>(to);
column.getData().push_back(this->data(place).template result<ResultType>());
if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
static_cast<ColumnVector<Float64> &>(to).getData().push_back(
this->data(place).divideIfAnyDecimal(num_scale, denom_scale));
else
static_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
}
protected:
UInt32 scale;
private:
UInt32 num_scale;
UInt32 denom_scale;
};
template <typename T, typename Data>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>
template <class T>
using AvgFieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
template <class T>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
{
public:
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>::AggregateFunctionAvgBase;
using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).numerator += column.getData()[row_num];
this->data(place).denominator += 1;
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
++this->data(place).denominator;
}
String getName() const override { return "avg"; }
String getName() const final { return "avg"; }
};
}
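The aggregation state is just the classic numerator/denominator pair: add() accumulates a running sum and a row count, and insertResultInto() performs a single division at the end, always producing Float64. A toy standalone equivalent (illustrative only):

#include <cstdint>
#include <cstdio>

int main()
{
    /// The same state AggregateFunctionAvg keeps per group.
    int64_t numerator = 0;
    uint64_t denominator = 0;

    const int64_t rows[] = {1, 2, 3, 4};
    for (int64_t v : rows)
    {
        numerator += v;   /// add()
        ++denominator;
    }

    /// insertResultInto(): one division at the end.
    printf("avg = %f\n", static_cast<double>(numerator) / denominator); /// 2.5
}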

View File

@ -1,3 +1,5 @@
#include <memory>
#include <type_traits>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
#include <AggregateFunctions/Helpers.h>
@ -13,47 +15,91 @@ namespace ErrorCodes
namespace
{
template <typename T>
struct AvgWeighted
bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
{
using FieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
};
const WhichDataType l_dt(left), r_dt(right);
template <typename T>
using AggregateFuncAvgWeighted = typename AvgWeighted<T>::Function;
constexpr auto allow = [](WhichDataType t)
{
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
};
return allow(l_dt) && allow(r_dt);
}
#define AT_SWITCH(LINE) \
switch (which.idx) \
{ \
LINE(Int8); LINE(Int16); LINE(Int32); LINE(Int64); LINE(Int128); LINE(Int256); \
LINE(UInt8); LINE(UInt16); LINE(UInt32); LINE(UInt64); LINE(UInt128); LINE(UInt256); \
LINE(Decimal32); LINE(Decimal64); LINE(Decimal128); LINE(Decimal256); \
LINE(Float32); LINE(Float64); \
default: return nullptr; \
}
template <class First, class ... TArgs>
static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(second_type);
#define LINE(Type) \
case TypeIndex::Type: return new AggregateFunctionAvgWeighted<First, Type>(std::forward<TArgs>(args)...)
AT_SWITCH(LINE)
#undef LINE
}
// Not using helper functions because there is no helper template for a binary decimal/numeric function.
template <class... TArgs>
static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(first_type);
#define LINE(Type) \
case TypeIndex::Type: return create<Type, TArgs...>(second_type, std::forward<TArgs>(args)...)
AT_SWITCH(LINE)
#undef LINE
}
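The nested AT_SWITCH expansion performs a two-level dispatch: the outer create() resolves the runtime TypeIndex of the first argument into a template parameter, then recurses into the inner create() for the second argument, yielding one AggregateFunctionAvgWeighted<First, Second> instantiation per type pair. A condensed sketch of the same pattern with only two types (hypothetical names, not the real TypeIndex):

#include <cstdio>

enum class TypeIndex { Int32, Float64 };

template <class First, class Second>
void instantiate() { printf("sizeof(First)=%zu sizeof(Second)=%zu\n", sizeof(First), sizeof(Second)); }

/// Inner level: the first type is already a template parameter; resolve the second.
template <class First>
void dispatchSecond(TypeIndex t)
{
    switch (t)
    {
        case TypeIndex::Int32:   return instantiate<First, int>();
        case TypeIndex::Float64: return instantiate<First, double>();
    }
}

/// Outer level: resolve the first type, then recurse for the second.
void dispatch(TypeIndex first, TypeIndex second)
{
    switch (first)
    {
        case TypeIndex::Int32:   return dispatchSecond<int>(second);
        case TypeIndex::Float64: return dispatchSecond<double>(second);
    }
}

int main() { dispatch(TypeIndex::Int32, TypeIndex::Float64); }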
AggregateFunctionPtr createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertNoParameters(name, parameters);
assertBinary(name, argument_types);
AggregateFunctionPtr res;
const auto data_type = static_cast<const DataTypePtr>(argument_types[0]);
const auto data_type_weight = static_cast<const DataTypePtr>(argument_types[1]);
if (!data_type->equals(*data_type_weight))
throw Exception("Different types " + data_type->getName() + " and " + data_type_weight->getName() + " of arguments for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (isDecimal(data_type))
res.reset(createWithDecimalType<AggregateFuncAvgWeighted>(*data_type, *data_type, argument_types));
if (!allowTypes(data_type, data_type_weight))
throw Exception(
"Types " + data_type->getName() +
" and " + data_type_weight->getName() +
" are non-conforming as arguments for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
AggregateFunctionPtr ptr;
const bool left_decimal = isDecimal(data_type);
const bool right_decimal = isDecimal(data_type_weight);
if (left_decimal && right_decimal)
ptr.reset(create(*data_type, *data_type_weight,
argument_types,
getDecimalScale(*data_type), getDecimalScale(*data_type_weight)));
else if (left_decimal)
ptr.reset(create(*data_type, *data_type_weight, argument_types,
getDecimalScale(*data_type)));
else if (right_decimal)
ptr.reset(create(*data_type, *data_type_weight, argument_types,
// numerator is not decimal, so its scale is 0
0, getDecimalScale(*data_type_weight)));
else
res.reset(createWithNumericType<AggregateFuncAvgWeighted>(*data_type, argument_types));
ptr.reset(create(*data_type, *data_type_weight, argument_types));
if (!res)
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
return ptr;
}
}
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
{
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
}
}

View File

@ -1,26 +1,44 @@
#pragma once
#include <type_traits>
#include <AggregateFunctions/AggregateFunctionAvg.h>
namespace DB
{
template <typename T, typename Data>
class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>
template <class T>
using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
std::conditional_t<DecimalOrExtendedInt<T>,
Float64, // no way to do UInt128 * UInt128, better cast to Float64
NearestFieldType<T>>>;
template <class T, class U>
using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)),
AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>;
template <class Value, class Weight>
class AggregateFunctionAvgWeighted final :
public AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>
{
public:
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>::AggregateFunctionAvgBase;
using Base = AggregateFunctionAvgBase<
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
using Base::Base;
using ValueT = MaxFieldType<Value, Weight>;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & values = static_cast<const ColVecType &>(*columns[0]);
const auto & weights = static_cast<const ColVecType &>(*columns[1]);
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<typename Data::NumeratorType>(values.getData()[row_num]) * weights.getData()[row_num];
this->data(place).denominator += weights.getData()[row_num];
this->data(place).numerator += static_cast<ValueT>(
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<ValueT>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
}
String getName() const override { return "avgWeighted"; }
};
}
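As with avg, the state is a single fraction, but here the numerator accumulates value * weight and the denominator accumulates the weights themselves. A toy check of the arithmetic:

#include <cstdio>

int main()
{
    double values[]  = {1.0, 2.0, 3.0};
    double weights[] = {10.0, 1.0, 1.0};
    double num = 0, den = 0;
    for (int i = 0; i < 3; ++i)
    {
        num += values[i] * weights[i]; /// sum of value * weight
        den += weights[i];             /// sum of weights
    }
    printf("avgWeighted = %f\n", num / den); /// (10 + 2 + 3) / 12 = 1.25
}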

View File

@ -21,7 +21,8 @@ class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
using DataTypes = std::vector<DataTypePtr>;
/** Creator have arguments: name of aggregate function, types of arguments, values of parameters.
/**
* The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
* Parameters are for "parametric" aggregate functions.
* For example, in quantileWeighted(0.9)(x, weight), 0.9 is "parameter" and x, weight are "arguments".
*/
@ -87,7 +88,6 @@ private:
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name) const;
private:
using AggregateFunctions = std::unordered_map<String, Value>;
AggregateFunctions aggregate_functions;

View File

@ -0,0 +1,37 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionMannWhitney.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
#include <AggregateFunctions/Helpers.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
namespace
{
AggregateFunctionPtr createAggregateFunctionMannWhitneyUTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertBinary(name, argument_types);
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
return std::make_shared<AggregateFunctionMannWhitney>(argument_types, parameters);
}
}
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory & factory)
{
factory.registerFunction("mannWhitneyUTest", createAggregateFunctionMannWhitneyUTest);
}
}

View File

@ -0,0 +1,246 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include <Common/PODArray_fwd.h>
#include <common/types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
#include <DataTypes/DataTypeArray.h>
#include <Common/ArenaAllocator.h>
#include <iostream>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
struct MannWhitneyData : public StatisticalSample<Float64, Float64>
{
/* The null hypothesis is "for randomly selected values X and Y from two populations,
 * the probability of X being greater than Y is equal to the probability of Y being greater than X",
 * or, equivalently, "the distribution F of the first sample equals the distribution G of the second sample".
 * The alternatives (H1) are then "two-sided" (F != G), "less" (F < G) and "greater" (F > G). */
enum class Alternative
{
TwoSided,
Less,
Greater
};
/// The behaviour matches that of the corresponding function from scipy:
/// https://github.com/scipy/scipy/blob/ab9e9f17e0b7b2d618c4d4d8402cd4c0c200d6c0/scipy/stats/stats.py#L6978
std::pair<Float64, Float64> getResult(Alternative alternative, bool continuity_correction)
{
ConcatenatedSamples both(this->x, this->y);
RanksArray ranks;
Float64 tie_correction;
/// Compute ranks according to both samples.
std::tie(ranks, tie_correction) = computeRanksAndTieCorrection(both);
const Float64 n1 = this->size_x;
const Float64 n2 = this->size_y;
Float64 r1 = 0;
for (size_t i = 0; i < n1; ++i)
r1 += ranks[i];
const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1;
const Float64 u2 = n1 * n2 - u1;
/// The distribution of U-statistic under null hypothesis H0 is symmetric with respect to meanrank.
const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction;
const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0);
Float64 u = 0;
if (alternative == Alternative::TwoSided)
/// It makes no difference which u_i we take as u, because z will differ only in sign and we take std::abs() of it.
u = std::max(u1, u2);
else if (alternative == Alternative::Less)
u = u1;
else if (alternative == Alternative::Greater)
u = u2;
Float64 z = (u - meanrank) / sd;
if (alternative == Alternative::TwoSided)
z = std::abs(z);
/// In fact the cdf is a probability function, i.e. the integral of the density over (-inf, z].
/// But since the standard normal distribution is symmetric, cdf(0) = 0.5, so it suffices to compute the integral over [0, z].
const Float64 cdf = integrateSimpson(0, z, [] (Float64 t) { return std::pow(M_E, -0.5 * t * t) / std::sqrt(2 * M_PI);});
Float64 p_value = 0;
if (alternative == Alternative::TwoSided)
p_value = 1 - 2 * cdf;
else
p_value = 0.5 - cdf;
return {u2, p_value};
}
private:
using Sample = typename StatisticalSample<Float64, Float64>::SampleX;
/// We need to compute ranks according to both samples together. Use this class to avoid extra copying and memory allocation.
class ConcatenatedSamples
{
public:
ConcatenatedSamples(const Sample & first_, const Sample & second_)
: first(first_), second(second_) {}
const Float64 & operator[](size_t ind) const
{
if (ind < first.size())
return first[ind];
return second[ind % first.size()];
}
size_t size() const
{
return first.size() + second.size();
}
private:
const Sample & first;
const Sample & second;
};
};
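For reference, the quantities computed in getResult() above are:

\[ U_1 = n_1 n_2 + \frac{n_1 (n_1 + 1)}{2} - R_1, \qquad U_2 = n_1 n_2 - U_1 \]
\[ z = \frac{U - \left( \frac{n_1 n_2}{2} + \frac{c}{2} \right)}{\sqrt{T \cdot \frac{n_1 n_2 (n_1 + n_2 + 1)}{12}}} \]

where R_1 is the rank sum of the first sample in the pooled ranking, c \in \{0, 1\} toggles the continuity correction, and T is the tie correction factor returned by computeRanksAndTieCorrection().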
class AggregateFunctionMannWhitney final:
public IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney>
{
private:
using Alternative = typename MannWhitneyData::Alternative;
Alternative alternative;
bool continuity_correction{true};
public:
explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params)
:IAggregateFunctionDataHelper<MannWhitneyData, AggregateFunctionMannWhitney> ({arguments}, {})
{
if (params.size() > 2)
throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (params.empty())
{
alternative = Alternative::TwoSided;
return;
}
if (params[0].getType() != Field::Types::String)
throw Exception("Aggregate function " + getName() + " require require first parameter to be a String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto param = params[0].get<String>();
if (param == "two-sided")
alternative = Alternative::TwoSided;
else if (param == "less")
alternative = Alternative::Less;
else if (param == "greater")
alternative = Alternative::Greater;
else
throw Exception("Unknown parameter in aggregate function " + getName() +
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
if (params.size() != 2)
return;
if (params[1].getType() != Field::Types::UInt64)
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
continuity_correction = static_cast<bool>(params[1].get<UInt64>());
}
String getName() const override
{
return "mannWhitneyUTest";
}
DataTypePtr getReturnType() const override
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"u_statistic",
"p_value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
this->data(place).addY(value, arena);
else
this->data(place).addX(value, arena);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & a = this->data(place);
auto & b = this->data(rhs);
a.merge(b, arena);
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
if (!this->data(place).size_x || !this->data(place).size_y)
throw Exception("Aggregate function " + getName() + " require both samples to be non empty", ErrorCodes::BAD_ARGUMENTS);
auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(u_statistic);
column_value.getData().push_back(p_value);
}
};
};

View File

@ -21,23 +21,10 @@ AggregateFunctionPtr createAggregateFunctionRankCorrelation(const std::string &
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
AggregateFunctionPtr res;
if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
{
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
res.reset(createWithTwoNumericTypes<AggregateFunctionRankCorrelation>(*argument_types[0], *argument_types[1], argument_types));
}
if (!res)
{
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
return res;
return std::make_shared<AggregateFunctionRankCorrelation>(argument_types);
}
}

View File

@ -1,73 +1,56 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include <Common/PODArray_fwd.h>
#include <common/types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
#include <DataTypes/DataTypeArray.h>
#include <Common/ArenaAllocator.h>
#include <type_traits>
namespace DB
{
template <template <typename> class Comparator>
struct ComparePairFirst final
struct RankCorrelationData : public StatisticalSample<Float64, Float64>
{
template <typename X, typename Y>
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
Float64 getResult()
{
return Comparator<X>{}(lhs.first, rhs.first);
RanksArray ranks_x;
std::tie(ranks_x, std::ignore) = computeRanksAndTieCorrection(this->x);
RanksArray ranks_y;
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
/// In our case sizes of both samples are equal.
const auto size = this->size_x;
/// Count d^2 sum
Float64 answer = 0;
for (size_t j = 0; j < size; ++j)
answer += (ranks_x[j] - ranks_y[j]) * (ranks_x[j] - ranks_y[j]);
answer *= 6;
answer /= size * (size * size - 1);
answer = 1 - answer;
return answer;
}
};
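getResult() above is the textbook Spearman rank correlation coefficient (with tied values receiving the average of their ranks):

\[ \rho = 1 - \frac{6 \sum_{j=1}^{n} d_j^2}{n (n^2 - 1)}, \qquad d_j = \operatorname{rank}(x_j) - \operatorname{rank}(y_j) \]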
template <template <typename> class Comparator>
struct ComparePairSecond final
{
template <typename X, typename Y>
bool operator()(const std::pair<X, Y> & lhs, const std::pair<X, Y> & rhs) const
{
return Comparator<Y>{}(lhs.second, rhs.second);
}
};
template <typename X = Float64, typename Y = Float64>
struct AggregateFunctionRankCorrelationData final
{
size_t size_x = 0;
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<X, Y>), 4096>;
using Array = PODArray<std::pair<X, Y>, 32, Allocator>;
Array values;
};
template <typename X, typename Y>
class AggregateFunctionRankCorrelation :
public IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>, AggregateFunctionRankCorrelation<X, Y>>
public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
{
using Data = AggregateFunctionRankCorrelationData<X, Y>;
using Allocator = MixedAlignedArenaAllocator<alignof(std::pair<Float64, Float64>), 4096>;
using Array = PODArray<std::pair<Float64, Float64>, 32, Allocator>;
public:
explicit AggregateFunctionRankCorrelation(const DataTypes & arguments)
:IAggregateFunctionDataHelper<AggregateFunctionRankCorrelationData<X, Y>,AggregateFunctionRankCorrelation<X, Y>> ({arguments}, {})
:IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation> ({arguments}, {})
{}
String getName() const override
@ -80,24 +63,12 @@ public:
return std::make_shared<DataTypeNumber<Float64>>();
}
void insert(Data & a, const std::pair<X, Y> & x, Arena * arena) const
{
++a.size_x;
a.values.push_back(x, arena);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & a = this->data(place);
auto new_x = assert_cast<const ColumnVector<X> &>(*columns[0]).getData()[row_num];
auto new_y = assert_cast<const ColumnVector<Y> &>(*columns[1]).getData()[row_num];
auto new_arg = std::make_pair(new_x, new_y);
a.size_x += 1;
a.values.push_back(new_arg, arena);
Float64 new_x = columns[0]->getFloat64(row_num);
Float64 new_y = columns[1]->getFloat64(row_num);
this->data(place).addX(new_x, arena);
this->data(place).addY(new_y, arena);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
@ -105,116 +76,22 @@ public:
auto & a = this->data(place);
auto & b = this->data(rhs);
if (b.size_x)
for (size_t i = 0; i < b.size_x; ++i)
insert(a, b.values[i], arena);
a.merge(b, arena);
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
const auto & value = this->data(place).values;
size_t size = this->data(place).size_x;
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
auto & value = this->data(place).values;
value.resize(size, arena);
buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
const auto & value = this->data(place).values;
size_t size = this->data(place).size_x;
// create a copy of values not to format data
PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
tmp_values.resize(size);
for (size_t j = 0; j < size; ++ j)
tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
// sort x_values
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
for (size_t j = 0; j < size;)
{
// replace x_values with their ranks
size_t rank = j + 1;
size_t same = 1;
size_t cur_sum = rank;
size_t cur_start = j;
while (j < size - 1)
{
if (tmp_values[j].first == tmp_values[j + 1].first)
{
// rank of (j + 1)th number
rank += 1;
++same;
cur_sum += rank;
++j;
}
else
break;
}
// insert rank is calculated as average of ranks of equal values
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
for (size_t i = cur_start; i <= j; ++i)
tmp_values[i].first = insert_rank;
++j;
}
// sort y_values
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
// replace y_values with their ranks
for (size_t j = 0; j < size;)
{
// replace x_values with their ranks
size_t rank = j + 1;
size_t same = 1;
size_t cur_sum = rank;
size_t cur_start = j;
while (j < size - 1)
{
if (tmp_values[j].second == tmp_values[j + 1].second)
{
// rank of (j + 1)th number
rank += 1;
++same;
cur_sum += rank;
++j;
}
else
{
break;
}
}
// insert rank is calculated as average of ranks of equal values
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
for (size_t i = cur_start; i <= j; ++i)
tmp_values[i].second = insert_rank;
++j;
}
// count d^2 sum
Float64 answer = static_cast<Float64>(0);
for (size_t j = 0; j < size; ++ j)
answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
answer *= 6;
answer /= size * (size * size - 1);
answer = 1 - answer;
auto answer = this->data(place).getResult();
auto & column = static_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(answer);

View File

@ -8,6 +8,7 @@
#include <IO/ReadHelpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
@ -30,310 +31,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int DECIMAL_OVERFLOW;
}
/**
Calculating univariate central moments
Levels:
level 2 (pop & samp): var, stddev
level 3: skewness
level 4: kurtosis
References:
https://en.wikipedia.org/wiki/Moment_(mathematics)
https://en.wikipedia.org/wiki/Skewness
https://en.wikipedia.org/wiki/Kurtosis
*/
template <typename T, size_t _level>
struct VarMoments
{
T m[_level + 1]{};
void add(T x)
{
++m[0];
m[1] += x;
m[2] += x * x;
if constexpr (_level >= 3) m[3] += x * x * x;
if constexpr (_level >= 4) m[4] += x * x * x * x;
}
void merge(const VarMoments & rhs)
{
m[0] += rhs.m[0];
m[1] += rhs.m[1];
m[2] += rhs.m[2];
if constexpr (_level >= 3) m[3] += rhs.m[3];
if constexpr (_level >= 4) m[4] += rhs.m[4];
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
T getPopulation() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
/// Due to numerical errors, the result can be slightly less than zero,
/// but it should be impossible. Trim to zero.
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
}
T getSample() const
{
if (m[0] <= 1)
return std::numeric_limits<T>::quiet_NaN();
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
}
T getMoment3() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
// to avoid accuracy problem
if (m[0] == 1)
return 0;
return (m[3]
- (3 * m[2]
- 2 * m[1] * m[1] / m[0]
) * m[1] / m[0]
) / m[0];
}
T getMoment4() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
// to avoid accuracy problem
if (m[0] == 1)
return 0;
return (m[4]
- (4 * m[3]
- (6 * m[2]
- 3 * m[1] * m[1] / m[0]
) * m[1] / m[0]
) * m[1] / m[0]
) / m[0];
}
};
template <typename T, size_t _level>
class VarMomentsDecimal
{
public:
using NativeType = typename T::NativeType;
void add(NativeType x)
{
++m0;
getM(1) += x;
NativeType tmp;
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
if constexpr (_level >= 3)
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
if constexpr (_level >= 4)
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
if (overflow)
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
}
void merge(const VarMomentsDecimal & rhs)
{
m0 += rhs.m0;
getM(1) += rhs.getM(1);
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
if constexpr (_level >= 3)
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
if constexpr (_level >= 4)
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
if (overflow)
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
}
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
Float64 getPopulation(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(getM(1), getM(1), tmp) ||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
}
Float64 getSample(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::quiet_NaN();
if (m0 == 1)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(getM(1), getM(1), tmp) ||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
}
Float64 getMoment3(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
}
Float64 getMoment4(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
}
private:
UInt64 m0{};
NativeType m[_level]{};
NativeType & getM(size_t i) { return m[i - 1]; }
const NativeType & getM(size_t i) const { return m[i - 1]; }
};
/**
Calculating multivariate central moments
Levels:
level 2 (pop & samp): covar
References:
https://en.wikipedia.org/wiki/Moment_(mathematics)
*/
template <typename T>
struct CovarMoments
{
T m0{};
T x1{};
T y1{};
T xy{};
void add(T x, T y)
{
++m0;
x1 += x;
y1 += y;
xy += x * y;
}
void merge(const CovarMoments & rhs)
{
m0 += rhs.m0;
x1 += rhs.x1;
y1 += rhs.y1;
xy += rhs.xy;
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
T NO_SANITIZE_UNDEFINED getPopulation() const
{
return (xy - x1 * y1 / m0) / m0;
}
T NO_SANITIZE_UNDEFINED getSample() const
{
if (m0 == 0)
return std::numeric_limits<T>::quiet_NaN();
return (xy - x1 * y1 / m0) / (m0 - 1);
}
};
template <typename T>
struct CorrMoments
{
T m0{};
T x1{};
T y1{};
T xy{};
T x2{};
T y2{};
void add(T x, T y)
{
++m0;
x1 += x;
y1 += y;
xy += x * y;
x2 += x * x;
y2 += y * y;
}
void merge(const CorrMoments & rhs)
{
m0 += rhs.m0;
x1 += rhs.x1;
y1 += rhs.y1;
xy += rhs.xy;
x2 += rhs.x2;
y2 += rhs.y2;
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
T NO_SANITIZE_UNDEFINED get() const
{
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
}
};
enum class StatisticsFunctionKind
{
varPop, varSamp,

View File

@ -0,0 +1,77 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Moments.h>
#include "registerAggregateFunctions.h"
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB
{
namespace
{
/** Student T-test applies to two samples of independent random variables
* that have normal distributions with equal (but unknown) variances.
* It allows answering the question of whether the means of the distributions differ.
*
* If variances are not considered equal, Welch T-test should be used instead.
*/
struct StudentTTestData : public TTestMoments<Float64>
{
static constexpr auto name = "studentTTest";
std::pair<Float64, Float64> getResult() const
{
Float64 mean_x = x1 / nx;
Float64 mean_y = y1 / ny;
/// To estimate the variance we first estimate two means.
/// That's why the number of degrees of freedom is the total number of values of both samples minus 2.
Float64 degrees_of_freedom = nx + ny - 2;
/// Calculate s^2
/// The original formula looks like
/// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
/// But we made some mathematical transformations not to store original sequences.
/// Also we dropped the sqrt, because it will be squared later anyway.
Float64 all_x = x2 + nx * mean_x * mean_x - 2 * mean_x * x1;
Float64 all_y = y2 + ny * mean_y * mean_y - 2 * mean_y * y1;
Float64 s2 = (all_x + all_y) / degrees_of_freedom;
Float64 std_err2 = s2 * (1. / nx + 1. / ny);
/// t-statistic
Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
}
};
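Written out, getResult() is the classic pooled-variance two-sample t-test; the sums of squared deviations are expanded as \sum x_i^2 - n_x \bar{x}^2 so that only the running sums x1, x2, y1, y2 and the counts need to be stored:

\[ s^2 = \frac{\sum_{i=1}^{n_x} (x_i - \bar{x})^2 + \sum_{i=1}^{n_y} (y_i - \bar{y})^2}{n_x + n_y - 2}, \qquad t = \frac{\bar{x} - \bar{y}}{\sqrt{s^2 \left( \frac{1}{n_x} + \frac{1}{n_y} \right)}} \]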
AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
return std::make_shared<AggregateFunctionTTest<StudentTTestData>>(argument_types);
}
}
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest);
}
}

View File

@ -0,0 +1,154 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Core/Types.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <cmath>
/// This function is used in implementations of different T-Tests.
/// On Darwin it's unavailable in math.h but actually exists in the library (can be linked successfully).
#if defined(OS_DARWIN)
extern "C"
{
double lgamma_r(double x, int * signgamp);
}
#endif
namespace DB
{
class ReadBuffer;
class WriteBuffer;
/**
* If you have a cumulative distribution function F, then calculating the p-value for a given statistic T is simply 1 - F(T).
* In our case the p-value is two-sided, so we multiply it by 2.
* So cumulative distribution function F equals to
* \[ F(t) = \int_{-\infty}^{t} f(u)du = 1 - \frac{1}{2} I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \]
* where \[ x(t) = \frac{v}{t^2 + v} \]: https://en.wikipedia.org/wiki/Student%27s_t-distribution#Cumulative_distribution_function
*
* so our resulting \[ p-value = I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \].
*
* And I is regularized incomplete beta function: https://en.wikipedia.org/wiki/Beta_function#Incomplete_beta_function
*
* Keeping in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \]
* and
* \[ \mathrm {B} (x,y)={\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}}=\
* \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \]
*
* p-value can be calculated in terms of gamma functions and integrals more simply:
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}r^{{\frac {\nu }{2}}-1}\,(1-r)^{-0.5}\,\mathrm {d} r}\
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
*
* which simplifies to:
*
* \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}{\frac {r^{{\frac {\nu }{2}}-1}}{\sqrt {1-r}}}\,\mathrm {d} r}\
* {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \]
*
* Read here for details https://rosettacode.org/wiki/Welch%27s_t-test#
*
* Both WelchTTest and StudentTTest have a t-statistic following a Student distribution, but with different degrees of freedom.
* So the procedure for computing the p-value is the same.
*/
static inline Float64 getPValue(Float64 degrees_of_freedom, Float64 t_stat2)
{
Float64 numerator = integrateSimpson(0, degrees_of_freedom / (t_stat2 + degrees_of_freedom),
[degrees_of_freedom](double x) { return std::pow(x, degrees_of_freedom / 2 - 1) / std::sqrt(1 - x); });
int unused;
Float64 denominator = std::exp(
lgamma_r(degrees_of_freedom / 2, &unused)
+ lgamma_r(0.5, &unused)
- lgamma_r(degrees_of_freedom / 2 + 0.5, &unused));
return std::min(1.0, std::max(0.0, numerator / denominator));
}
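integrateSimpson is used here as a generic numeric integrator for the regularized incomplete beta integral. A minimal composite Simpson's-rule sketch, an assumption about its shape rather than ClickHouse's actual implementation in StatCommon.h:

#include <cmath>
#include <cstddef>
#include <cstdio>

template <class F>
double integrateSimpson(double a, double b, F f, size_t n = 1000)
{
    if (n % 2)
        ++n; /// Simpson's rule needs an even number of intervals.
    const double h = (b - a) / n;
    double sum = f(a) + f(b);
    for (size_t i = 1; i < n; ++i)
        sum += f(a + i * h) * (i % 2 ? 4 : 2); /// 4-2-4-... interior weights
    return sum * h / 3;
}

int main()
{
    /// Standard normal density integrated over [0, 1.96]: expect ~0.4750.
    double v = integrateSimpson(0, 1.96, [](double t) { return std::exp(-0.5 * t * t) / std::sqrt(2 * M_PI); });
    printf("%.4f\n", v);
}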
/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename Data>
class AggregateFunctionTTest :
public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
{
public:
AggregateFunctionTTest(const DataTypes & arguments)
: IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>({arguments}, {})
{
}
String getName() const override
{
return Data::name;
}
DataTypePtr getReturnType() const override
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"t_statistic",
"p_value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
this->data(place).addY(value);
else
this->data(place).addX(value);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
this->data(place).read(buf);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
auto [t_statistic, p_value] = this->data(place).getResult();
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(t_statistic);
column_value.getData().push_back(p_value);
}
};
};

View File

@ -1,35 +0,0 @@
#include "AggregateFunctionTimeSeriesGroupSum.h"
#include "AggregateFunctionFactory.h"
#include "FactoryHelpers.h"
#include "Helpers.h"
#include "registerAggregateFunctions.h"
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <bool rate>
AggregateFunctionPtr createAggregateFunctionTimeSeriesGroupSum(const std::string & name, const DataTypes & arguments, const Array & params)
{
assertNoParameters(name, params);
if (arguments.size() < 3)
throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<AggregateFunctionTimeSeriesGroupSum<rate>>(arguments);
}
}
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory & factory)
{
factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum<false>);
factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum<true>);
}
}

View File

@ -1,291 +0,0 @@
#pragma once
#include <bitset>
#include <map>
#include <queue>
#include <unordered_set>
#include <utility>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <ext/range.h>
#include "IAggregateFunction.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <bool rate>
struct AggregateFunctionTimeSeriesGroupSumData
{
using DataPoint = std::pair<Int64, Float64>;
struct Points
{
using Dps = std::queue<DataPoint>;
Dps dps;
void add(Int64 t, Float64 v)
{
dps.push(std::make_pair(t, v));
if (dps.size() > 2)
dps.pop();
}
Float64 getval(Int64 t)
{
Int64 t1, t2;
Float64 v1, v2;
if (rate)
{
if (dps.size() < 2)
return 0;
t1 = dps.back().first;
t2 = dps.front().first;
v1 = dps.back().second;
v2 = dps.front().second;
return (v1 - v2) / Float64(t1 - t2);
}
else
{
if (dps.size() == 1 && t == dps.front().first)
return dps.front().second;
t1 = dps.back().first;
t2 = dps.front().first;
v1 = dps.back().second;
v2 = dps.front().second;
return v2 + ((v1 - v2) * Float64(t - t2)) / Float64(t1 - t2);
}
}
};
typedef std::map<UInt64, Points> Series;
typedef PODArrayWithStackMemory<DataPoint, 128> AggSeries;
Series ss;
AggSeries result;
void add(UInt64 uid, Int64 t, Float64 v)
{ //suppose t is coming asc
typename Series::iterator it_ss;
if (ss.count(uid) == 0)
{ //time series not exist, insert new one
Points tmp;
tmp.add(t, v);
ss.emplace(uid, tmp);
it_ss = ss.find(uid);
}
else
{
it_ss = ss.find(uid);
it_ss->second.add(t, v);
}
if (result.size() > 0 && t < result.back().first)
throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
if (result.size() > 0 && t == result.back().first)
{
//do not add new point
if (rate)
result.back().second += it_ss->second.getval(t);
else
result.back().second += v;
}
else
{
if (rate)
result.emplace_back(std::make_pair(t, it_ss->second.getval(t)));
else
result.emplace_back(std::make_pair(t, v));
}
ssize_t i = result.size() - 1;
//reverse find out the index of timestamp that more than previous timestamp of t
while (result[i].first > it_ss->second.dps.front().first && i >= 0)
i--;
i++;
while (i < ssize_t(result.size()) - 1)
{
result[i].second += it_ss->second.getval(result[i].first);
i++;
}
}
void merge(const AggregateFunctionTimeSeriesGroupSumData & other)
{
//if ts has overlap, then aggregate two series by interpolation;
AggSeries tmp;
tmp.reserve(other.result.size() + result.size());
size_t i = 0, j = 0;
Int64 t1, t2;
Float64 v1, v2;
while (i < result.size() && j < other.result.size())
{
if (result[i].first < other.result[j].first)
{
if (j == 0)
{
tmp.emplace_back(result[i]);
}
else
{
t1 = other.result[j].first;
t2 = other.result[j - 1].first;
v1 = other.result[j].second;
v2 = other.result[j - 1].second;
Float64 value = result[i].second + v2 + (v1 - v2) * (Float64(result[i].first - t2)) / Float64(t1 - t2);
tmp.emplace_back(std::make_pair(result[i].first, value));
}
i++;
}
else if (result[i].first > other.result[j].first)
{
if (i == 0)
{
tmp.emplace_back(other.result[j]);
}
else
{
t1 = result[i].first;
t2 = result[i - 1].first;
v1 = result[i].second;
v2 = result[i - 1].second;
Float64 value = other.result[j].second + v2 + (v1 - v2) * (Float64(other.result[j].first - t2)) / Float64(t1 - t2);
tmp.emplace_back(std::make_pair(other.result[j].first, value));
}
j++;
}
else
{
tmp.emplace_back(std::make_pair(result[i].first, result[i].second + other.result[j].second));
i++;
j++;
}
}
while (i < result.size())
{
tmp.emplace_back(result[i]);
i++;
}
while (j < other.result.size())
{
tmp.push_back(other.result[j]);
j++;
}
swap(result, tmp);
}
void serialize(WriteBuffer & buf) const
{
size_t size = result.size();
writeVarUInt(size, buf);
if (size > 0)
{
buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
}
}
void deserialize(ReadBuffer & buf)
{
size_t size = 0;
readVarUInt(size, buf);
result.resize(size);
if (size > 0)
{
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
}
}
};
template <bool rate>
class AggregateFunctionTimeSeriesGroupSum final
: public IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>
{
private:
public:
String getName() const override { return rate ? "timeSeriesGroupRateSum" : "timeSeriesGroupSum"; }
AggregateFunctionTimeSeriesGroupSum(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionTimeSeriesGroupSumData<rate>, AggregateFunctionTimeSeriesGroupSum<rate>>(arguments, {})
{
if (!WhichDataType(arguments[0].get()).isUInt64())
throw Exception{"Illegal type " + arguments[0].get()->getName() + " of argument 1 of aggregate function " + getName()
+ ", must be UInt64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!WhichDataType(arguments[1].get()).isInt64())
throw Exception{"Illegal type " + arguments[1].get()->getName() + " of argument 2 of aggregate function " + getName()
+ ", must be Int64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!WhichDataType(arguments[2].get()).isFloat64())
throw Exception{"Illegal type " + arguments[2].get()->getName() + " of argument 3 of aggregate function " + getName()
+ ", must be Float64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
DataTypePtr getReturnType() const override
{
auto datatypes = std::vector<DataTypePtr>();
datatypes.push_back(std::make_shared<DataTypeInt64>());
datatypes.push_back(std::make_shared<DataTypeFloat64>());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(datatypes));
}
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
auto uid = assert_cast<const ColumnVector<UInt64> *>(columns[0])->getData()[row_num];
auto ts = assert_cast<const ColumnVector<Int64> *>(columns[1])->getData()[row_num];
auto val = assert_cast<const ColumnVector<Float64> *>(columns[2])->getData()[row_num];
if (uid && ts && val)
{
this->data(place).add(uid, ts, val);
}
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); }
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).serialize(buf); }
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); }
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
const auto & value = this->data(place).result;
size_t size = value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
size_t old_size = offsets_to.back();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnInt64::Container & ts_to
= assert_cast<ColumnInt64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(0)).getData();
typename ColumnFloat64::Container & val_to
= assert_cast<ColumnFloat64 &>(assert_cast<ColumnTuple &>(arr_to.getData()).getColumn(1)).getData();
ts_to.reserve(old_size + size);
val_to.reserve(old_size + size);
size_t i = 0;
while (i < this->data(place).result.size())
{
ts_to.push_back(this->data(place).result[i].first);
val_to.push_back(this->data(place).result[i].second);
i++;
}
}
}
bool allocatesMemoryInArena() const override { return true; }
};
}

View File

@ -0,0 +1,74 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Moments.h>
#include "registerAggregateFunctions.h"
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB
{
namespace
{
struct WelchTTestData : public TTestMoments<Float64>
{
static constexpr auto name = "welchTTest";
std::pair<Float64, Float64> getResult() const
{
Float64 mean_x = x1 / nx;
Float64 mean_y = y1 / ny;
/// s_x^2, s_y^2
/// The original formula looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2}
/// but we applied some mathematical transformations so the original sequences don't have to be stored.
/// We also dropped the sqrt, because the value is squared later anyway.
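/// Expanding the square with x1 = \sum x_i, x2 = \sum x_i^2 and n = size_x gives the streaming form used below:
/// \[ s_x^2 = \frac{x2 + n \bar{x}^2 - 2 \bar{x} \cdot x1}{n - 1} \]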
Float64 sx2 = (x2 + nx * mean_x * mean_x - 2 * mean_x * x1) / (nx - 1);
Float64 sy2 = (y2 + ny * mean_y * mean_y - 2 * mean_y * y1) / (ny - 1);
/// t-statistic
Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / nx + sy2 / ny);
/// degrees of freedom
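/// (Welch-Satterthwaite approximation)
/// \[ \nu = \frac{(s_x^2 / n_x + s_y^2 / n_y)^2}{\frac{(s_x^2 / n_x)^2}{n_x - 1} + \frac{(s_y^2 / n_y)^2}{n_y - 1}} \]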
Float64 numerator_sqrt = sx2 / nx + sy2 / ny;
Float64 numerator = numerator_sqrt * numerator_sqrt;
Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1));
Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1));
Float64 degrees_of_freedom = numerator / (denominator_x + denominator_y);
return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)};
}
};
AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::BAD_ARGUMENTS);
return std::make_shared<AggregateFunctionTTest<WelchTTestData>>(argument_types);
}
}
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest);
}
}

View File

@ -15,6 +15,7 @@
M(Float32) \
M(Float64)
// No UInt128 here because of the name conflict
#define FOR_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \

View File

@ -0,0 +1,361 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int DECIMAL_OVERFLOW;
}
/**
Calculating univariate central moments
Levels:
level 2 (pop & samp): var, stddev
level 3: skewness
level 4: kurtosis
References:
https://en.wikipedia.org/wiki/Moment_(mathematics)
https://en.wikipedia.org/wiki/Skewness
https://en.wikipedia.org/wiki/Kurtosis
*/
template <typename T, size_t _level>
struct VarMoments
{
T m[_level + 1]{};
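/// m[0] is the number of values; m[k] accumulates the sum of x^k.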
void add(T x)
{
++m[0];
m[1] += x;
m[2] += x * x;
if constexpr (_level >= 3) m[3] += x * x * x;
if constexpr (_level >= 4) m[4] += x * x * x * x;
}
void merge(const VarMoments & rhs)
{
m[0] += rhs.m[0];
m[1] += rhs.m[1];
m[2] += rhs.m[2];
if constexpr (_level >= 3) m[3] += rhs.m[3];
if constexpr (_level >= 4) m[4] += rhs.m[4];
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
T getPopulation() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
/// Due to numerical errors, the result can be slightly less than zero,
/// even though mathematically it cannot be. Trim it to zero.
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / m[0]);
}
T getSample() const
{
if (m[0] <= 1)
return std::numeric_limits<T>::quiet_NaN();
return std::max(T{}, (m[2] - m[1] * m[1] / m[0]) / (m[0] - 1));
}
T getMoment3() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
// To avoid accuracy problems.
if (m[0] == 1)
return 0;
/// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\]
return (m[3]
- (3 * m[2]
- 2 * m[1] * m[1] / m[0]
) * m[1] / m[0]
) / m[0];
}
T getMoment4() const
{
if (m[0] == 0)
return std::numeric_limits<T>::quiet_NaN();
// To avoid accuracy problems.
if (m[0] == 1)
return 0;
/// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\]
return (m[4]
- (4 * m[3]
- (6 * m[2]
- 3 * m[1] * m[1] / m[0]
) * m[1] / m[0]
) * m[1] / m[0]
) / m[0];
}
};
template <typename T, size_t _level>
class VarMomentsDecimal
{
public:
using NativeType = typename T::NativeType;
void add(NativeType x)
{
++m0;
getM(1) += x;
NativeType tmp;
bool overflow = common::mulOverflow(x, x, tmp) || common::addOverflow(getM(2), tmp, getM(2));
if constexpr (_level >= 3)
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(3), tmp, getM(3));
if constexpr (_level >= 4)
overflow = overflow || common::mulOverflow(tmp, x, tmp) || common::addOverflow(getM(4), tmp, getM(4));
if (overflow)
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
}
void merge(const VarMomentsDecimal & rhs)
{
m0 += rhs.m0;
getM(1) += rhs.getM(1);
bool overflow = common::addOverflow(getM(2), rhs.getM(2), getM(2));
if constexpr (_level >= 3)
overflow = overflow || common::addOverflow(getM(3), rhs.getM(3), getM(3));
if constexpr (_level >= 4)
overflow = overflow || common::addOverflow(getM(4), rhs.getM(4), getM(4));
if (overflow)
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
}
void write(WriteBuffer & buf) const { writePODBinary(*this, buf); }
void read(ReadBuffer & buf) { readPODBinary(*this, buf); }
Float64 getPopulation(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(getM(1), getM(1), tmp) ||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / m0), scale));
}
Float64 getSample(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::quiet_NaN();
if (m0 == 1)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(getM(1), getM(1), tmp) ||
common::subOverflow(getM(2), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return std::max(Float64{}, DecimalUtils::convertTo<Float64>(T(tmp / (m0 - 1)), scale));
}
Float64 getMoment3(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(2 * getM(1), getM(1), tmp) ||
common::subOverflow(3 * getM(2), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(getM(3), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
}
Float64 getMoment4(UInt32 scale) const
{
if (m0 == 0)
return std::numeric_limits<Float64>::infinity();
NativeType tmp;
if (common::mulOverflow(3 * getM(1), getM(1), tmp) ||
common::subOverflow(6 * getM(2), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(4 * getM(3), NativeType(tmp / m0), tmp) ||
common::mulOverflow(tmp, getM(1), tmp) ||
common::subOverflow(getM(4), NativeType(tmp / m0), tmp))
throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW);
return DecimalUtils::convertTo<Float64>(T(tmp / m0), scale);
}
private:
UInt64 m0{};
NativeType m[_level]{};
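/// m0 is the count; m[k - 1] stores the sum of x^k and is accessed through getM(k).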
NativeType & getM(size_t i) { return m[i - 1]; }
const NativeType & getM(size_t i) const { return m[i - 1]; }
};
/**
Calculating multivariate central moments
Levels:
level 2 (pop & samp): covar
References:
https://en.wikipedia.org/wiki/Moment_(mathematics)
*/
template <typename T>
struct CovarMoments
{
T m0{};
T x1{};
T y1{};
T xy{};
void add(T x, T y)
{
++m0;
x1 += x;
y1 += y;
xy += x * y;
}
void merge(const CovarMoments & rhs)
{
m0 += rhs.m0;
x1 += rhs.x1;
y1 += rhs.y1;
xy += rhs.xy;
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
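/// Population covariance in streaming form (x1 = \sum x, y1 = \sum y, xy = \sum xy):
/// \[ \frac{xy - x1 \cdot y1 / m_0}{m_0} \]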
T NO_SANITIZE_UNDEFINED getPopulation() const
{
return (xy - x1 * y1 / m0) / m0;
}
T NO_SANITIZE_UNDEFINED getSample() const
{
if (m0 == 0)
return std::numeric_limits<T>::quiet_NaN();
return (xy - x1 * y1 / m0) / (m0 - 1);
}
};
template <typename T>
struct CorrMoments
{
T m0{};
T x1{};
T y1{};
T xy{};
T x2{};
T y2{};
void add(T x, T y)
{
++m0;
x1 += x;
y1 += y;
xy += x * y;
x2 += x * x;
y2 += y * y;
}
void merge(const CorrMoments & rhs)
{
m0 += rhs.m0;
x1 += rhs.x1;
y1 += rhs.y1;
xy += rhs.xy;
x2 += rhs.x2;
y2 += rhs.y2;
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
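/// Pearson correlation coefficient in streaming form:
/// \[ r = \frac{m_0 \cdot xy - x1 \cdot y1}{\sqrt{(m_0 \cdot x2 - x1^2)(m_0 \cdot y2 - y1^2)}} \]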
T NO_SANITIZE_UNDEFINED get() const
{
return (m0 * xy - x1 * y1) / sqrt((m0 * x2 - x1 * x1) * (m0 * y2 - y1 * y1));
}
};
/// Data for calculation of Student and Welch T-Tests.
template <typename T>
struct TTestMoments
{
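/// nx and ny are the sample sizes; x1/y1 accumulate the sums of the values, x2/y2 the sums of their squares.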
T nx{};
T ny{};
T x1{};
T y1{};
T x2{};
T y2{};
void addX(T value)
{
++nx;
x1 += value;
x2 += value * value;
}
void addY(T value)
{
++ny;
y1 += value;
y2 += value * value;
}
void merge(const TTestMoments & rhs)
{
nx += rhs.nx;
ny += rhs.ny;
x1 += rhs.x1;
y1 += rhs.y1;
x2 += rhs.x2;
y2 += rhs.y2;
}
void write(WriteBuffer & buf) const
{
writePODBinary(*this, buf);
}
void read(ReadBuffer & buf)
{
readPODBinary(*this, buf);
}
};
}

View File

@ -114,7 +114,7 @@ class QuantileTDigest
static constexpr size_t PART_SIZE_BITS = 8;
using Transform = RadixSortFloatTransform<KeyBits>;
using Allocator = RadixSortMallocAllocator;
using Allocator = RadixSortAllocator;
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem.mean; }

View File

@ -0,0 +1,114 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <Common/ArenaAllocator.h>
#include <numeric>
#include <algorithm>
#include <utility>
namespace DB
{
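/// Numerical integration by the composite Simpson's rule on a uniform grid of `iterations` subintervals:
/// \[ \int_a^b f(x) \, dx \approx \frac{h}{3} \left( f(a) + f(b) + 4 \sum_{i \, odd} f(a + ih) + 2 \sum_{i \, even} f(a + ih) \right) \]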
template <typename F>
static Float64 integrateSimpson(Float64 a, Float64 b, F && func)
{
const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b) - std::round(a)));
const long double h = (b - a) / iterations;
Float64 sum_odds = 0.0;
for (size_t i = 1; i < iterations; i += 2)
sum_odds += func(a + i * h);
Float64 sum_evens = 0.0;
for (size_t i = 2; i < iterations; i += 2)
sum_evens += func(a + i * h);
return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
}
/// Because ranks are adjusted for ties, each of them has to be stored as a floating-point value.
using RanksArray = std::vector<Float64>;
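/// Assigns every element the average rank of its tie group and also returns the tie correction
/// factor used by rank-based tests: \[ 1 - \frac{\sum_j (t_j^3 - t_j)}{n^3 - n} \],
/// where t_j is the size of the j-th group of equal values.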
template <typename Values>
std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values)
{
const size_t size = values.size();
/// Save the initial positions, then sort the indexes according to the values.
std::vector<size_t> indexes(size);
std::iota(indexes.begin(), indexes.end(), 0);
std::sort(indexes.begin(), indexes.end(),
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
size_t left = 0;
Float64 tie_numerator = 0;
RanksArray out(size);
while (left < size)
{
size_t right = left;
while (right < size && values[indexes[left]] == values[indexes[right]])
++right;
auto adjusted = (left + right + 1.) / 2.;
auto count_equal = right - left;
tie_numerator += std::pow(count_equal, 3) - count_equal;
for (size_t iter = left; iter < right; ++iter)
out[indexes[iter]] = adjusted;
left = right;
}
return {out, 1 - (tie_numerator / (std::pow(size, 3) - size))};
}
template <typename X, typename Y>
struct StatisticalSample
{
using AllocatorXSample = MixedAlignedArenaAllocator<alignof(X), 4096>;
using SampleX = PODArray<X, 32, AllocatorXSample>;
using AllocatorYSample = MixedAlignedArenaAllocator<alignof(Y), 4096>;
using SampleY = PODArray<Y, 32, AllocatorYSample>;
SampleX x{};
SampleY y{};
size_t size_x{0};
size_t size_y{0};
void addX(X value, Arena * arena)
{
++size_x;
x.push_back(value, arena);
}
void addY(Y value, Arena * arena)
{
++size_y;
y.push_back(value, arena);
}
void merge(const StatisticalSample & rhs, Arena * arena)
{
size_x += rhs.size_x;
size_y += rhs.size_y;
x.insert(rhs.x.begin(), rhs.x.end(), arena);
y.insert(rhs.y.begin(), rhs.y.end(), arena);
}
void write(WriteBuffer & buf) const
{
writeVarUInt(size_x, buf);
writeVarUInt(size_y, buf);
buf.write(reinterpret_cast<const char *>(x.data()), size_x * sizeof(x[0]));
buf.write(reinterpret_cast<const char *>(y.data()), size_y * sizeof(y[0]));
}
void read(ReadBuffer & buf, Arena * arena)
{
readVarUInt(size_x, buf);
readVarUInt(size_y, buf);
x.resize(size_x, arena);
y.resize(size_y, arena);
buf.read(reinterpret_cast<char *>(x.data()), size_x * sizeof(x[0]));
buf.read(reinterpret_cast<char *>(y.data()), size_y * sizeof(y[0]));
}
};
}

View File

@ -32,16 +32,16 @@ void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
void registerAggregateFunctionHistogram(AggregateFunctionFactory &);
void registerAggregateFunctionRetention(AggregateFunctionFactory &);
void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory &);
void registerAggregateFunctionMLMethod(AggregateFunctionFactory &);
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &);
void registerAggregateFunctionMoving(AggregateFunctionFactory &);
void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &);
void registerAggregateFunctionAggThrow(AggregateFunctionFactory &);
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -86,7 +86,6 @@ void registerAggregateFunctions()
registerAggregateFunctionsMaxIntersections(factory);
registerAggregateFunctionHistogram(factory);
registerAggregateFunctionRetention(factory);
registerAggregateFunctionTimeSeriesGroupSum(factory);
registerAggregateFunctionMLMethod(factory);
registerAggregateFunctionEntropy(factory);
registerAggregateFunctionSimpleLinearRegression(factory);
@ -94,6 +93,9 @@ void registerAggregateFunctions()
registerAggregateFunctionCategoricalIV(factory);
registerAggregateFunctionAggThrow(factory);
registerAggregateFunctionRankCorrelation(factory);
registerAggregateFunctionMannWhitney(factory);
registerAggregateFunctionWelchTTest(factory);
registerAggregateFunctionStudentTTest(factory);
}
{

View File

@ -0,0 +1,27 @@
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>
#include <Common/PODArray.h>
#include <AggregateFunctions/StatCommon.h>
#include <iostream>
#include <gtest/gtest.h>
TEST(Ranks, Simple)
{
using namespace DB;
RanksArray sample = {310, 195, 480, 530, 155, 530, 245, 385, 450, 450, 465, 545, 170, 180, 125, 180, 230, 170, 75, 430, 480, 495, 295};
RanksArray ranks;
Float64 t = 0;
std::tie(ranks, t) = computeRanksAndTieCorrection(sample);
RanksArray expected{12.0, 8.0, 18.5, 21.5, 3.0, 21.5, 10.0, 13.0, 15.5, 15.5, 17.0, 23.0, 4.5, 6.5, 2.0, 6.5, 9.0, 4.5, 1.0, 14.0, 18.5, 20.0, 11.0};
ASSERT_EQ(ranks.size(), expected.size());
for (size_t i = 0; i < ranks.size(); ++i)
ASSERT_DOUBLE_EQ(ranks[i], expected[i]);
ASSERT_DOUBLE_EQ(t, 0.9975296442687747);
}

View File

@ -29,6 +29,7 @@ SRCS(
AggregateFunctionHistogram.cpp
AggregateFunctionIf.cpp
AggregateFunctionMLMethod.cpp
AggregateFunctionMannWhitney.cpp
AggregateFunctionMaxIntersections.cpp
AggregateFunctionMerge.cpp
AggregateFunctionMinMaxAny.cpp
@ -43,13 +44,14 @@ SRCS(
AggregateFunctionState.cpp
AggregateFunctionStatistics.cpp
AggregateFunctionStatisticsSimple.cpp
AggregateFunctionStudentTTest.cpp
AggregateFunctionSum.cpp
AggregateFunctionSumMap.cpp
AggregateFunctionTimeSeriesGroupSum.cpp
AggregateFunctionTopK.cpp
AggregateFunctionUniq.cpp
AggregateFunctionUniqCombined.cpp
AggregateFunctionUniqUpTo.cpp
AggregateFunctionWelchTTest.cpp
AggregateFunctionWindowFunnel.cpp
UniqCombinedBiasData.cpp
UniqVariadicHash.cpp

View File

@ -223,7 +223,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
Dictionaries/FlatDictionary.cpp
Dictionaries/HashedDictionary.cpp
Dictionaries/CacheDictionary.cpp
Dictionaries/TrieDictionary.cpp
Dictionaries/IPAddressDictionary.cpp
Dictionaries/RangeHashedDictionary.cpp
Dictionaries/ComplexKeyHashedDictionary.cpp
Dictionaries/ComplexKeyCacheDictionary.cpp
@ -305,7 +305,6 @@ endif()
dbms_target_link_libraries (
PRIVATE
${BTRIE_LIBRARIES}
boost::filesystem
boost::program_options
clickhouse_common_config
@ -382,6 +381,10 @@ if (USE_PROTOBUF)
dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR})
endif ()
if (USE_GRPC)
dbms_target_link_libraries (PUBLIC clickhouse_grpc_protos)
endif()
if (USE_HDFS)
target_link_libraries (clickhouse_common_io PUBLIC ${HDFS3_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})

View File

@ -207,6 +207,12 @@ void Connection::receiveHello()
/// Receive hello packet.
UInt64 packet_type = 0;
/// Prevent reading after EOF in readVarUInt in case of a reset connection
/// (Poco should throw such an exception while reading from the socket,
/// but sometimes it doesn't, for unknown reasons).
if (in->eof())
throw Poco::Net::NetException("Connection reset by peer");
readVarUInt(packet_type, *in);
if (packet_type == Protocol::Server::Hello)
{

View File

@ -82,7 +82,7 @@ public:
bool isNumeric() const override { return false; }
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }
bool isFixedAndContiguous() const final { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
size_t size() const override { return data.size(); }

View File

@ -523,7 +523,10 @@
M(554, LZMA_STREAM_DECODER_FAILED) \
M(555, ROCKSDB_ERROR) \
M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\
M(557, DATABASE_REPLICATION_FAILED) \
M(557, UNKNOWN_UNION) \
M(558, EXPECTED_ALL_OR_DISTINCT) \
M(559, INVALID_GRPC_QUERY_INFO) \
M(560, DATABASE_REPLICATION_FAILED) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -1,30 +1,35 @@
#include "IPv6ToBinary.h"
#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include <cstring>
namespace DB
{
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
{
std::array<char, 16> res;
if (Poco::Net::IPAddress::IPv6 == address.family())
{
memcpy(res.data(), address.addr(), 16);
memcpy(res, address.addr(), 16);
}
else if (Poco::Net::IPAddress::IPv4 == address.family())
{
/// Convert to IPv6-mapped address.
memset(res.data(), 0, 10);
memset(res, 0, 10);
res[10] = '\xFF';
res[11] = '\xFF';
memcpy(&res[12], address.addr(), 4);
}
else
memset(res.data(), 0, 16);
memset(res, 0, 16);
}
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
{
std::array<char, 16> res;
IPv6ToRawBinary(address, res.data());
return res;
}

View File

@ -1,11 +1,16 @@
#pragma once
#include <array>
#include <common/types.h>
namespace Poco { namespace Net { class IPAddress; }}
namespace DB
{
/// Convert IP address to raw binary with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
/// Saves the result into the first 16 bytes of `res`.
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);

View File

@ -0,0 +1,21 @@
#pragma once
namespace DB
{
// The runtime info we need to create new OpenTelemetry spans.
struct OpenTelemetryTraceContext
{
__uint128_t trace_id = 0;
UInt64 span_id = 0;
// The incoming tracestate header and the trace flags; we just pass them
// downstream. See https://www.w3.org/TR/trace-context/
String tracestate;
__uint8_t trace_flags = 0;
// Parse/compose OpenTelemetry traceparent header.
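// The traceparent header is "{version}-{trace-id}-{parent-id}-{trace-flags}" in lowercase hex,
// e.g. "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01".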
bool parseTraceparentHeader(const std::string & traceparent, std::string & error);
std::string composeTraceparentHeader() const;
};
}

View File

@ -97,9 +97,6 @@
M(DistributedConnectionStaleReplica, "") \
M(DistributedConnectionFailAtAll, "Total count when distributed connection fails after all retries finished") \
\
M(CompileAttempt, "Number of times a compilation of generated C++ code was initiated.") \
M(CompileSuccess, "Number of times a compilation of generated C++ code was successful.") \
\
M(CompileFunction, "Number of times a compilation of generated LLVM code (to create fused function for complex expressions) was initiated.") \
M(CompiledFunctionExecute, "Number of times a compiled function was executed.") \
M(CompileExpressionsMicroseconds, "Total time spent for compilation of expressions to LLVM code.") \

View File

@ -176,7 +176,7 @@ template class QueryProfilerBase<QueryProfilerReal>;
template class QueryProfilerBase<QueryProfilerCpu>;
QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period)
: QueryProfilerBase(thread_id, CLOCK_REALTIME, period, SIGUSR1)
: QueryProfilerBase(thread_id, CLOCK_MONOTONIC, period, SIGUSR1)
{}
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)

View File

@ -35,16 +35,16 @@
/** Used as a template parameter. See below.
*/
struct RadixSortMallocAllocator
struct RadixSortAllocator
{
void * allocate(size_t size)
{
return malloc(size);
return ::operator new(size);
}
void deallocate(void * ptr, size_t /*size*/)
void deallocate(void * ptr, size_t size)
{
return free(ptr);
::operator delete(ptr, size);
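/// Sized operator delete (C++14) lets the library skip looking up the allocation size.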
}
};
@ -100,7 +100,7 @@ struct RadixSortFloatTraits
/// An object with the functions allocate and deallocate.
/// Can be used, for example, to allocate memory for a temporary array on the stack.
/// To do this, the allocator itself is created on the stack.
using Allocator = RadixSortMallocAllocator;
using Allocator = RadixSortAllocator;
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem; }
@ -139,7 +139,7 @@ struct RadixSortUIntTraits
static constexpr size_t PART_SIZE_BITS = 8;
using Transform = RadixSortIdentityTransform<KeyBits>;
using Allocator = RadixSortMallocAllocator;
using Allocator = RadixSortAllocator;
static Key & extractKey(Element & elem) { return elem; }
static Result & extractResult(Element & elem) { return elem; }
@ -173,7 +173,7 @@ struct RadixSortIntTraits
static constexpr size_t PART_SIZE_BITS = 8;
using Transform = RadixSortSignedTransform<KeyBits>;
using Allocator = RadixSortMallocAllocator;
using Allocator = RadixSortAllocator;
static Key & extractKey(Element & elem) { return elem; }
static Result & extractResult(Element & elem) { return elem; }

Some files were not shown because too many files have changed in this diff.