mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Merge branch 'master' into test-distributed_buffer_cannot_find_column
This commit is contained in:
commit
ced739f231
@ -2,8 +2,7 @@
|
||||
name: Documentation issue
|
||||
about: Report something incorrect or missing in documentation
|
||||
title: ''
|
||||
labels: documentation
|
||||
assignees: BayoNet
|
||||
labels: comp-documentation
|
||||
|
||||
---
|
||||
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -124,3 +124,5 @@ website/package-lock.json
|
||||
|
||||
# Toolchains
|
||||
/cmake/toolchain/*
|
||||
|
||||
*.iml
|
||||
|
10
.gitmodules
vendored
10
.gitmodules
vendored
@ -44,6 +44,7 @@
|
||||
[submodule "contrib/protobuf"]
|
||||
path = contrib/protobuf
|
||||
url = https://github.com/ClickHouse-Extras/protobuf.git
|
||||
branch = v3.13.0.1
|
||||
[submodule "contrib/boost"]
|
||||
path = contrib/boost
|
||||
url = https://github.com/ClickHouse-Extras/boost.git
|
||||
@ -107,6 +108,7 @@
|
||||
[submodule "contrib/grpc"]
|
||||
path = contrib/grpc
|
||||
url = https://github.com/ClickHouse-Extras/grpc.git
|
||||
branch = v1.33.2
|
||||
[submodule "contrib/aws"]
|
||||
path = contrib/aws
|
||||
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
|
||||
@ -198,5 +200,9 @@
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.14.5
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
[submodule "contrib/abseil-cpp"]
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
|
||||
branch = lts_2020_02_25
|
||||
|
@ -154,17 +154,19 @@ endif ()
|
||||
# Make sure the final executable has symbols exported
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
|
||||
|
||||
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
|
||||
if (OBJCOPY_PATH)
|
||||
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
|
||||
if (OS_LINUX)
|
||||
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
|
||||
if (OBJCOPY_PATH)
|
||||
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
|
||||
|
||||
if (ARCH_AMD64)
|
||||
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
|
||||
elseif (ARCH_AARCH64)
|
||||
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
|
||||
if (ARCH_AMD64)
|
||||
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
|
||||
elseif (ARCH_AARCH64)
|
||||
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
|
||||
endif ()
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find objcopy.")
|
||||
endif ()
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find objcopy.")
|
||||
endif ()
|
||||
|
||||
if (OS_DARWIN)
|
||||
@ -475,9 +477,6 @@ find_contrib_lib(cityhash)
|
||||
|
||||
find_contrib_lib(farmhash)
|
||||
|
||||
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
|
||||
find_contrib_lib(btrie)
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find/gtest.cmake)
|
||||
endif ()
|
||||
|
@ -1,6 +1,6 @@
|
||||
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
|
||||
|
||||
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
|
||||
## Useful Links
|
||||
|
||||
@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
|
||||
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020.
|
||||
* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020.
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
/// Macros for convenient usage of Poco logger.
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Message.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
19
base/glibc-compatibility/musl/accept4.c
Normal file
19
base/glibc-compatibility/musl/accept4.c
Normal file
@ -0,0 +1,19 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/socket.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg)
|
||||
{
|
||||
if (!flg) return accept(fd, addr, len);
|
||||
int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
|
||||
if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
|
||||
ret = accept(fd, addr, len);
|
||||
if (ret<0) return ret;
|
||||
if (flg & SOCK_CLOEXEC)
|
||||
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
|
||||
if (flg & SOCK_NONBLOCK)
|
||||
__syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK);
|
||||
return ret;
|
||||
}
|
37
base/glibc-compatibility/musl/epoll.c
Normal file
37
base/glibc-compatibility/musl/epoll.c
Normal file
@ -0,0 +1,37 @@
|
||||
#include <sys/epoll.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int epoll_create(int size)
|
||||
{
|
||||
return epoll_create1(0);
|
||||
}
|
||||
|
||||
int epoll_create1(int flags)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_create1, flags);
|
||||
#ifdef SYS_epoll_create
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
|
||||
{
|
||||
return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
|
||||
}
|
||||
|
||||
int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
|
||||
#ifdef SYS_epoll_wait
|
||||
if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
|
||||
{
|
||||
return epoll_pwait(fd, ev, cnt, to, 0);
|
||||
}
|
23
base/glibc-compatibility/musl/eventfd.c
Normal file
23
base/glibc-compatibility/musl/eventfd.c
Normal file
@ -0,0 +1,23 @@
|
||||
#include <sys/eventfd.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int eventfd(unsigned int count, int flags)
|
||||
{
|
||||
int r = __syscall(SYS_eventfd2, count, flags);
|
||||
#ifdef SYS_eventfd
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int eventfd_read(int fd, eventfd_t *value)
|
||||
{
|
||||
return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1;
|
||||
}
|
||||
|
||||
int eventfd_write(int fd, eventfd_t value)
|
||||
{
|
||||
return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1;
|
||||
}
|
45
base/glibc-compatibility/musl/getauxval.c
Normal file
45
base/glibc-compatibility/musl/getauxval.c
Normal file
@ -0,0 +1,45 @@
|
||||
#include <sys/auxv.h>
|
||||
#include <unistd.h> // __environ
|
||||
#include <errno.h>
|
||||
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
static unsigned long * __auxv = NULL;
|
||||
static unsigned long __auxv_secure = 0;
|
||||
|
||||
static size_t __find_auxv(unsigned long type)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __auxv[i]; i += 2)
|
||||
{
|
||||
if (__auxv[i] == type)
|
||||
return i + 1;
|
||||
}
|
||||
return (size_t) -1;
|
||||
}
|
||||
|
||||
__attribute__((constructor)) static void __auxv_init()
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __environ[i]; i++);
|
||||
__auxv = (unsigned long *) (__environ + i + 1);
|
||||
|
||||
size_t secure_idx = __find_auxv(AT_SECURE);
|
||||
if (secure_idx != ((size_t) -1))
|
||||
__auxv_secure = __auxv[secure_idx];
|
||||
}
|
||||
|
||||
unsigned long getauxval(unsigned long type)
|
||||
{
|
||||
if (type == AT_SECURE)
|
||||
return __auxv_secure;
|
||||
|
||||
if (__auxv)
|
||||
{
|
||||
size_t index = __find_auxv(type);
|
||||
if (index != ((size_t) -1))
|
||||
return __auxv[index];
|
||||
}
|
||||
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
@ -0,0 +1,8 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
char * secure_getenv(const char * name)
|
||||
{
|
||||
return getauxval(AT_SECURE) ? NULL : getenv(name);
|
||||
}
|
@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...);
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
void *__vdsosym(const char *, const char *);
|
||||
|
||||
#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
|
||||
|
||||
#define socketcall(...) __syscall_ret(__socketcall(__VA_ARGS__))
|
||||
|
||||
#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
|
||||
|
||||
#define socketcall_cp socketcall
|
||||
|
@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
|
||||
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
|
||||
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
|
||||
|
||||
extern char** environ;
|
||||
static Ehdr *eh = NULL;
|
||||
void *__vdsosym(const char *vername, const char *name);
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
__attribute__((constructor)) static void auxv_init()
|
||||
{
|
||||
size_t i, *auxv;
|
||||
for (i=0; environ[i]; i++);
|
||||
auxv = (void *)(environ+i+1);
|
||||
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
|
||||
if (!auxv[i]) return;
|
||||
if (!auxv[i+1]) return;
|
||||
eh = (void *)auxv[i+1];
|
||||
}
|
||||
|
||||
void *__vdsosym(const char *vername, const char *name)
|
||||
{
|
||||
size_t i;
|
||||
Ehdr * eh = (void *) getauxval(AT_SYSINFO_EHDR);
|
||||
if (!eh) return 0;
|
||||
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
|
||||
size_t *dynv=0, base=-1;
|
||||
|
@ -1,44 +0,0 @@
|
||||
# - Try to find btrie headers and libraries.
|
||||
#
|
||||
# Usage of this module as follows:
|
||||
#
|
||||
# find_package(btrie)
|
||||
#
|
||||
# Variables used by this module, they can change the default behaviour and need
|
||||
# to be set before calling find_package:
|
||||
#
|
||||
# BTRIE_ROOT_DIR Set this variable to the root installation of
|
||||
# btrie if the module has problems finding
|
||||
# the proper installation path.
|
||||
#
|
||||
# Variables defined by this module:
|
||||
#
|
||||
# BTRIE_FOUND System has btrie libs/headers
|
||||
# BTRIE_LIBRARIES The btrie library/libraries
|
||||
# BTRIE_INCLUDE_DIR The location of btrie headers
|
||||
|
||||
find_path(BTRIE_ROOT_DIR
|
||||
NAMES include/btrie.h
|
||||
)
|
||||
|
||||
find_library(BTRIE_LIBRARIES
|
||||
NAMES btrie
|
||||
PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
|
||||
)
|
||||
|
||||
find_path(BTRIE_INCLUDE_DIR
|
||||
NAMES btrie.h
|
||||
PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(btrie DEFAULT_MSG
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
||||
|
||||
mark_as_advanced(
|
||||
BTRIE_ROOT_DIR
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
@ -6,11 +6,9 @@ Defines the following variables:
|
||||
The include directories of the gRPC framework, including the include directories of the C++ wrapper.
|
||||
``gRPC_LIBRARIES``
|
||||
The libraries of the gRPC framework.
|
||||
``gRPC_UNSECURE_LIBRARIES``
|
||||
The libraries of the gRPC framework without SSL.
|
||||
``_gRPC_CPP_PLUGIN``
|
||||
``gRPC_CPP_PLUGIN``
|
||||
The plugin for generating gRPC client and server C++ stubs from `.proto` files
|
||||
``_gRPC_PYTHON_PLUGIN``
|
||||
``gRPC_PYTHON_PLUGIN``
|
||||
The plugin for generating gRPC client and server Python stubs from `.proto` files
|
||||
|
||||
The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
``grpc_cpp_plugin``
|
||||
``grpc_python_plugin``
|
||||
|
||||
Set the following variables to adjust the behaviour of this script:
|
||||
``gRPC_USE_UNSECURE_LIBRARIES``
|
||||
if set gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL)
|
||||
instead of the secure ones.
|
||||
``gRPC_DEBUG`
|
||||
if set the debug message will be printed.
|
||||
|
||||
Add custom commands to process ``.proto`` files to C++::
|
||||
protobuf_generate_grpc_cpp(<SRCS> <HDRS>
|
||||
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
|
||||
@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc)
|
||||
find_library(gRPC_CPP_LIBRARY NAMES grpc++)
|
||||
find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure)
|
||||
find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure)
|
||||
find_library(gRPC_CARES_LIBRARY NAMES cares)
|
||||
|
||||
set(gRPC_LIBRARIES)
|
||||
if(gRPC_USE_UNSECURE_LIBRARIES)
|
||||
@ -259,6 +265,7 @@ else()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY})
|
||||
|
||||
# Restore the original find library ordering.
|
||||
if(gRPC_USE_STATIC_LIBS)
|
||||
@ -278,11 +285,11 @@ else()
|
||||
endif()
|
||||
|
||||
# Get full path to plugin.
|
||||
find_program(_gRPC_CPP_PLUGIN
|
||||
find_program(gRPC_CPP_PLUGIN
|
||||
NAMES grpc_cpp_plugin
|
||||
DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files")
|
||||
|
||||
find_program(_gRPC_PYTHON_PLUGIN
|
||||
find_program(gRPC_PYTHON_PLUGIN
|
||||
NAMES grpc_python_plugin
|
||||
DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files")
|
||||
|
||||
@ -317,14 +324,14 @@ endif()
|
||||
|
||||
#include(FindPackageHandleStandardArgs.cmake)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN)
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN)
|
||||
|
||||
if(gRPC_FOUND)
|
||||
if(gRPC_DEBUG)
|
||||
message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}")
|
||||
message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -12,13 +12,7 @@ set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
|
||||
# Minimal supported SDK version
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.15")
|
||||
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.15")
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
|
||||
|
||||
# Global libraries
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# Needed when using Apache Avro serialization format
|
||||
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_AVRO)
|
||||
|
@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY)
|
||||
if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
elseif(NOT _gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin")
|
||||
elseif(NOT gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
else()
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
|
||||
@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
else()
|
||||
set(gRPC_LIBRARIES grpc grpc++)
|
||||
endif()
|
||||
set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(_gRPC_PROTOC_EXECUTABLE $<TARGET_FILE:protobuf::protoc>)
|
||||
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
|
||||
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
||||
@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
set(USE_GRPC 1)
|
||||
endif()
|
||||
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Needed when securely connecting to an external server, e.g.
|
||||
# clickhouse-client --host ... --secure
|
||||
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
|
||||
|
||||
if(NOT ENABLE_SSL)
|
||||
|
@ -23,7 +23,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
|
||||
|
||||
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
|
||||
# Only in release build because debug has too large stack frames.
|
||||
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE))
|
||||
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
|
||||
add_warning(frame-larger-than=32768)
|
||||
endif ()
|
||||
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
|
||||
add_subdirectory (libfarmhash)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_BTRIE_LIBRARY)
|
||||
add_subdirectory (libbtrie)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
|
||||
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
|
||||
|
1
contrib/abseil-cpp
vendored
Submodule
1
contrib/abseil-cpp
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54
|
2
contrib/grpc
vendored
2
contrib/grpc
vendored
@ -1 +1 @@
|
||||
Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43
|
||||
Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93
|
@ -1,6 +1,7 @@
|
||||
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
|
||||
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
|
||||
|
||||
# Use re2 from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT RE2_INCLUDE_DIR)
|
||||
message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown")
|
||||
endif()
|
||||
@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}")
|
||||
set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}")
|
||||
|
||||
# Use zlib from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT ZLIB_INCLUDE_DIRS)
|
||||
message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown")
|
||||
endif()
|
||||
@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}")
|
||||
set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}")
|
||||
|
||||
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
|
||||
message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown")
|
||||
elseif (NOT Protobuf_PROTOC_EXECUTABLE)
|
||||
@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc")
|
||||
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")
|
||||
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}")
|
||||
|
||||
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
|
||||
set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES})
|
||||
|
||||
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
|
||||
message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
|
||||
|
||||
# Choose to build static or shared library for c-ares.
|
||||
if (MAKE_STATIC_LIBRARIES)
|
||||
set(CARES_STATIC ON CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
|
||||
else ()
|
||||
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED ON CACHE BOOL "" FORCE)
|
||||
endif ()
|
||||
|
||||
# We don't want to build C# extensions.
|
||||
set(gRPC_BUILD_CSHARP_EXT OFF)
|
||||
|
||||
# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off.
|
||||
set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING})
|
||||
set(BUILD_TESTING OFF)
|
||||
|
||||
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
|
||||
|
||||
set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING})
|
||||
|
||||
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
|
||||
# so we need to redefine it back.
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
@ -1,6 +0,0 @@
|
||||
add_library(btrie
|
||||
src/btrie.c
|
||||
include/btrie.h
|
||||
)
|
||||
|
||||
target_include_directories (btrie SYSTEM PUBLIC include)
|
@ -1,23 +0,0 @@
|
||||
Copyright (c) 2013, CobbLiu
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,160 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* In btrie, each leaf means one bit in ip tree.
|
||||
* Left means 0, and right means 1.
|
||||
*/
|
||||
|
||||
#define BTRIE_NULL (uintptr_t) -1
|
||||
|
||||
#if !defined(BTRIE_MAX_PAGES)
|
||||
/// 54 ip per page. 8 bytes memory per page when empty
|
||||
#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty
|
||||
// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
|
||||
#endif
|
||||
|
||||
typedef struct btrie_node_s btrie_node_t;
|
||||
|
||||
struct btrie_node_s {
|
||||
btrie_node_t *right;
|
||||
btrie_node_t *left;
|
||||
btrie_node_t *parent;
|
||||
uintptr_t value;
|
||||
};
|
||||
|
||||
|
||||
typedef struct btrie_s {
|
||||
btrie_node_t *root;
|
||||
|
||||
btrie_node_t *free; /* free list of btrie */
|
||||
char *start;
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* memory pool.
|
||||
* memory management(esp free) will be so easy by using this facility.
|
||||
*/
|
||||
char *pools[BTRIE_MAX_PAGES];
|
||||
size_t len;
|
||||
} btrie_t;
|
||||
|
||||
|
||||
/**
|
||||
* Create an empty btrie
|
||||
*
|
||||
* @Return:
|
||||
* An ip radix_tree created.
|
||||
* NULL if creation failed.
|
||||
*/
|
||||
|
||||
btrie_t *btrie_create();
|
||||
|
||||
/**
|
||||
* Destroy the ip radix_tree
|
||||
*
|
||||
* @Return:
|
||||
* OK if deletion succeed.
|
||||
* ERROR if error occurs while deleting.
|
||||
*/
|
||||
int btrie_destroy(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Count the nodes in the radix tree.
|
||||
*/
|
||||
size_t btrie_count(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Return the allocated number of bytes.
|
||||
*/
|
||||
size_t btrie_allocated(btrie_t *tree);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv4 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Delete an ipv4 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
|
||||
|
||||
|
||||
/**
|
||||
* Find an ipv4 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv6 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value);
|
||||
|
||||
/**
|
||||
* Delete an ipv6 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
|
||||
|
||||
/**
|
||||
* Find an ipv6 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
@ -1,460 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <btrie.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
static btrie_node_t *
|
||||
btrie_alloc(btrie_t *tree)
|
||||
{
|
||||
btrie_node_t *p;
|
||||
|
||||
if (tree->free) {
|
||||
p = tree->free;
|
||||
tree->free = tree->free->right;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (tree->size < sizeof(btrie_node_t)) {
|
||||
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
|
||||
if (tree->start == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->pools[tree->len++] = tree->start;
|
||||
tree->size = PAGE_SIZE;
|
||||
}
|
||||
|
||||
p = (btrie_node_t *) tree->start;
|
||||
|
||||
tree->start += sizeof(btrie_node_t);
|
||||
tree->size -= sizeof(btrie_node_t);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
btrie_t *
|
||||
btrie_create()
|
||||
{
|
||||
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
|
||||
if (tree == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->free = NULL;
|
||||
tree->start = NULL;
|
||||
tree->size = 0;
|
||||
memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
|
||||
tree->len = 0;
|
||||
|
||||
tree->root = btrie_alloc(tree);
|
||||
if (tree->root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->root->right = NULL;
|
||||
tree->root->left = NULL;
|
||||
tree->root->parent = NULL;
|
||||
tree->root->value = BTRIE_NULL;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static size_t
|
||||
subtree_weight(btrie_node_t *node)
|
||||
{
|
||||
size_t weight = 1;
|
||||
if (node->left) {
|
||||
weight += subtree_weight(node->left);
|
||||
}
|
||||
if (node->right) {
|
||||
weight += subtree_weight(node->right);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_count(btrie_t *tree)
|
||||
{
|
||||
if (tree->root == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return subtree_weight(tree->root);
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_allocated(btrie_t *tree)
|
||||
{
|
||||
return tree->len * PAGE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
bit = 0x80000000;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask) {
|
||||
if (key & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask)) {
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find(btrie_t *tree, uint32_t key)
|
||||
{
|
||||
uint32_t bit;
|
||||
uintptr_t value;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask[i]) {
|
||||
if (key[i] & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask[i]) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key[i] & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask[i])) {
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find_a6(btrie_t *tree, const uint8_t *key)
|
||||
{
|
||||
uint8_t bit;
|
||||
uintptr_t value;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
i++;
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_destroy(btrie_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
||||
/* free memory pools */
|
||||
for (i = 0; i < tree->len; i++) {
|
||||
free(tree->pools[i]);
|
||||
}
|
||||
|
||||
free(tree);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <btrie.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
btrie_t *it;
|
||||
int ret;
|
||||
|
||||
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
|
||||
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
|
||||
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
|
||||
|
||||
it = btrie_create();
|
||||
if (it == NULL) {
|
||||
printf("create error!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//add 101.45.69.50/16
|
||||
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 1 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.69.50/16
|
||||
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 2 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.79.50/16
|
||||
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
|
||||
if (ret == 0) {
|
||||
printf("insert 3 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 102.45.79.50/24
|
||||
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 4 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170741554);
|
||||
if (ret == 1) {
|
||||
printf("test case 1 passed\n");
|
||||
} else {
|
||||
printf("test case 1 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170786817);
|
||||
if (ret != 1) {
|
||||
printf("test case 2 passed\n");
|
||||
} else {
|
||||
printf("test case 2 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_delete(it, 1714245426, 0xffffff00);
|
||||
if (ret != 0) {
|
||||
printf("delete 1 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 1714245426);
|
||||
if (ret != 1) {
|
||||
printf("test case 3 passed\n");
|
||||
} else {
|
||||
printf("test case 3 error\n");
|
||||
}
|
||||
|
||||
//add dead:beef::/32
|
||||
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find_a6(it, ip_v6);
|
||||
if (ret == 1) {
|
||||
printf("test case 4 passed\n");
|
||||
} else {
|
||||
printf("test case 4 error\n");
|
||||
}
|
||||
|
||||
// insert 4m ips
|
||||
for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
|
||||
ret = btrie_insert(it, ip, 0xffffffff, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error (%d) (%zu) .\n", ret, ip);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrie_destroy(it);
|
||||
printf("test failed\n");
|
||||
return 1;
|
||||
}
|
@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st
|
||||
set(LIBUNWIND_ASM_SOURCES
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S)
|
||||
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
|
||||
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
|
||||
# Workaround these two issues by compiling as C.
|
||||
#
|
||||
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
|
||||
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
|
||||
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
else()
|
||||
enable_language(ASM)
|
||||
endif()
|
||||
|
||||
set(LIBUNWIND_SOURCES
|
||||
${LIBUNWIND_CXX_SOURCES}
|
||||
|
2
contrib/protobuf
vendored
2
contrib/protobuf
vendored
@ -1 +1 @@
|
||||
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3
|
||||
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee
|
96
debian/clickhouse-server.init
vendored
96
debian/clickhouse-server.init
vendored
@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
|
||||
fi
|
||||
|
||||
|
||||
is_running()
|
||||
{
|
||||
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
|
||||
}
|
||||
|
||||
|
||||
wait_for_done()
|
||||
{
|
||||
timeout=$1
|
||||
attempts=0
|
||||
while is_running; do
|
||||
attempts=$(($attempts + 1))
|
||||
if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
@ -105,49 +85,7 @@ check_config()
|
||||
|
||||
initdb()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
|
||||
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
|
||||
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
|
||||
fi
|
||||
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
else
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
|
||||
fi
|
||||
|
||||
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
|
||||
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
|
||||
return
|
||||
fi
|
||||
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
|
||||
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
|
||||
return
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
|
||||
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
|
||||
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
|
||||
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
|
||||
else
|
||||
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
|
||||
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
|
||||
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -171,17 +109,7 @@ restart()
|
||||
|
||||
forcestop()
|
||||
{
|
||||
local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
echo -n "Stop forcefully $PROGRAM service: "
|
||||
|
||||
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
|
||||
|
||||
wait_for_done
|
||||
|
||||
echo "DONE"
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -261,16 +189,16 @@ main()
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
is_running || service_or_func start
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
is_running && service_or_func stop
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
@ -293,17 +221,7 @@ main()
|
||||
|
||||
status()
|
||||
{
|
||||
if is_running; then
|
||||
echo "$PROGRAM service is running"
|
||||
exit 0
|
||||
else
|
||||
if is_cron_disabled; then
|
||||
echo "$PROGRAM service is stopped";
|
||||
else
|
||||
echo "$PROGRAM: process unexpectedly terminated"
|
||||
fi
|
||||
exit 3
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
|
@ -56,6 +56,7 @@ RUN apt-get update \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
protobuf-compiler-grpc \
|
||||
libc-ares-dev \
|
||||
rapidjson-dev \
|
||||
libsnappy-dev \
|
||||
libparquet-dev \
|
||||
|
@ -104,223 +104,248 @@ function start_server
|
||||
|
||||
function clone_root
|
||||
{
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
|
||||
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
|
||||
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
|
||||
git checkout FETCH_HEAD
|
||||
echo 'Clonned merge head'
|
||||
else
|
||||
git fetch
|
||||
git checkout "$COMMIT_SHA"
|
||||
echo 'Checked out to commit'
|
||||
fi
|
||||
else
|
||||
if [ -v COMMIT_SHA ]; then
|
||||
git checkout "$COMMIT_SHA"
|
||||
fi
|
||||
fi
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
|
||||
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
|
||||
git checkout FETCH_HEAD
|
||||
echo 'Clonned merge head'
|
||||
else
|
||||
git fetch
|
||||
git checkout "$COMMIT_SHA"
|
||||
echo 'Checked out to commit'
|
||||
fi
|
||||
else
|
||||
if [ -v COMMIT_SHA ]; then
|
||||
git checkout "$COMMIT_SHA"
|
||||
fi
|
||||
fi
|
||||
)
|
||||
}
|
||||
|
||||
function clone_submodules
|
||||
{
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
|
||||
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring contrib/miniselect contrib/xz)
|
||||
SUBMODULES_TO_UPDATE=(
|
||||
contrib/boost
|
||||
contrib/zlib-ng
|
||||
contrib/libxml2
|
||||
contrib/poco
|
||||
contrib/libunwind
|
||||
contrib/ryu
|
||||
contrib/fmtlib
|
||||
contrib/base64
|
||||
contrib/cctz
|
||||
contrib/libcpuid
|
||||
contrib/double-conversion
|
||||
contrib/libcxx
|
||||
contrib/libcxxabi
|
||||
contrib/libc-headers
|
||||
contrib/lz4
|
||||
contrib/zstd
|
||||
contrib/fastops
|
||||
contrib/rapidjson
|
||||
contrib/re2
|
||||
contrib/sparsehash-c11
|
||||
contrib/croaring
|
||||
contrib/miniselect
|
||||
contrib/xz
|
||||
)
|
||||
|
||||
git submodule sync
|
||||
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
|
||||
git submodule foreach git reset --hard
|
||||
git submodule foreach git checkout @ -f
|
||||
git submodule foreach git clean -xfd
|
||||
)
|
||||
git submodule sync
|
||||
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
|
||||
git submodule foreach git reset --hard
|
||||
git submodule foreach git checkout @ -f
|
||||
git submodule foreach git clean -xfd
|
||||
)
|
||||
}
|
||||
|
||||
function run_cmake
|
||||
{
|
||||
CMAKE_LIBS_CONFIG=(
|
||||
"-DENABLE_LIBRARIES=0"
|
||||
"-DENABLE_TESTS=0"
|
||||
"-DENABLE_UTILS=0"
|
||||
"-DENABLE_EMBEDDED_COMPILER=0"
|
||||
"-DENABLE_THINLTO=0"
|
||||
"-DUSE_UNWIND=1"
|
||||
)
|
||||
CMAKE_LIBS_CONFIG=(
|
||||
"-DENABLE_LIBRARIES=0"
|
||||
"-DENABLE_TESTS=0"
|
||||
"-DENABLE_UTILS=0"
|
||||
"-DENABLE_EMBEDDED_COMPILER=0"
|
||||
"-DENABLE_THINLTO=0"
|
||||
"-DUSE_UNWIND=1"
|
||||
)
|
||||
|
||||
# TODO remove this? we don't use ccache anyway. An option would be to download it
|
||||
# from S3 simultaneously with cloning.
|
||||
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
|
||||
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
|
||||
export CCACHE_NOHASHDIR=true
|
||||
export CCACHE_COMPILERCHECK=content
|
||||
export CCACHE_MAXSIZE=15G
|
||||
# TODO remove this? we don't use ccache anyway. An option would be to download it
|
||||
# from S3 simultaneously with cloning.
|
||||
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
|
||||
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
|
||||
export CCACHE_NOHASHDIR=true
|
||||
export CCACHE_COMPILERCHECK=content
|
||||
export CCACHE_MAXSIZE=15G
|
||||
|
||||
ccache --show-stats ||:
|
||||
ccache --zero-stats ||:
|
||||
ccache --show-stats ||:
|
||||
ccache --zero-stats ||:
|
||||
|
||||
mkdir "$FASTTEST_BUILD" ||:
|
||||
mkdir "$FASTTEST_BUILD" ||:
|
||||
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
|
||||
)
|
||||
}
|
||||
|
||||
function build
|
||||
{
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
|
||||
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
|
||||
fi
|
||||
ccache --show-stats ||:
|
||||
)
|
||||
(
|
||||
cd "$FASTTEST_BUILD"
|
||||
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
|
||||
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
|
||||
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
|
||||
fi
|
||||
ccache --show-stats ||:
|
||||
)
|
||||
}
|
||||
|
||||
function configure
|
||||
{
|
||||
clickhouse-client --version
|
||||
clickhouse-test --help
|
||||
clickhouse-client --version
|
||||
clickhouse-test --help
|
||||
|
||||
mkdir -p "$FASTTEST_DATA"{,/client-config}
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
|
||||
# doesn't support SSL
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
mkdir -p "$FASTTEST_DATA"{,/client-config}
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
|
||||
# doesn't support SSL
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
}
|
||||
|
||||
function run_tests
|
||||
{
|
||||
clickhouse-server --version
|
||||
clickhouse-test --help
|
||||
clickhouse-server --version
|
||||
clickhouse-test --help
|
||||
|
||||
# Kill the server in case we are running locally and not in docker
|
||||
stop_server ||:
|
||||
|
||||
start_server
|
||||
|
||||
TESTS_TO_SKIP=(
|
||||
00105_shard_collations
|
||||
00109_shard_totals_after_having
|
||||
00110_external_sort
|
||||
00302_http_compression
|
||||
00417_kill_query
|
||||
00436_convert_charset
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00652_replicated_mutations_zookeeper
|
||||
00682_empty_parts_merge
|
||||
00701_rollup
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
00911_tautological_compare
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01031_mutations_interpreter_and_context
|
||||
01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
|
||||
01083_expressions_in_engine_arguments
|
||||
01092_memory_profiler
|
||||
01098_msgpack_format
|
||||
01098_temporary_and_external_tables
|
||||
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- invonvenient when running locally
|
||||
01193_metadata_loading
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01251_dict_is_in_infinite_loop
|
||||
01259_dictionary_custom_settings_ddl
|
||||
01268_dictionary_direct_layout
|
||||
01280_ssd_complex_key_dictionary
|
||||
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01318_encrypt # Depends on OpenSSL
|
||||
01318_decrypt # Depends on OpenSSL
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
01292_create_user
|
||||
01294_lazy_database_concurrent
|
||||
01305_replica_create_drop_zookeeper
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
base64
|
||||
brotli
|
||||
capnproto
|
||||
client
|
||||
ddl_dictionaries
|
||||
h3
|
||||
hashing
|
||||
hdfs
|
||||
java_hash
|
||||
json
|
||||
limit_memory
|
||||
live_view
|
||||
memory_leak
|
||||
memory_limit
|
||||
mysql
|
||||
odbc
|
||||
parallel_alter
|
||||
parquet
|
||||
protobuf
|
||||
secure
|
||||
sha256
|
||||
xz
|
||||
|
||||
# Not sure why these two fail even in sequential mode. Disabled for now
|
||||
# to make some progress.
|
||||
00646_url_engine
|
||||
00974_query_profiler
|
||||
|
||||
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
||||
01504_rocksdb
|
||||
|
||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||
01460_DistributedFilesToInsert
|
||||
|
||||
01541_max_memory_usage_for_user
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
||||
# We will rerun sequentially any tests that have failed during parallel run.
|
||||
# They might have failed because there was some interference from other tests
|
||||
# running concurrently. If they fail even in seqential mode, we will report them.
|
||||
# FIXME All tests that require exclusive access to the server must be
|
||||
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
|
||||
# run them in a separate group after all other tests. This is faster and also
|
||||
# explicit instead of guessing.
|
||||
if [[ -n "${FAILED_TESTS[*]}" ]]
|
||||
then
|
||||
# Kill the server in case we are running locally and not in docker
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
|
||||
start_server
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
TESTS_TO_SKIP=(
|
||||
00105_shard_collations
|
||||
00109_shard_totals_after_having
|
||||
00110_external_sort
|
||||
00302_http_compression
|
||||
00417_kill_query
|
||||
00436_convert_charset
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00652_replicated_mutations_zookeeper
|
||||
00682_empty_parts_merge
|
||||
00701_rollup
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
00911_tautological_compare
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01031_mutations_interpreter_and_context
|
||||
01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
|
||||
01083_expressions_in_engine_arguments
|
||||
01092_memory_profiler
|
||||
01098_msgpack_format
|
||||
01098_temporary_and_external_tables
|
||||
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- invonvenient when running locally
|
||||
01193_metadata_loading
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01251_dict_is_in_infinite_loop
|
||||
01259_dictionary_custom_settings_ddl
|
||||
01268_dictionary_direct_layout
|
||||
01280_ssd_complex_key_dictionary
|
||||
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
|
||||
01318_encrypt # Depends on OpenSSL
|
||||
01318_decrypt # Depends on OpenSSL
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
01292_create_user
|
||||
01294_lazy_database_concurrent
|
||||
01305_replica_create_drop_zookeeper
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
base64
|
||||
brotli
|
||||
capnproto
|
||||
client
|
||||
ddl_dictionaries
|
||||
h3
|
||||
hashing
|
||||
hdfs
|
||||
java_hash
|
||||
json
|
||||
limit_memory
|
||||
live_view
|
||||
memory_leak
|
||||
memory_limit
|
||||
mysql
|
||||
odbc
|
||||
parallel_alter
|
||||
parquet
|
||||
protobuf
|
||||
secure
|
||||
sha256
|
||||
xz
|
||||
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
# Not sure why these two fail even in sequential mode. Disabled for now
|
||||
# to make some progress.
|
||||
00646_url_engine
|
||||
00974_query_profiler
|
||||
|
||||
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
|
||||
01504_rocksdb
|
||||
|
||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||
01460_DistributedFilesToInsert
|
||||
|
||||
01541_max_memory_usage_for_user
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
01561_mann_whitney_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
||||
# We will rerun sequentially any tests that have failed during parallel run.
|
||||
# They might have failed because there was some interference from other tests
|
||||
# running concurrently. If they fail even in seqential mode, we will report them.
|
||||
# FIXME All tests that require exclusive access to the server must be
|
||||
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
|
||||
# run them in a separate group after all other tests. This is faster and also
|
||||
# explicit instead of guessing.
|
||||
if [[ -n "${FAILED_TESTS[*]}" ]]
|
||||
then
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
|
||||
start_server
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
}
|
||||
|
||||
case "$stage" in
|
||||
|
@ -50,7 +50,7 @@ services:
|
||||
- label:disable
|
||||
|
||||
kafka_kerberos:
|
||||
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
|
||||
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
|
||||
hostname: kafka_kerberos
|
||||
volumes:
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
|
||||
|
@ -7,4 +7,4 @@ services:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
|
@ -0,0 +1,10 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
mysql1:
|
||||
image: mysql:5.7
|
||||
restart: 'no'
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
@ -2,7 +2,7 @@ version: '2.3'
|
||||
services:
|
||||
mysql8_0:
|
||||
image: mysql:8.0
|
||||
restart: always
|
||||
restart: 'no'
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
@ -1,6 +1,6 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
golang1:
|
||||
image: yandex/clickhouse-mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG}
|
||||
image: yandex/clickhouse-mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG:-latest}
|
||||
# to keep container running
|
||||
command: sleep infinity
|
||||
|
@ -1,6 +1,6 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
java1:
|
||||
image: yandex/clickhouse-mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG}
|
||||
image: yandex/clickhouse-mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest}
|
||||
# to keep container running
|
||||
command: sleep infinity
|
||||
|
@ -1,6 +1,6 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
mysqljs1:
|
||||
image: yandex/clickhouse-mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG}
|
||||
image: yandex/clickhouse-mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG:-latest}
|
||||
# to keep container running
|
||||
command: sleep infinity
|
||||
|
@ -1,6 +1,6 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
php1:
|
||||
image: yandex/clickhouse-mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG}
|
||||
image: yandex/clickhouse-mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG:-latest}
|
||||
# to keep container running
|
||||
command: sleep infinity
|
||||
|
@ -1,6 +1,6 @@
|
||||
version: '2.2'
|
||||
services:
|
||||
java:
|
||||
image: yandex/clickhouse-postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG}
|
||||
image: yandex/clickhouse-postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:-latest}
|
||||
# to keep container running
|
||||
command: sleep infinity
|
||||
|
@ -25,12 +25,13 @@ RUN apt-get update \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
rsync \
|
||||
tree \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \
|
||||
&& pip3 --no-cache-dir install 'git+https://github.com/mymarilyn/clickhouse-driver.git' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
|
@ -48,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
@ -141,35 +143,31 @@ reportStageEnd('before-connect')
|
||||
|
||||
# Open connections
|
||||
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
|
||||
all_connections = [clickhouse_driver.Client(**server) for server in servers]
|
||||
# Force settings_is_important to fail queries on unknown settings.
|
||||
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
|
||||
|
||||
for i, s in enumerate(servers):
|
||||
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
|
||||
|
||||
reportStageEnd('connect')
|
||||
|
||||
# Run drop queries, ignoring errors. Do this before all other activity, because
|
||||
# clickhouse_driver disconnects on error (this is not configurable), and the new
|
||||
# connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd('drop-1')
|
||||
|
||||
# Apply settings.
|
||||
# If there are errors, report them and continue -- maybe a new test uses a setting
|
||||
# that is not in master, but the queries can still run. If we have multiple
|
||||
# settings and one of them throws an exception, all previous settings for this
|
||||
# connection will be reset, because the driver reconnects on error (not
|
||||
# configurable). So the end result is uncertain, but hopefully we'll be able to
|
||||
# run at least some queries.
|
||||
settings = root.findall('settings/*')
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
@ -193,37 +191,40 @@ for t in tables:
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
|
||||
# Run create and fill queries. We will run them simultaneously for both servers,
|
||||
# to save time.
|
||||
# The weird search is to keep the relative order of elements, which matters, and
|
||||
# etree doesn't support the appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
|
||||
# and temporary tables are destroyed. We want to be able to continue after some
|
||||
# errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
threads = [Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.start()
|
||||
|
||||
for t in threads:
|
||||
t.join()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd('create')
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
@ -402,10 +403,11 @@ print(f'profile-total\t{profile_total_seconds}')
|
||||
reportStageEnd('run')
|
||||
|
||||
# Run drop queries
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd('drop-2')
|
||||
|
@ -10,6 +10,11 @@ RUN apt-get update --yes \
|
||||
gpg-agent \
|
||||
debsig-verify \
|
||||
strace \
|
||||
protobuf-compiler \
|
||||
protobuf-compiler-grpc \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
libc-ares-dev \
|
||||
--yes --no-install-recommends
|
||||
|
||||
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
|
||||
@ -33,7 +38,8 @@ RUN set -x \
|
||||
&& dpkg -i "${PKG_VERSION}.deb"
|
||||
|
||||
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
|
||||
&& ninja re2_st clickhouse_grpc_protos \
|
||||
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
|
||||
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
|
||||
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log
|
||||
|
@ -13,9 +13,9 @@ cmake .. \
|
||||
-DENABLE_CLICKHOUSE_SERVER=ON \
|
||||
-DENABLE_CLICKHOUSE_CLIENT=ON \
|
||||
-DUSE_STATIC_LIBRARIES=OFF \
|
||||
-DCLICKHOUSE_SPLIT_BINARY=ON \
|
||||
-DSPLIT_SHARED_LIBRARIES=ON \
|
||||
-DENABLE_LIBRARIES=OFF \
|
||||
-DUSE_UNWIND=ON \
|
||||
-DENABLE_UTILS=OFF \
|
||||
-DENABLE_TESTS=OFF
|
||||
```
|
||||
|
@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -273,13 +273,15 @@ SELECT
|
||||
sum(Duration) AS Duration
|
||||
FROM UAct
|
||||
GROUP BY UserID
|
||||
```text
|
||||
```
|
||||
|
||||
``` text
|
||||
┌──────────────UserID─┬─PageViews─┬─Duration─┐
|
||||
│ 4324182021466249494 │ 6 │ 185 │
|
||||
└─────────────────────┴───────────┴──────────┘
|
||||
```
|
||||
|
||||
``` sqk
|
||||
``` sql
|
||||
select count() FROM UAct
|
||||
```
|
||||
|
||||
|
@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster:
|
||||
</zookeeper>
|
||||
```
|
||||
|
||||
ClickHouse also supports to store replicas meta information in the auxiliary ZooKeeper cluster by providing ZooKeeper cluster name and path as engine arguments.
|
||||
In other word, it supports to store the metadata of differnt tables in different ZooKeeper clusters.
|
||||
|
||||
Example of setting the addresses of the auxiliary ZooKeeper cluster:
|
||||
|
||||
``` xml
|
||||
<auxiliary_zookeepers>
|
||||
<zookeeper2>
|
||||
<node index="1">
|
||||
<host>example_2_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="2">
|
||||
<host>example_2_2</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="3">
|
||||
<host>example_2_3</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper2>
|
||||
<zookeeper3>
|
||||
<node index="1">
|
||||
<host>example_3_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper3>
|
||||
</auxiliary_zookeepers>
|
||||
```
|
||||
|
||||
To store table datameta in a auxiliary ZooKeeper cluster instead of default ZooKeeper cluster, we can use the SQL to create table with
|
||||
ReplicatedMergeTree engine as follow:
|
||||
|
||||
```
|
||||
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
|
||||
```
|
||||
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
|
||||
|
||||
If ZooKeeper isn’t set in the config file, you can’t create replicated tables, and any existing replicated tables will be read-only.
|
||||
@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co
|
||||
|
||||
```xml
|
||||
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_name>
|
||||
```
|
||||
|
||||
In this case, you can omit arguments when creating tables:
|
||||
|
@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse
|
||||
|
||||
## Single Node Setup {#single-node-setup}
|
||||
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
|
||||
|
||||
For example, you have chosen `deb` packages and executed:
|
||||
|
||||
|
@ -5,7 +5,7 @@ toc_title: Overview
|
||||
|
||||
# What Is ClickHouse? {#what-is-clickhouse}
|
||||
|
||||
ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
|
||||
In a “normal” row-oriented DBMS, data is stored in this order:
|
||||
|
||||
|
@ -25,6 +25,7 @@ The supported formats are:
|
||||
| [Vertical](#vertical) | ✗ | ✔ |
|
||||
| [VerticalRaw](#verticalraw) | ✗ | ✔ |
|
||||
| [JSON](#json) | ✗ | ✔ |
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| [JSONString](#jsonstring) | ✗ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
|
||||
| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ |
|
||||
@ -507,6 +508,34 @@ Example:
|
||||
}
|
||||
```
|
||||
|
||||
## JSONAsString {#jsonasstring}
|
||||
|
||||
In this format, a single JSON object is interpreted as a single value. If input has several JSON objects (comma separated) they will be interpreted as a sepatate rows.
|
||||
|
||||
This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
DROP TABLE IF EXISTS json_as_string;
|
||||
CREATE TABLE json_as_string (json String) ENGINE = Memory;
|
||||
INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}
|
||||
SELECT * FROM json_as_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─json──────────────────────────────┐
|
||||
│ {"foo":{"bar":{"x":"y"},"baz":1}} │
|
||||
│ {} │
|
||||
│ {"any json stucture":1} │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## JSONCompact {#jsoncompact}
|
||||
## JSONCompactString {#jsoncompactstring}
|
||||
|
||||
|
@ -111,7 +111,7 @@ toc_title: Adopters
|
||||
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
|
||||
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
|
||||
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
|
||||
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
|
||||
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
|
||||
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
|
||||
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
|
||||
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
|
||||
|
@ -44,11 +44,10 @@ stages, such as query planning or distributed queries.
|
||||
|
||||
To be useful, the tracing information has to be exported to a monitoring system
|
||||
that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids
|
||||
a dependency on a particular monitoring system, instead only
|
||||
providing the tracing data conforming to the standard. A natural way to do so
|
||||
in an SQL RDBMS is a system table. OpenTelemetry trace span information
|
||||
a dependency on a particular monitoring system, instead only providing the
|
||||
tracing data through a system table. OpenTelemetry trace span information
|
||||
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
|
||||
is stored in the system table called `system.opentelemetry_span_log`.
|
||||
is stored in the `system.opentelemetry_span_log` table.
|
||||
|
||||
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
|
||||
element in the default config file `config.xml`. It is enabled by default.
|
||||
@ -67,3 +66,31 @@ The table has the following columns:
|
||||
|
||||
The tags or attributes are saved as two parallel arrays, containing the keys
|
||||
and values. Use `ARRAY JOIN` to work with them.
|
||||
|
||||
## Integration with monitoring systems
|
||||
|
||||
At the moment, there is no ready tool that can export the tracing data from
|
||||
ClickHouse to a monitoring system.
|
||||
|
||||
For testing, it is possible to setup the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format:
|
||||
|
||||
```sql
|
||||
CREATE MATERIALIZED VIEW default.zipkin_spans
|
||||
ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
|
||||
SETTINGS output_format_json_named_tuples_as_objects = 1,
|
||||
output_format_json_array_of_rows = 1 AS
|
||||
SELECT
|
||||
lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
|
||||
lower(hex(parent_span_id)) AS parentId,
|
||||
lower(hex(span_id)) AS id,
|
||||
operation_name AS name,
|
||||
start_time_us AS timestamp,
|
||||
finish_time_us - start_time_us AS duration,
|
||||
cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
|
||||
cast(tuple(
|
||||
attribute.values[indexOf(attribute.names, 'db.statement')]),
|
||||
'Tuple("db.statement" text)') AS tags
|
||||
FROM system.opentelemetry_span_log
|
||||
```
|
||||
|
||||
In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
|
||||
|
@ -70,11 +70,21 @@ Parameters:
|
||||
<!-- Configuration of clusters as in an ordinary server config -->
|
||||
<remote_servers>
|
||||
<source_cluster>
|
||||
<!--
|
||||
source cluster & destination clusters accepts exactly the same
|
||||
parameters as parameters for usual Distributed table
|
||||
see https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
|
||||
-->
|
||||
<shard>
|
||||
<internal_replication>false</internal_replication>
|
||||
<replica>
|
||||
<host>127.0.0.1</host>
|
||||
<port>9000</port>
|
||||
<!--
|
||||
<user>default</user>
|
||||
<password>default</password>
|
||||
<secure>1</secure>
|
||||
-->
|
||||
</replica>
|
||||
</shard>
|
||||
...
|
||||
|
@ -1,42 +1,42 @@
|
||||
# ClickHouse obfuscator
|
||||
|
||||
Simple tool for table data obfuscation.
|
||||
|
||||
It reads input table and produces output table, that retain some properties of input, but contains different data.
|
||||
It allows to publish almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
|
||||
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, NULLs;
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
|
||||
- date component of DateTime values;
|
||||
- UTF-8 validity of string values;
|
||||
- string values continue to look somewhat natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregation and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
|
||||
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
|
||||
|
||||
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
|
||||
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
|
||||
|
||||
It may retain some data you don't want to publish.
|
||||
|
||||
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
|
||||
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
So, the user will be able to count exact ratio of mobile traffic.
|
||||
|
||||
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
|
||||
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
|
||||
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
|
||||
|
||||
This tool works fine only with reasonable amount of data (at least 1000s of rows).
|
||||
# ClickHouse obfuscator
|
||||
|
||||
A simple tool for table data obfuscation.
|
||||
|
||||
It reads an input table and produces an output table, that retains some properties of input, but contains different data.
|
||||
It allows publishing almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under the condition on the value of another column;
|
||||
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of the length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
|
||||
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
|
||||
- date component of `DateTime` values;
|
||||
|
||||
- UTF-8 validity of string values;
|
||||
- string values look natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregatio, and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.
|
||||
Some transformations are one to one and could be reversed, so you need to have a large seed and keep it in secret.
|
||||
|
||||
It uses some cryptographic primitives to transform data but from the cryptographic point of view, it doesn't do it properly, that is why you should not consider the result as secure unless you have another reason. The result may retain some data you don't want to publish.
|
||||
|
||||
|
||||
It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data.
|
||||
For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
|
||||
So, the user will be able to count the exact ratio of mobile traffic.
|
||||
|
||||
Let's give another example. When you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contains multiple different emails and no email has a very high frequency than all others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them.
|
||||
You should look at the working algorithm of this tool works, and fine-tune its command line parameters.
|
||||
|
||||
This tool works fine only with an average amount of data (at least 1000s of rows).
|
||||
|
@ -4,4 +4,59 @@ toc_priority: 5
|
||||
|
||||
# avg {#agg_function-avg}
|
||||
|
||||
Calculates the average. Only works for numbers. The result is always Float64.
|
||||
Calculates the arithmetic mean.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
avgWeighted(x)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `x` — Values.
|
||||
|
||||
`x` must be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `NaN` if the supplied parameter is empty.
|
||||
- Mean otherwise.
|
||||
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ 2.5 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avg(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ nan │
|
||||
└────────┘
|
||||
```
|
||||
|
@ -14,17 +14,21 @@ avgWeighted(x, weight)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `x` — Values.
|
||||
- `weight` — Weights of the values.
|
||||
|
||||
Type of `x` and `weight` must be the same.
|
||||
`x` and `weight` must both be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md),
|
||||
but may have different types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Weighted mean.
|
||||
- `NaN`. If all the weights are equal to 0.
|
||||
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
|
||||
- Weighted mean otherwise.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -42,3 +46,54 @@ Result:
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avgWeighted(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,53 @@
|
||||
## rankCorr {#agg_function-rankcorr}
|
||||
|
||||
Computes a rank correlation coefficient.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
rankCorr(x, y)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If less than two arguments are passed, the function will return an exception. The value close to +1 denotes a high linear relationship, and with an increase of one random variable, the second random variable also increases. The value close to -1 denotes a high linear relationship, and with an increase of one random variable, the second random variable decreases. The value close or equal to 0 denotes no relationship between the two random variables.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT rankCorr(number, number) FROM numbers(100);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─rankCorr(number, number)─┐
|
||||
│ 1 │
|
||||
└──────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
|
||||
│ -0.037 │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
**See Also**
|
||||
|
||||
- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
|
@ -67,9 +67,8 @@ Leap seconds are not accounted for.
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
|
||||
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
For Date argument: the behaviour is unspecified.
|
||||
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -9,16 +9,4 @@ toc_title: IN Operator
|
||||
|
||||
See the section [IN operators](../../sql-reference/operators/in.md#select-in-operators).
|
||||
|
||||
## tuple(x, y, …), operator (x, y, …) {#tuplex-y-operator-x-y}
|
||||
|
||||
A function that allows grouping multiple columns.
|
||||
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
|
||||
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table.
|
||||
|
||||
## tupleElement(tuple, n), operator x.N {#tupleelementtuple-n-operator-x-n}
|
||||
|
||||
A function that allows getting a column from a tuple.
|
||||
‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple.
|
||||
There is no cost to execute the function.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/in_functions/) <!--hide-->
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
toc_priority: 66
|
||||
toc_priority: 67
|
||||
toc_title: Other
|
||||
---
|
||||
|
||||
|
@ -536,4 +536,58 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCas
|
||||
!!! note "Note"
|
||||
For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.
|
||||
|
||||
## countSubstrings(haystack, needle) {#countSubstrings}
|
||||
|
||||
Count the number of substring occurrences
|
||||
|
||||
For a case-insensitive search, use the function `countSubstringsCaseInsensitive` (or `countSubstringsCaseInsensitiveUTF8`).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
countSubstrings(haystack, needle[, start_pos])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
- `start_pos` – Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Number of occurrences.
|
||||
|
||||
Type: `Integer`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT countSubstrings('foobar.com', '.')
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─countSubstrings('foobar.com', '.')─┐
|
||||
│ 1 │
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT countSubstrings('aaaa', 'aa')
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─countSubstrings('aaaa', 'aa')─┐
|
||||
│ 2 │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_search_functions/) <!--hide-->
|
||||
|
114
docs/en/sql-reference/functions/tuple-functions.md
Normal file
114
docs/en/sql-reference/functions/tuple-functions.md
Normal file
@ -0,0 +1,114 @@
|
||||
---
|
||||
toc_priority: 66
|
||||
toc_title: Tuples
|
||||
---
|
||||
|
||||
# Functions for Working with Tuples {#tuple-functions}
|
||||
|
||||
## tuple {#tuple}
|
||||
|
||||
A function that allows grouping multiple columns.
|
||||
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
|
||||
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table.
|
||||
|
||||
The function implements the operator `(x, y, …)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tuple(x, y, …)
|
||||
```
|
||||
|
||||
## tupleElement {#tupleelement}
|
||||
|
||||
A function that allows getting a column from a tuple.
|
||||
‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple.
|
||||
There is no cost to execute the function.
|
||||
|
||||
The function implements the operator `x.N`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tupleElement(tuple, n)
|
||||
```
|
||||
|
||||
## untuple {#untuple}
|
||||
|
||||
Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
untuple(x)
|
||||
```
|
||||
|
||||
You can use the `EXCEPT` expression to skip columns as a result of the query.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- None.
|
||||
|
||||
**Examples**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
Example of using a `Tuple`-type column as the `untuple` function parameter:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT untuple(v6) FROM kv;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─_ut_1─┬─_ut_2─┐
|
||||
│ 33 │ ab │
|
||||
│ 44 │ cd │
|
||||
│ 55 │ ef │
|
||||
│ 66 │ gh │
|
||||
│ 77 │ kl │
|
||||
└───────┴───────┘
|
||||
```
|
||||
|
||||
Example of using an `EXCEPT` expression:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Tuple](../../sql-reference/data-types/tuple.md)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-functions/) <!--hide-->
|
@ -21,10 +21,10 @@ The following operations with [partitions](../../../engines/table-engines/merget
|
||||
|
||||
<!-- -->
|
||||
|
||||
## DETACH PARTITION {#alter_detach-partition}
|
||||
## DETACH PARTITION\|PART {#alter_detach-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DETACH PARTITION partition_expr
|
||||
ALTER TABLE table_name DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
|
||||
@ -32,7 +32,8 @@ Moves all data for the specified partition to the `detached` directory. The serv
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE visits DETACH PARTITION 201901
|
||||
ALTER TABLE mt DETACH PARTITION '2020-11-21';
|
||||
ALTER TABLE mt DETACH PART 'all_2_2_0';
|
||||
```
|
||||
|
||||
Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
|
||||
@ -41,10 +42,10 @@ After the query is executed, you can do whatever you want with the data in the `
|
||||
|
||||
This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica.
|
||||
|
||||
## DROP PARTITION {#alter_drop-partition}
|
||||
## DROP PARTITION\|PART {#alter_drop-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP PARTITION partition_expr
|
||||
ALTER TABLE table_name DROP PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Deletes the specified partition from the table. This query tags the partition as inactive and deletes data completely, approximately in 10 minutes.
|
||||
@ -53,6 +54,13 @@ Read about setting the partition expression in a section [How to specify the par
|
||||
|
||||
The query is replicated – it deletes data on all replicas.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE mt DROP PARTITION '2020-11-21';
|
||||
ALTER TABLE mt DROP PART 'all_4_4_0';
|
||||
```
|
||||
|
||||
## DROP DETACHED PARTITION\|PART {#alter_drop-detached}
|
||||
|
||||
``` sql
|
||||
|
@ -25,7 +25,7 @@ SELECT [DISTINCT] expr_list
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[UNION ALL ...]
|
||||
[UNION ...]
|
||||
[INTO OUTFILE filename]
|
||||
[FORMAT format]
|
||||
```
|
||||
@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
|
||||
- [SELECT clause](#select-clause)
|
||||
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
|
||||
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
|
||||
- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
|
||||
- [FORMAT clause](../../../sql-reference/statements/select/format.md)
|
||||
|
||||
@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
|
||||
|
||||
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
|
||||
## SELECT modifiers {#select-modifiers}
|
||||
|
||||
You can use the following modifiers in `SELECT` queries.
|
||||
|
||||
### APPLY {#apply-modifier}
|
||||
|
||||
Allows you to invoke some function for each row returned by an outer table expression of a query.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
|
||||
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
|
||||
SELECT * APPLY(sum) FROM columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
|
||||
│ 220 │ 18 │ 347 │
|
||||
└────────┴────────┴────────┘
|
||||
```
|
||||
|
||||
### EXCEPT {#except-modifier}
|
||||
|
||||
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * EXCEPT (i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌──j─┬───k─┐
|
||||
│ 10 │ 324 │
|
||||
│ 8 │ 23 │
|
||||
└────┴─────┘
|
||||
```
|
||||
|
||||
### REPLACE {#replace-modifier}
|
||||
|
||||
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
|
||||
|
||||
This modifier does not change the names or order of columns. However, it can change the value and the value type.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌───i─┬──j─┬───k─┐
|
||||
│ 101 │ 10 │ 324 │
|
||||
│ 121 │ 8 │ 23 │
|
||||
└─────┴────┴─────┘
|
||||
```
|
||||
|
||||
### Modifier Combinations {#modifier-combinations}
|
||||
|
||||
You can use each modifier separately or combine them.
|
||||
|
||||
**Examples:**
|
||||
|
||||
Using the same modifier multiple times.
|
||||
|
||||
``` sql
|
||||
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
|
||||
│ 2 │ 3 │
|
||||
└──────────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
Using multiple modifiers in a single query.
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(plus(i, 1))─┬─sum(k)─┐
|
||||
│ 222 │ 347 │
|
||||
└─────────────────┴────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
|
||||
<!--hide-->
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
toc_title: UNION ALL
|
||||
toc_title: UNION
|
||||
---
|
||||
|
||||
# UNION ALL Clause {#union-all-clause}
|
||||
@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined
|
||||
|
||||
Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
|
||||
|
||||
## Limitations {#limitations}
|
||||
# UNION DISTINCT Clause {#union-distinct-clause}
|
||||
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` will do a distinct transform for union result, it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
# UNION Clause {#union-clause}
|
||||
By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify union mode by setting `union_default_mode`, values can be 'ALL', 'DISTINCT' or empty string. However, if you use `UNION` with setting `union_default_mode` to empty string, it will throw an exception.
|
||||
|
||||
Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
|
||||
Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.
|
||||
|
@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
|
||||
| Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
|
||||
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
|
||||
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
|
||||
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Обфускатор ClickHouse
|
||||
|
||||
Простой инструмент для обфускации табличных данных.
|
||||
|
||||
Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные.
|
||||
|
||||
Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность.
|
||||
|
||||
Обфускатор предназначен для сохранения следующих свойств данных:
|
||||
- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов;
|
||||
- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца;
|
||||
- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой;
|
||||
- вероятностное распределение длины строк;
|
||||
- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`;
|
||||
- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков;
|
||||
|
||||
- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой;
|
||||
- дату из значений `DateTime`;
|
||||
|
||||
- кодировка UTF-8 значений строки;
|
||||
- строковые значения выглядят естественным образом.
|
||||
|
||||
|
||||
Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д.
|
||||
|
||||
Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором.
|
||||
|
||||
Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете.
|
||||
|
||||
|
||||
Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать.
|
||||
|
||||
|
||||
Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги.
|
||||
Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение.
|
||||
|
||||
Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
|
||||
|
||||
Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
|
||||
Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
|
||||
В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
|
||||
|
||||
Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).
|
@ -0,0 +1,53 @@
|
||||
## rankCorr {#agg_function-rankcorr}
|
||||
|
||||
Вычисляет коэффициент ранговой корреляции.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
rankCorr(x, y)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Возвращает коэффициент ранговой корреляции рангов x и y. Значение коэффициента корреляции изменяется в пределах от -1 до +1. Если передается менее двух аргументов, функция возвращает исключение. Значение, близкое к +1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины увеличивается и вторая случайная величина. Значение, близкое к -1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины вторая случайная величина уменьшается. Значение, близкое или равное 0, означает отсутствие связи между двумя случайными величинами.
|
||||
|
||||
Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT rankCorr(number, number) FROM numbers(100);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─rankCorr(number, number)─┐
|
||||
│ 1 │
|
||||
└──────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
|
||||
│ -0.037 │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
**Смотрите также**
|
||||
|
||||
- [Коэффициент ранговой корреляции Спирмена](https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D1%8F#%D0%9A%D0%BE%D1%8D%D1%84%D1%84%D0%B8%D1%86%D0%B8%D0%B5%D0%BD%D1%82_%D1%80%D0%B0%D0%BD%D0%B3%D0%BE%D0%B2%D0%BE%D0%B9_%D0%BA%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D0%B8_%D0%A1%D0%BF%D0%B8%D1%80%D0%BC%D0%B5%D0%BD%D0%B0)
|
@ -57,32 +57,31 @@ SELECT
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
|
||||
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
For Date argument: the behaviour is unspecified.
|
||||
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию).
|
||||
|
||||
**Syntax**
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toUnixTimestamp(datetime)
|
||||
toUnixTimestamp(str, [timezone])
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Returns the unix timestamp.
|
||||
- Возвращает Unix Timestamp.
|
||||
|
||||
Type: `UInt32`.
|
||||
Тип: `UInt32`.
|
||||
|
||||
**Example**
|
||||
**Пример**
|
||||
|
||||
Query:
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp
|
||||
```
|
||||
|
||||
Result:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─unix_timestamp─┐
|
||||
@ -490,4 +489,4 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g')
|
||||
└────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->
|
||||
|
@ -9,16 +9,4 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
|
||||
|
||||
Смотрите раздел [Операторы IN](../operators/in.md#select-in-operators).
|
||||
|
||||
## tuple(x, y, …), оператор (x, y, …) {#tuplex-y-operator-x-y}
|
||||
|
||||
Функция, позволяющая сгруппировать несколько столбцов.
|
||||
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
|
||||
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
|
||||
|
||||
## tupleElement(tuple, n), оператор x.N {#tupleelementtuple-n-operator-x-n}
|
||||
|
||||
Функция, позволяющая достать столбец из кортежа.
|
||||
N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
|
||||
Выполнение функции ничего не стоит.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/in_functions/) <!--hide-->
|
||||
|
114
docs/ru/sql-reference/functions/tuple-functions.md
Normal file
114
docs/ru/sql-reference/functions/tuple-functions.md
Normal file
@ -0,0 +1,114 @@
|
||||
---
|
||||
toc_priority: 68
|
||||
toc_title: Функции для работы с кортежами
|
||||
---
|
||||
|
||||
# Функции для работы с кортежами {#tuple-functions}
|
||||
|
||||
## tuple {#tuple}
|
||||
|
||||
Функция, позволяющая сгруппировать несколько столбцов.
|
||||
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
|
||||
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
|
||||
|
||||
С помощью функции реализуется оператор `(x, y, …)`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tuple(x, y, …)
|
||||
```
|
||||
|
||||
## tupleElement {#tupleelement}
|
||||
|
||||
Функция, позволяющая достать столбец из кортежа.
|
||||
N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
|
||||
Выполнение функции ничего не стоит.
|
||||
|
||||
С помощью функции реализуется оператор `x.N`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tupleElement(tuple, n)
|
||||
```
|
||||
|
||||
## untuple {#untuple}
|
||||
|
||||
Выполняет синтаксическую подстановку элементов [кортежа](../../sql-reference/data-types/tuple.md#tuplet1-t2) в место вызова.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
untuple(x)
|
||||
```
|
||||
|
||||
Чтобы пропустить некоторые столбцы в результате запроса, вы можете использовать выражение `EXCEPT`.
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` - функция `tuple`, столбец или кортеж элементов. [Tuple](../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Нет.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Входная таблица:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
Пример использования столбца типа `Tuple` в качестве параметра функции `untuple`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT untuple(v6) FROM kv;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─_ut_1─┬─_ut_2─┐
|
||||
│ 33 │ ab │
|
||||
│ 44 │ cd │
|
||||
│ 55 │ ef │
|
||||
│ 66 │ gh │
|
||||
│ 77 │ kl │
|
||||
└───────┴───────┘
|
||||
```
|
||||
|
||||
Пример использования выражения `EXCEPT`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Tuple](../../sql-reference/data-types/tuple.md)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-functions/) <!--hide-->
|
@ -19,10 +19,10 @@ toc_title: PARTITION
|
||||
- [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера;
|
||||
- [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том.
|
||||
|
||||
## DETACH PARTITION {#alter_detach-partition}
|
||||
## DETACH PARTITION\|PART {#alter_detach-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DETACH PARTITION partition_expr
|
||||
ALTER TABLE table_name DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Перемещает заданную партицию в директорию `detached`. Сервер не будет знать об этой партиции до тех пор, пока вы не выполните запрос [ATTACH](#alter_attach-partition).
|
||||
@ -30,7 +30,8 @@ ALTER TABLE table_name DETACH PARTITION partition_expr
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE visits DETACH PARTITION 201901
|
||||
ALTER TABLE mt DETACH PARTITION '2020-11-21';
|
||||
ALTER TABLE mt DETACH PART 'all_2_2_0';
|
||||
```
|
||||
|
||||
Подробнее о том, как корректно задать имя партиции, см. в разделе [Как задавать имя партиции в запросах ALTER](#alter-how-to-specify-part-expr).
|
||||
@ -39,10 +40,10 @@ ALTER TABLE visits DETACH PARTITION 201901
|
||||
|
||||
Запрос реплицируется — данные будут перенесены в директорию `detached` и забыты на всех репликах. Обратите внимание, запрос может быть отправлен только на реплику-лидер. Чтобы узнать, является ли реплика лидером, выполните запрос `SELECT` к системной таблице [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas). Либо можно выполнить запрос `DETACH` на всех репликах — тогда на всех репликах, кроме реплики-лидера, запрос вернет ошибку.
|
||||
|
||||
## DROP PARTITION {#alter_drop-partition}
|
||||
## DROP PARTITION\|PART {#alter_drop-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP PARTITION partition_expr
|
||||
ALTER TABLE table_name DROP PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Удаляет партицию. Партиция помечается как неактивная и будет полностью удалена примерно через 10 минут.
|
||||
@ -51,6 +52,13 @@ ALTER TABLE table_name DROP PARTITION partition_expr
|
||||
|
||||
Запрос реплицируется — данные будут удалены на всех репликах.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE mt DROP PARTITION '2020-11-21';
|
||||
ALTER TABLE mt DROP PART 'all_4_4_0';
|
||||
```
|
||||
|
||||
## DROP DETACHED PARTITION\|PART {#alter_drop-detached}
|
||||
|
||||
``` sql
|
||||
|
@ -15,8 +15,6 @@
|
||||
Задача «normalized z-Order curve» в перспективе может быть полезна для БК и Метрики, так как позволяет смешивать OrderID и PageID и избежать дублирования данных.
|
||||
В задаче также вводится способ индексации путём обращения функции нескольких аргументов на интервале, что имеет смысл для дальнейшего развития.
|
||||
|
||||
[Андрей Чулков](https://github.com/achulkov2), ВШЭ.
|
||||
|
||||
### 1.2. + Wait-free каталог баз данных {#wait-free-katalog-baz-dannykh}
|
||||
|
||||
Q2. Делает [Александр Токмаков](https://github.com/tavplubix), первый рабочий вариант в декабре 2019. Нужно для DataLens и Яндекс.Метрики.
|
||||
@ -292,7 +290,8 @@ Upd. Иван Блинков сделал эту задачу путём зам
|
||||
|
||||
### 4.1. Уменьшение числа потоков при распределённых запросах {#umenshenie-chisla-potokov-pri-raspredelionnykh-zaprosakh}
|
||||
|
||||
Весна 2020. Upd. Есть прототип. Upd. Он не работает. Upd. Человек отказался от задачи, теперь сроки не определены.
|
||||
Upd. Есть прототип. Upd. Он не работает. Upd. Человек отказался от задачи, теперь сроки не определены.
|
||||
Upd. Павел Круглов, весна 2021.
|
||||
|
||||
### 4.2. Спекулятивное выполнение запросов на нескольких репликах {#spekuliativnoe-vypolnenie-zaprosov-na-neskolkikh-replikakh}
|
||||
|
||||
@ -306,6 +305,8 @@ Upd. Иван Блинков сделал эту задачу путём зам
|
||||
|
||||
Upd. Сейчас обсуждается, как сделать другую задачу вместо этой.
|
||||
|
||||
Павел Круглов, весна 2021.
|
||||
|
||||
### 4.3. Ограничение числа одновременных скачиваний с реплик {#ogranichenie-chisla-odnovremennykh-skachivanii-s-replik}
|
||||
|
||||
Изначально делал Олег Алексеенков, но пока решение не готово, хотя там не так уж много доделывать.
|
||||
@ -320,9 +321,10 @@ Upd. Сейчас обсуждается, как сделать другую з
|
||||
|
||||
### 4.7. Ленивая загрузка множеств для IN и JOIN с помощью k/v запросов {#lenivaia-zagruzka-mnozhestv-dlia-in-i-join-s-pomoshchiu-kv-zaprosov}
|
||||
|
||||
### 4.8. Разделить background pool для fetch и merge {#razdelit-background-pool-dlia-fetch-i-merge}
|
||||
### 4.8. + Разделить background pool для fetch и merge {#razdelit-background-pool-dlia-fetch-i-merge}
|
||||
|
||||
В очереди. Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3.
|
||||
Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3.
|
||||
Ура, готово! Сделал Александр Сапин.
|
||||
|
||||
|
||||
## 5. Операции {#operatsii}
|
||||
@ -381,6 +383,7 @@ Upd. Появилась вторая версия LTS - 20.3.
|
||||
### 6.5. Эксперименты с LLVM X-Ray {#eksperimenty-s-llvm-x-ray}
|
||||
|
||||
Требует 2.2.
|
||||
Перенос на 2021 или отмена.
|
||||
|
||||
### 6.6. + Стек трейс для любых исключений {#stek-treis-dlia-liubykh-iskliuchenii}
|
||||
|
||||
@ -401,6 +404,8 @@ Upd. В разработке.
|
||||
|
||||
### 6.10. Сбор общих системных метрик {#sbor-obshchikh-sistemnykh-metrik}
|
||||
|
||||
Перенос на весну 2021.
|
||||
|
||||
|
||||
## 7. Сопровождение разработки {#soprovozhdenie-razrabotki}
|
||||
|
||||
@ -461,7 +466,7 @@ UBSan включен в функциональных тестах, но не в
|
||||
### 7.12. Показывать тестовое покрытие нового кода в PR {#pokazyvat-testovoe-pokrytie-novogo-koda-v-pr}
|
||||
|
||||
Пока есть просто показ тестового покрытия всего кода.
|
||||
Отложено.
|
||||
Отложено на весну 2021.
|
||||
|
||||
### 7.13. + Включение аналога -Weverything в gcc {#vkliuchenie-analoga-weverything-v-gcc}
|
||||
|
||||
@ -512,6 +517,7 @@ Upd. Минимальная подсветка добавлена, а все о
|
||||
Поводом использования libressl послужило желание нашего хорошего друга из известной компании несколько лет назад. Но сейчас ситуация состоит в том, что openssl продолжает развиваться, а libressl не особо, и можно спокойно менять обратно.
|
||||
|
||||
Нужно для Яндекс.Облака для поддержки TLS 1.3.
|
||||
Теперь нужно заменить OpenSSL на BoringSSL.
|
||||
|
||||
### 7.16. + tzdata внутри бинарника {#tzdata-vnutri-binarnika}
|
||||
|
||||
@ -612,7 +618,7 @@ Upd. Эльдар Заитов добавляет OSS Fuzz.
|
||||
Upd. Сделаны randomString, randomFixedString.
|
||||
Upd. Сделаны fuzzBits.
|
||||
|
||||
### 7.24. Fuzzing лексера и парсера запросов; кодеков и форматов {#fuzzing-leksera-i-parsera-zaprosov-kodekov-i-formatov}
|
||||
### 7.24. + Fuzzing лексера и парсера запросов; кодеков и форматов {#fuzzing-leksera-i-parsera-zaprosov-kodekov-i-formatov}
|
||||
|
||||
Продолжение 7.23.
|
||||
|
||||
@ -656,6 +662,7 @@ Upd. В Аркадии частично работает небольшая ча
|
||||
### 7.30. Возможность переключения бинарных файлов на продакшене без выкладки пакетов {#vozmozhnost-perekliucheniia-binarnykh-failov-na-prodakshene-bez-vykladki-paketov}
|
||||
|
||||
Низкий приоритет.
|
||||
Сделали файл clickhouse.old.
|
||||
|
||||
### 7.31. Зеркалирование нагрузки между серверами {#zerkalirovanie-nagruzki-mezhdu-serverami}
|
||||
|
||||
@ -737,7 +744,7 @@ Upd. Задача взята в работу.
|
||||
### 8.6. Kerberos аутентификация для HDFS и Kafka {#kerberos-autentifikatsiia-dlia-hdfs-i-kafka}
|
||||
|
||||
Андрей Коняев, ArenaData. Он куда-то пропал.
|
||||
Upd. В процессе работа для Kafka.
|
||||
Для Kafka готово, для HDFS в процессе.
|
||||
|
||||
### 8.7. + Исправление мелочи HDFS на очень старых ядрах Linux {#ispravlenie-melochi-hdfs-na-ochen-starykh-iadrakh-linux}
|
||||
|
||||
@ -1024,14 +1031,14 @@ Upd. Сделано хранение прав. До готового к испо
|
||||
[Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в декабре 2019.
|
||||
Q1. Сделано управление правами полностью, но не реализовано их хранение, см. 12.1.
|
||||
|
||||
### 12.3. Подключение справочника пользователей и прав доступа из LDAP {#podkliuchenie-spravochnika-polzovatelei-i-prav-dostupa-iz-ldap}
|
||||
### 12.3. + Подключение справочника пользователей и прав доступа из LDAP {#podkliuchenie-spravochnika-polzovatelei-i-prav-dostupa-iz-ldap}
|
||||
|
||||
Аутентификация через LDAP - Денис Глазачев.
|
||||
[Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1.
|
||||
Q3.
|
||||
Upd. Pull request на финальной стадии.
|
||||
|
||||
### 12.4. Подключение IDM системы Яндекса как справочника пользователей и прав доступа {#podkliuchenie-idm-sistemy-iandeksa-kak-spravochnika-polzovatelei-i-prav-dostupa}
|
||||
### 12.4. - Подключение IDM системы Яндекса как справочника пользователей и прав доступа {#podkliuchenie-idm-sistemy-iandeksa-kak-spravochnika-polzovatelei-i-prav-dostupa}
|
||||
|
||||
Пока низкий приоритет. Нужно для Метрики. Требует 12.3.
|
||||
Отложено.
|
||||
@ -1051,7 +1058,7 @@ Upd. Есть pull request.
|
||||
|
||||
### 13.1. Overcommit запросов по памяти и вытеснение {#overcommit-zaprosov-po-pamiati-i-vytesnenie}
|
||||
|
||||
Требует 2.1. Способ реализации обсуждается. Александр Казаков.
|
||||
Требует 2.1. Способ реализации обсуждается.
|
||||
|
||||
### 13.2. Общий конвейер выполнения на сервер {#obshchii-konveier-vypolneniia-na-server}
|
||||
|
||||
@ -1059,8 +1066,6 @@ Upd. Есть pull request.
|
||||
|
||||
### 13.3. Пулы ресурсов {#puly-resursov}
|
||||
|
||||
Александр Казаков.
|
||||
|
||||
Требует 13.2 или сможем сделать более неудобную реализацию раньше.
|
||||
Обсуждается вариант неудобной реализации. Пока средний приоритет, целимся на Q1/Q2.
|
||||
Вариант реализации выбрал Александр Казаков.
|
||||
@ -1068,6 +1073,7 @@ Upd. Не уследили, и задачу стали обсуждать мен
|
||||
Upd. Задачу смотрит Александр Казаков.
|
||||
Upd. Задача взята в работу.
|
||||
Upd. Задача как будто взята в работу.
|
||||
Upd. Задачу не сделал.
|
||||
|
||||
|
||||
## 14. Диалект SQL {#dialekt-sql}
|
||||
@ -1082,19 +1088,18 @@ Upd. Задача как будто взята в работу.
|
||||
|
||||
### 14.3. Поддержка подстановок для множеств в правой части IN {#podderzhka-podstanovok-dlia-mnozhestv-v-pravoi-chasti-in}
|
||||
|
||||
### 14.4. Поддержка подстановок для идентификаторов (имён) в SQL запросе {#podderzhka-podstanovok-dlia-identifikatorov-imion-v-sql-zaprose}
|
||||
### 14.4. + Поддержка подстановок для идентификаторов (имён) в SQL запросе {#podderzhka-podstanovok-dlia-identifikatorov-imion-v-sql-zaprose}
|
||||
|
||||
zhang2014
|
||||
Задача на паузе.
|
||||
Amos Bird сделал.
|
||||
|
||||
### 14.5. + Поддержка задания множества как массива в правой части секции IN {#podderzhka-zadaniia-mnozhestva-kak-massiva-v-pravoi-chasti-sektsii-in}
|
||||
|
||||
Василий Немков, Altinity, делал эту задачу, но забросил её в пользу других задач.
|
||||
В результате, сейчас доделывает Антон Попов.
|
||||
|
||||
### 14.6. Глобальный scope для WITH {#globalnyi-scope-dlia-with}
|
||||
### 14.6. + Глобальный scope для WITH {#globalnyi-scope-dlia-with}
|
||||
|
||||
В обсуждении. Amos Bird.
|
||||
Amos Bird сделал.
|
||||
|
||||
### 14.7. Nullable для WITH ROLLUP, WITH CUBE, WITH TOTALS {#nullable-dlia-with-rollup-with-cube-with-totals}
|
||||
|
||||
@ -1148,13 +1153,13 @@ Upd. Есть pull request. Готово.
|
||||
|
||||
### 14.17. + Ввести понятие stateful функций {#vvesti-poniatie-stateful-funktsii}
|
||||
|
||||
zhang2014.
|
||||
Для runningDifference, neighbour - их учёт в оптимизаторе запросов.
|
||||
В интерфейсе уже сделано. Надо проверить, что учитывается в нужных местах (например, что работает predicate pushdown сквозь ORDER BY, если таких функций нет).
|
||||
Александр Кузьменков.
|
||||
|
||||
### 14.18. UNION DISTINCT и возможность включить его по-умолчанию {#union-distinct-i-vozmozhnost-vkliuchit-ego-po-umolchaniiu}
|
||||
### 14.18. + UNION DISTINCT и возможность включить его по-умолчанию {#union-distinct-i-vozmozhnost-vkliuchit-ego-po-umolchaniiu}
|
||||
|
||||
Для BI систем.
|
||||
Для BI систем. flynn ucasFL.
|
||||
|
||||
### 14.19. + Совместимость парсера типов данных с SQL {#sovmestimost-parsera-tipov-dannykh-s-sql}
|
||||
|
||||
@ -1278,7 +1283,7 @@ Upd. Есть pull request.
|
||||
Исправление фундаментальной проблемы - есть PR.
|
||||
Фундаментальная проблема решена.
|
||||
|
||||
### 18.2. Агрегатные функции для статистических тестов {#agregatnye-funktsii-dlia-statisticheskikh-testov}
|
||||
### 18.2. + Агрегатные функции для статистических тестов {#agregatnye-funktsii-dlia-statisticheskikh-testov}
|
||||
|
||||
Артём Цыганов, Руденский Константин Игоревич, Семёнов Денис, ВШЭ.
|
||||
|
||||
@ -1286,6 +1291,7 @@ Upd. Есть pull request.
|
||||
|
||||
Сделали прототип двух тестов, есть pull request. Также есть pull request для корелляции рангов.
|
||||
Upd. Помержили корелляцию рангов, но ещё не помержили сравнение t-test, u-test.
|
||||
Upd. Всё доделал Никита Михайлов.
|
||||
|
||||
### 18.3. Инфраструктура для тренировки моделей в ClickHouse {#infrastruktura-dlia-trenirovki-modelei-v-clickhouse}
|
||||
|
||||
@ -1295,7 +1301,7 @@ Upd. Помержили корелляцию рангов, но ещё не по
|
||||
|
||||
## 19. Улучшение работы кластера {#uluchshenie-raboty-klastera}
|
||||
|
||||
### 19.1. Параллельные кворумные вставки без линеаризуемости {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti}
|
||||
### 19.1. + Параллельные кворумные вставки без линеаризуемости {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti}
|
||||
|
||||
Upd. В работе, ожидается в начале октября.
|
||||
|
||||
@ -1361,6 +1367,8 @@ Upd. Задача в разработке.
|
||||
|
||||
### 20.2. Поддержка DELETE путём преобразования множества ключей в множество row_numbers на реплике, столбца флагов и индекса по диапазонам {#podderzhka-delete-putiom-preobrazovaniia-mnozhestva-kliuchei-v-mnozhestvo-row-numbers-na-replike-stolbtsa-flagov-i-indeksa-po-diapazonam}
|
||||
|
||||
Задача назначена на 2021.
|
||||
|
||||
### 20.3. Поддержка ленивых DELETE путём запоминания выражений и преобразования к множеству ключей в фоне {#podderzhka-lenivykh-delete-putiom-zapominaniia-vyrazhenii-i-preobrazovaniia-k-mnozhestvu-kliuchei-v-fone}
|
||||
|
||||
### 20.4. Поддержка UPDATE с помощью преобразования в DELETE и вставок {#podderzhka-update-s-pomoshchiu-preobrazovaniia-v-delete-i-vstavok}
|
||||
@ -1413,6 +1421,7 @@ ucasFL, в разработке. Готово.
|
||||
[Achimbab](https://github.com/achimbab).
|
||||
Есть pull request. Но это не совсем то.
|
||||
Upd. В обсуждении.
|
||||
Upd. Назначено на 2021.
|
||||
|
||||
### 21.8. Взаимная интеграция аллокатора и кэша {#vzaimnaia-integratsiia-allokatora-i-kesha}
|
||||
|
||||
@ -1427,6 +1436,7 @@ Upd. В обсуждении.
|
||||
Upd. Есть нерабочий прототип, скорее всего будет отложено.
|
||||
Upd. Отложено до осени.
|
||||
Upd. Отложено до.
|
||||
Upd. Отложено.
|
||||
|
||||
### 21.8.1. Отдельный аллокатор для кэшей с ASLR {#otdelnyi-allokator-dlia-keshei-s-aslr}
|
||||
|
||||
@ -1517,7 +1527,7 @@ Upd. Сделаны самые существенные из предложен
|
||||
|
||||
Для сортировки по кортежам используется обычная сортировка с компаратором, который в цикле по элементам кортежа делает виртуальные вызовы `IColumn::compareAt`. Это неоптимально - как из-за короткого цикла по неизвестному в compile-time количеству элементов, так и из-за виртуальных вызовов. Чтобы обойтись без виртуальных вызовов, есть метод `IColumn::getPermutation`. Он используется в случае сортировки по одному столбцу. Есть вариант, что в случае сортировки по кортежу, что-то похожее тоже можно применить… например, сделать метод `updatePermutation`, принимающий аргументы offset и limit, и допереставляющий перестановку в диапазоне значений, в которых предыдущий столбец имел равные значения.
|
||||
|
||||
3. RadixSort для сортировки.
|
||||
\+ 3. RadixSort для сортировки.
|
||||
|
||||
Один наш знакомый начал делать задачу по попытке использования RadixSort для сортировки столбцов. Был сделан вариант indirect сортировки (для `getPermutation`), но не оптимизирован до конца - есть лишние ненужные перекладывания элементов. Для того, чтобы его оптимизировать, придётся добавить немного шаблонной магии (на последнем шаге что-то не копировать, вместо перекладывания индексов - складывать их в готовое место). Также этот человек добавил метод MSD Radix Sort для реализации radix partial sort. Но даже не проверил производительность.
|
||||
|
||||
@ -1527,7 +1537,9 @@ Upd. Сделаны самые существенные из предложен
|
||||
|
||||
Виртуальный метод `compareAt` возвращает -1, 0, 1. Но алгоритмы сортировки сравнениями обычно рассчитаны на `operator<` и не могут получить преимущества от three-way comparison. А можно ли написать так, чтобы преимущество было?
|
||||
|
||||
5. pdq partial sort
|
||||
\+ 5. pdq partial sort
|
||||
|
||||
Upd. Данила Кутенин решил эту задачу ультимативно, используя Floyd–Rivest алгоритм.
|
||||
|
||||
Хороший алгоритм сортировки сравнениями `pdqsort` не имеет варианта partial sort. Заметим, что на практике, почти все сортировки в запросах ClickHouse являются partial_sort, так как `ORDER BY` почти всегда идёт с `LIMIT`. Кстати, Данила Кутенин уже попробовал это и показал, что в тривиальном случае преимущества нет. Но не очевидно, что нельзя сделать лучше.
|
||||
|
||||
@ -1619,6 +1631,7 @@ Upd. Добавили таймауты.
|
||||
|
||||
Altinity.
|
||||
Я не в курсе, какой статус.
|
||||
Там предлагают очень сложное решение вместо простого.
|
||||
|
||||
### 22.16. + Исправление низкой производительности кодека DoubleDelta {#ispravlenie-nizkoi-proizvoditelnosti-kodeka-doubledelta}
|
||||
|
||||
@ -1656,15 +1669,15 @@ Upd. Готово.
|
||||
|
||||
Нужно для Метрики. Алексей Миловидов.
|
||||
|
||||
### 22.25. Избавиться от библиотеки btrie {#izbavitsia-ot-biblioteki-btrie}
|
||||
### 22.25. + Избавиться от библиотеки btrie {#izbavitsia-ot-biblioteki-btrie}
|
||||
|
||||
Алексей Миловидов. Низкий приоритет.
|
||||
Владимир Черкасов сделал эту задачу.
|
||||
|
||||
### 22.26. Плохая производительность quantileTDigest {#plokhaia-proizvoditelnost-quantiletdigest}
|
||||
|
||||
[#2668](https://github.com/ClickHouse/ClickHouse/issues/2668)
|
||||
|
||||
Алексей Миловидов или будет переназначено.
|
||||
Павел Круглов и Илья Щербак (ВК).
|
||||
|
||||
### 22.27. Проверить несколько PR, которые были закрыты zhang2014 и sundy-li {#proverit-neskolko-pr-kotorye-byli-zakryty-zhang2014-i-sundy-li}
|
||||
|
||||
@ -1766,7 +1779,7 @@ Upd. Отменено.
|
||||
Виталий Баранов.
|
||||
Отложено, после бэкапов.
|
||||
|
||||
### 24.5. Поддержка функций шифрования для отдельных значений {#podderzhka-funktsii-shifrovaniia-dlia-otdelnykh-znachenii}
|
||||
### 24.5. + Поддержка функций шифрования для отдельных значений {#podderzhka-funktsii-shifrovaniia-dlia-otdelnykh-znachenii}
|
||||
|
||||
Смотрите также 24.5.
|
||||
|
||||
@ -1775,6 +1788,7 @@ Upd. Отменено.
|
||||
|
||||
Делает Василий Немков, Altinity
|
||||
Есть pull request в процессе ревью, исправляем проблемы производительности.
|
||||
Сейчас в состоянии, что уже добавлено в продакшен, но производительность всё ещё низкая (тех долг).
|
||||
|
||||
### 24.6. Userspace RAID {#userspace-raid}
|
||||
|
||||
@ -1825,7 +1839,7 @@ RAID позволяет одновременно увеличить надёжн
|
||||
|
||||
Upd. Есть pull request. В стадии ревью. Готово.
|
||||
|
||||
### 24.10. Поддержка типов half/bfloat16/unum {#podderzhka-tipov-halfbfloat16unum}
|
||||
### 24.10. - Поддержка типов half/bfloat16/unum {#podderzhka-tipov-halfbfloat16unum}
|
||||
|
||||
[#7657](https://github.com/ClickHouse/ClickHouse/issues/7657)
|
||||
|
||||
@ -1833,6 +1847,7 @@ Upd. Есть pull request. В стадии ревью. Готово.
|
||||
|
||||
Есть pull request на промежуточной стадии.
|
||||
Отложено.
|
||||
Отменено.
|
||||
|
||||
### 24.11. User Defined Functions {#user-defined-functions}
|
||||
|
||||
@ -1882,10 +1897,12 @@ Upd. Прототип bitonic sort помержен, но целесообраз
|
||||
Требует 2.1.
|
||||
|
||||
Upd. Есть два прототипа от внешних контрибьюторов.
|
||||
Александр Кузьменков.
|
||||
|
||||
### 24.15. Поддержка полуструктурированных данных {#podderzhka-polustrukturirovannykh-dannykh}
|
||||
|
||||
Требует 1.14 и 2.10.
|
||||
Антон Попов.
|
||||
|
||||
### 24.16. Улучшение эвристики слияний {#uluchshenie-evristiki-sliianii}
|
||||
|
||||
@ -1915,6 +1932,7 @@ Upd. Есть pull request - в большинстве случаев однов
|
||||
### 24.21. Реализация в ClickHouse протокола распределённого консенсуса {#realizatsiia-v-clickhouse-protokola-raspredelionnogo-konsensusa}
|
||||
|
||||
Имеет смысл только после 19.2.
|
||||
Александр Сапин.
|
||||
|
||||
### 24.22. Вывод типов по блоку данных. Вывод формата данных по примеру {#vyvod-tipov-po-bloku-dannykh-vyvod-formata-dannykh-po-primeru}
|
||||
|
||||
@ -1955,13 +1973,14 @@ ClickHouse также может использоваться для быстр
|
||||
Михаил Филитов, ВШЭ.
|
||||
Upd. Есть pull request. Нужно ещё чистить код библиотеки.
|
||||
|
||||
### 24.26. Поддержка open tracing или аналогов {#podderzhka-open-tracing-ili-analogov}
|
||||
### 24.26. + Поддержка open tracing или аналогов {#podderzhka-open-tracing-ili-analogov}
|
||||
|
||||
[#5182](https://github.com/ClickHouse/ClickHouse/issues/5182)
|
||||
|
||||
Александр Кожихов, ВШЭ и Яндекс.YT.
|
||||
Upd. Есть pull request с прототипом.
|
||||
Upd. Александ Кузьменков взял задачу в работу.
|
||||
Сделано.
|
||||
|
||||
### 24.27. Реализация алгоритмов min-hash, sim-hash для нечёткого поиска полудубликатов {#realizatsiia-algoritmov-min-hash-sim-hash-dlia-nechiotkogo-poiska-poludublikatov}
|
||||
|
||||
@ -1995,7 +2014,7 @@ Amos Bird, но его решение слишком громоздкое и п
|
||||
Перепиcывание в JOIN. Не раньше 21.11, 21.12, 21.9. Низкий приоритет.
|
||||
Отложено.
|
||||
|
||||
### 24.32. Поддержка GRPC {#podderzhka-grpc}
|
||||
### 24.32. + Поддержка GRPC {#podderzhka-grpc}
|
||||
|
||||
Мария Конькова, ВШЭ и Яндекс.
|
||||
Также смотрите 24.29.
|
||||
@ -2009,6 +2028,7 @@ Amos Bird, но его решение слишком громоздкое и п
|
||||
|
||||
Задача в работе, есть pull request. [#10136](https://github.com/ClickHouse/ClickHouse/pull/10136)
|
||||
Upd. Задачу взял в работу Виталий Баранов.
|
||||
Сделано.
|
||||
|
||||
|
||||
## 25. DevRel {#devrel}
|
||||
@ -2067,13 +2087,14 @@ Upd. Задачу взял в работу Виталий Баранов.
|
||||
Алексей Миловидов и все подготовленные докладчики.
|
||||
Upd. Участвуем.
|
||||
|
||||
### 25.14. Конференции в России: все HighLoad, возможно CodeFest, DUMP или UWDC, возможно C++ Russia {#konferentsii-v-rossii-vse-highload-vozmozhno-codefest-dump-ili-uwdc-vozmozhno-c-russia}
|
||||
### 25.14. + Конференции в России: все HighLoad, возможно CodeFest, DUMP или UWDC, возможно C++ Russia {#konferentsii-v-rossii-vse-highload-vozmozhno-codefest-dump-ili-uwdc-vozmozhno-c-russia}
|
||||
|
||||
Алексей Миловидов и все подготовленные докладчики.
|
||||
Upd. Есть Saint HighLoad online.
|
||||
Upd. Есть C++ Russia.
|
||||
CodeFest, DUMP, UWDC отменились.
|
||||
Upd. Добавились Highload Fwdays, Матемаркетинг.
|
||||
Upd. Добавились подкасты C++ Russia.
|
||||
|
||||
### 25.15. Конференции зарубежные: Percona, DataOps, попытка попасть на более крупные {#konferentsii-zarubezhnye-percona-dataops-popytka-popast-na-bolee-krupnye}
|
||||
|
||||
@ -2096,6 +2117,7 @@ DataOps отменилась.
|
||||
|
||||
Есть минимальный прототип. Сделал Илья Яцишин. Этот прототип не позволяет делиться ссылками на результаты запросов.
|
||||
Upd. На финальной стадии инструмент для экспериментирования с разными версиями ClickHouse.
|
||||
Upd. По факту, задача считается не сделанной (готово только 99%, не 100%).
|
||||
|
||||
### 25.17. Взаимодействие с ВУЗами: ВШЭ, УрФУ, ICT Beijing {#vzaimodeistvie-s-vuzami-vshe-urfu-ict-beijing}
|
||||
|
||||
@ -2103,6 +2125,7 @@ Upd. На финальной стадии инструмент для экспе
|
||||
Благодаря Robert Hodges добавлен CMU.
|
||||
Upd. Взаимодействие с ВШЭ 2019/2020 успешно выполнено.
|
||||
Upd. Идёт подготовка к 2020/2021.
|
||||
Upd. Уже взяли несколько десятков человек на 2020/2021.
|
||||
|
||||
### 25.18. - Лекция в ШАД {#lektsiia-v-shad}
|
||||
|
||||
|
@ -202,7 +202,11 @@ def build(args):
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
|
||||
website_dir = os.path.join('..', 'website')
|
||||
|
||||
# A root path to ClickHouse source code.
|
||||
src_dir = '..'
|
||||
|
||||
website_dir = os.path.join(src_dir, 'website')
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa')
|
||||
@ -210,6 +214,7 @@ if __name__ == '__main__':
|
||||
arg_parser.add_argument('--docs-dir', default='.')
|
||||
arg_parser.add_argument('--theme-dir', default=website_dir)
|
||||
arg_parser.add_argument('--website-dir', default=website_dir)
|
||||
arg_parser.add_argument('--src-dir', default=src_dir)
|
||||
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
|
||||
arg_parser.add_argument('--output-dir', default='build')
|
||||
arg_parser.add_argument('--enable-stable-releases', action='store_true')
|
||||
|
@ -145,13 +145,19 @@ def build_website(args):
|
||||
'public',
|
||||
'node_modules',
|
||||
'templates',
|
||||
'locale'
|
||||
'locale',
|
||||
'.gitkeep'
|
||||
)
|
||||
)
|
||||
|
||||
# This file can be requested to check for available ClickHouse releases.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
|
||||
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
|
||||
|
||||
shutil.copy2(
|
||||
os.path.join(args.website_dir, 'js', 'embedd.min.js'),
|
||||
os.path.join(args.output_dir, 'js', 'embedd.min.js')
|
||||
)
|
||||
os.path.join(args.output_dir, 'js', 'embedd.min.js'))
|
||||
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
|
||||
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h33 | [Apache Lic 2.0ense 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2. 1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -11,7 +11,6 @@
|
||||
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -1,8 +1,13 @@
|
||||
# AMPLab大数据基准测试 {#amplab-da-shu-ju-ji-zhun-ce-shi}
|
||||
---
|
||||
toc_priority: 19
|
||||
toc_title: AMPLab Big Data Benchmark
|
||||
---
|
||||
|
||||
# AMPLab Big Data Benchmark {#amplab-big-data-benchmark}
|
||||
|
||||
参考 https://amplab.cs.berkeley.edu/benchmark/
|
||||
|
||||
需要您在https://aws.amazon.com注册一个免费的账号。注册时需要您提供信用卡、邮箱、电话等信息。之后可以在https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential获取新的访问密钥
|
||||
需要您在[Amazon](https://aws.amazon.com)注册一个免费的账号。注册时需要您提供信用卡、邮箱、电话等信息。之后可以在[Amazon AWS Console](https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential)获取新的访问密钥
|
||||
|
||||
在控制台运行以下命令:
|
||||
|
||||
|
@ -1,6 +1,11 @@
|
||||
# Criteo TB级别点击日志 {#criteo-tbji-bie-dian-ji-ri-zhi}
|
||||
---
|
||||
toc_priority: 18
|
||||
toc_title: Terabyte Click Logs from Criteo
|
||||
---
|
||||
|
||||
可以从http://labs.criteo.com/downloads/download-terabyte-click-logs/上下载数据
|
||||
# Terabyte of Click Logs from Criteo {#criteo-tbji-bie-dian-ji-ri-zhi}
|
||||
|
||||
可以从 http://labs.criteo.com/downloads/download-terabyte-click-logs/ 上下载数据
|
||||
|
||||
创建原始数据对应的表结构:
|
||||
|
||||
|
@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00"
|
||||
|
||||
# 示例数据集 {#example-datasets}
|
||||
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集,还可以使用示例查询。
|
||||
|
||||
对于某些数据集示例查询也可用。
|
||||
|
||||
- [脱敏的Yandex.Metrica数据集](metrica.md)
|
||||
- [星型基准测试](star-schema.md)
|
||||
- [维基访问数据](wikistat.md)
|
||||
- [Criteo TB级别点击日志](criteo.md)
|
||||
- [AMPLab大数据基准测试](amplab-benchmark.md)
|
||||
- [纽约出租车数据](nyc-taxi.md)
|
||||
- [航班飞行数据](ontime.md)
|
||||
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
|
||||
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
|
||||
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
|
||||
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
|
||||
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
|
||||
- [OnTime](../../getting-started/example-datasets/ontime.md)
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
|
||||
|
@ -1,17 +1,17 @@
|
||||
---
|
||||
toc_priority: 21
|
||||
toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
|
||||
toc_priority: 15
|
||||
toc_title: Yandex.Metrica Data
|
||||
---
|
||||
|
||||
# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
|
||||
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
|
||||
|
||||
Dataset由两个表组成,其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`)的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科。
|
||||
数据集由两个表组成,包含关于Yandex.Metrica的hits(`hits_v1`)和visit(`visits_v1`)的匿名数据。你可以阅读更多关于Yandex的信息。在[ClickHouse历史](../../introduction/history.md)的Metrica部分。
|
||||
|
||||
数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
|
||||
数据集由两个表组成,他们中的任何一个都可以下载作为一个压缩`tsv.xz`的文件或准备的分区。除此之外,一个扩展版的`hits`表包含1亿行TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz,准备分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz。
|
||||
|
||||
## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}
|
||||
|
||||
下载和导入点击表:
|
||||
下载和导入`hits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
|
||||
@ -21,7 +21,7 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
下载和导入访问:
|
||||
下载和导入`visits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
|
||||
@ -31,9 +31,9 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
```
|
||||
|
||||
## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
|
||||
从压缩的TSV文件下载并导入命中:
|
||||
从TSV压缩文件下载并导入`hits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
|
||||
@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
从压缩tsv文件下载和导入访问:
|
||||
从压缩tsv文件下载和导入`visits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
|
||||
@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
|
||||
## 查询示例 {#example-queries}
|
||||
|
||||
[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
|
||||
[使用教程](../../getting-started/tutorial.md)是以Yandex.Metrica数据集开始教程。
|
||||
|
||||
查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的(它们被命名为 `test.hists` 和 `test.visits` 那里)。
|
||||
可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到对这些表的查询的其他示例(它们被命名为`test.hists`和`test.visits`)。
|
||||
|
@ -1,15 +1,20 @@
|
||||
# 纽约市出租车数据 {#niu-yue-shi-chu-zu-che-shu-ju}
|
||||
---
|
||||
toc_priority: 20
|
||||
toc_title: New York Taxi Data
|
||||
---
|
||||
|
||||
# 纽约出租车数据 {#niu-yue-shi-chu-zu-che-shu-ju}
|
||||
|
||||
纽约市出租车数据有以下两个方式获取:
|
||||
|
||||
从原始数据导入
|
||||
下载预处理好的分区数据
|
||||
- 从原始数据导入
|
||||
- 下载处理好的数据
|
||||
|
||||
## 怎样导入原始数据 {#zen-yang-dao-ru-yuan-shi-shu-ju}
|
||||
|
||||
可以参考https://github.com/toddwschneider/nyc-taxi-data和http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html中的关于数据集结构描述与数据下载指令说明。
|
||||
可以参考 https://github.com/toddwschneider/nyc-taxi-data 和 http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html 中的关于数据集结构描述与数据下载指令说明。
|
||||
|
||||
数据集包含227GB的CSV文件。这大约需要一个小时的下载时间(1Gbit带宽下,并行下载大概是一半时间)。
|
||||
数据集包含227GB的CSV文件。在1Gbig的带宽下,下载大约需要一个小时这大约需要一个小时的下载时间(从s3.amazonaws.com并行下载时间至少可以缩减一半)。
|
||||
下载时注意损坏的文件。可以检查文件大小并重新下载损坏的文件。
|
||||
|
||||
有些文件中包含一些无效的行,您可以使用如下语句修复他们:
|
||||
@ -21,7 +26,7 @@ mv data/yellow_tripdata_2010-02.csv_ data/yellow_tripdata_2010-02.csv
|
||||
mv data/yellow_tripdata_2010-03.csv_ data/yellow_tripdata_2010-03.csv
|
||||
```
|
||||
|
||||
然后您必须在PostgreSQL中预处理这些数据。这将创建多边形中的点(以匹配在地图中纽约市中范围),然后通过使用JOIN查询将数据关联组合到一个规范的表中。为了完成这部分操作,您需要安装PostgreSQL的同时安装PostGIS插件。
|
||||
然后必须在PostgreSQL中对数据进行预处理。这将创建多边形中选择的点(将地图上的点与纽约市的行政区相匹配),并使用连接将所有数据合并到一个非规范化的平面表中。为此,您需要安装支持PostGIS的PostgreSQL。
|
||||
|
||||
运行`initialize_database.sh`时要小心,并手动重新检查是否正确创建了所有表。
|
||||
|
||||
@ -114,7 +119,7 @@ COPY
|
||||
) TO '/opt/milovidov/nyc-taxi-data/trips.tsv';
|
||||
```
|
||||
|
||||
数据快照的创建速度约为每秒50 MB。 在创建快照时,PostgreSQL以每秒约28 MB的速度从磁盘读取数据。
|
||||
数据快照的创建速度约为每秒50MB。 在创建快照时,PostgreSQL以每秒约28MB的速度从磁盘读取数据。
|
||||
这大约需要5个小时。 最终生成的TSV文件为590612904969 bytes。
|
||||
|
||||
在ClickHouse中创建临时表:
|
||||
@ -186,11 +191,11 @@ real 75m56.214s
|
||||
|
||||
数据的读取速度为112-140 Mb/秒。
|
||||
通过这种方式将数据加载到Log表中需要76分钟。
|
||||
这个表中的数据需要使用142 GB的磁盘空间.
|
||||
这个表中的数据需要使用142GB的磁盘空间.
|
||||
|
||||
(也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据)
|
||||
|
||||
由于数据中与天气相关的所有数据(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们
|
||||
数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们
|
||||
|
||||
首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。
|
||||
|
||||
@ -259,7 +264,7 @@ FROM trips
|
||||
```
|
||||
|
||||
这需要3030秒,速度约为每秒428,000行。
|
||||
要加快速度,可以使用`Log`引擎替换’MergeTree\`引擎来创建表。 在这种情况下,下载速度超过200秒。
|
||||
要加快速度,可以使用`Log`引擎替换`MergeTree`引擎来创建表。 在这种情况下,下载速度超过200秒。
|
||||
|
||||
这个表需要使用126GB的磁盘空间。
|
||||
|
||||
@ -286,8 +291,7 @@ $ clickhouse-client --query "select count(*) from datasets.trips_mergetree"
|
||||
```
|
||||
|
||||
!!! info "信息"
|
||||
如果要运行下面的SQL查询,必须使用完整的表名,
|
||||
`datasets.trips_mergetree`。
|
||||
如果要运行下面的SQL查询,必须使用完整的表名,`datasets.trips_mergetree`。
|
||||
|
||||
## 单台服务器运行结果 {#dan-tai-fu-wu-qi-yun-xing-jie-guo}
|
||||
|
||||
@ -328,9 +332,9 @@ ORDER BY year, count(*) DESC
|
||||
|
||||
我们使用的是如下配置的服务器:
|
||||
|
||||
两个英特尔(R)至强(R)CPU E5-2650v2@2.60GHz,总共有16个物理内核,128GiB RAM,硬件RAID-5上的8X6TB HD
|
||||
两个`Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz`,总共有16个物理内核,128GiB RAM,8X6TB HD,RAID-5
|
||||
|
||||
执行时间是取三次运行中最好的值,但是从第二次查询开始,查询就讲从文件系统的缓存中读取数据。同时在每次读取和处理后不在进行缓存。
|
||||
执行时间是取三次运行中最好的值,但是从第二次查询开始,查询就将从文件系统的缓存中读取数据。同时在每次读取和处理后不在进行缓存。
|
||||
|
||||
在三台服务器中创建表结构:
|
||||
|
||||
@ -356,12 +360,12 @@ INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree
|
||||
|
||||
在三台服务器集群中运行的结果:
|
||||
|
||||
Q1:0.212秒.
|
||||
Q1: 0.212秒.
|
||||
Q2:0.438秒。
|
||||
Q3:0.733秒。
|
||||
Q4:1.241秒.
|
||||
Q4: 1.241秒.
|
||||
|
||||
不出意料,查询是线性扩展的。
|
||||
这并不奇怪,因为查询是线性扩展的。
|
||||
|
||||
我们同时在140台服务器的集群中运行的结果:
|
||||
|
||||
@ -371,7 +375,7 @@ Q3:0.051秒。
|
||||
Q4:0.072秒。
|
||||
|
||||
在这种情况下,查询处理时间首先由网络延迟确定。
|
||||
我们使用位于芬兰的Yandex数据中心中的客户端去位于俄罗斯的集群上运行查询,这增加了大约20毫秒的延迟。
|
||||
我们使用位于芬兰Yandex数据中心的客户机在俄罗斯的一个集群上运行查询,这增加了大约20毫秒的延迟。
|
||||
|
||||
## 总结 {#zong-jie}
|
||||
|
||||
|
@ -1,9 +1,14 @@
|
||||
# 航班飞行数据 {#hang-ban-fei-xing-shu-ju}
|
||||
---
|
||||
toc_priority: 21
|
||||
toc_title: OnTime
|
||||
---
|
||||
|
||||
# OnTime {#ontime}
|
||||
|
||||
航班飞行数据有以下两个方式获取:
|
||||
|
||||
- 从原始数据导入
|
||||
- 下载预处理好的分区数据
|
||||
- 下载预处理好的数据
|
||||
|
||||
## 从原始数据导入 {#cong-yuan-shi-shu-ju-dao-ru}
|
||||
|
||||
@ -19,7 +24,7 @@ done
|
||||
done
|
||||
```
|
||||
|
||||
(引用 https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
|
||||
(参考 https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
|
||||
|
||||
创建表结构:
|
||||
|
||||
@ -157,8 +162,7 @@ $ clickhouse-client --query "select count(*) from datasets.ontime"
|
||||
```
|
||||
|
||||
!!! info "信息"
|
||||
如果要运行下面的SQL查询,必须使用完整的表名,
|
||||
`datasets.ontime`。
|
||||
如果要运行下面的SQL查询,必须使用完整的表名,`datasets.ontime`。
|
||||
|
||||
## 查询: {#cha-xun}
|
||||
|
||||
@ -356,7 +360,7 @@ ORDER by rate DESC
|
||||
LIMIT 1000;
|
||||
```
|
||||
|
||||
奖金:
|
||||
Bonus:
|
||||
|
||||
``` sql
|
||||
SELECT avg(cnt)
|
||||
@ -402,3 +406,5 @@ LIMIT 10;
|
||||
- https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/
|
||||
- https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/
|
||||
- http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/ontime/) <!--hide-->
|
||||
|
@ -1,4 +1,9 @@
|
||||
# 星型基准测试 {#star-schema-benchmark}
|
||||
---
|
||||
toc_priority: 16
|
||||
toc_title: Star Schema Benchmark
|
||||
---
|
||||
|
||||
# Star Schema Benchmark {#star-schema-benchmark}
|
||||
|
||||
编译 dbgen:
|
||||
|
||||
@ -10,6 +15,9 @@ $ make
|
||||
|
||||
开始生成数据:
|
||||
|
||||
!!! warning "注意"
|
||||
使用`-s 100`dbgen将生成6亿行数据(67GB), 如果使用`-s 1000`它会生成60亿行数据(这需要很多时间))
|
||||
|
||||
``` bash
|
||||
$ ./dbgen -s 1000 -T c
|
||||
$ ./dbgen -s 1000 -T l
|
||||
@ -18,7 +26,7 @@ $ ./dbgen -s 1000 -T s
|
||||
$ ./dbgen -s 1000 -T d
|
||||
```
|
||||
|
||||
在ClickHouse中创建表结构:
|
||||
在ClickHouse中创建数据表:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE customer
|
||||
@ -92,7 +100,7 @@ $ clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
|
||||
$ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
|
||||
```
|
||||
|
||||
将«星型模型»转换为非规范化的«平面模型»:
|
||||
将`star schema`转换为`flat schema`:
|
||||
|
||||
``` sql
|
||||
SET max_memory_usage = 20000000000, allow_experimental_multiple_joins_emulation = 1;
|
||||
|
@ -1,4 +1,9 @@
|
||||
# 维基访问数据 {#wei-ji-fang-wen-shu-ju}
|
||||
---
|
||||
toc_priority: 17
|
||||
toc_title: WikiStat
|
||||
---
|
||||
|
||||
# WikiStat {#wikistat}
|
||||
|
||||
参考: http://dumps.wikimedia.org/other/pagecounts-raw/
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
---
|
||||
machine_translated: true
|
||||
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
|
||||
toc_folder_title: "\u5BFC\u8A00"
|
||||
toc_priority: 2
|
||||
---
|
||||
|
||||
# 入门 {#ru-men}
|
||||
|
||||
如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md).
|
||||
|
@ -1,34 +1,46 @@
|
||||
---
|
||||
toc_priority: 11
|
||||
toc_title: 安装部署
|
||||
---
|
||||
|
||||
# 安装 {#clickhouse-an-zhuang}
|
||||
|
||||
## 系统要求 {#xi-tong-yao-qiu}
|
||||
|
||||
ClickHouse可以在任何具有x86_64,AArch64或PowerPC64LE CPU架构的Linux,FreeBSD或Mac OS X上运行。
|
||||
|
||||
虽然预构建的二进制文件通常是为x86 _64编译并利用SSE 4.2指令集,但除非另有说明,否则使用支持它的CPU将成为额外的系统要求。这是检查当前CPU是否支持SSE 4.2的命令:
|
||||
官方预构建的二进制文件通常针对x86_64进行编译,并利用`SSE 4.2`指令集,因此,除非另有说明,支持它的CPU使用将成为额外的系统需求。下面是检查当前CPU是否支持SSE 4.2的命令:
|
||||
|
||||
``` bash
|
||||
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
|
||||
```
|
||||
|
||||
要在不支持SSE 4.2或具有AArch64或PowerPC64LE体系结构的处理器上运行ClickHouse,您应该[通过源构建ClickHouse](#from-sources)进行适当的配置调整。
|
||||
要在不支持`SSE 4.2`或`AArch64`,`PowerPC64LE`架构的处理器上运行ClickHouse,您应该通过适当的配置调整从[源代码构建ClickHouse](#from-sources)。
|
||||
|
||||
## 可用的安装选项 {#install-from-deb-packages}
|
||||
## 可用安装包 {#install-from-deb-packages}
|
||||
|
||||
建议为Debian或Ubuntu使用官方的预编译`deb`软件包。 运行以下命令以安装软件包:
|
||||
### `DEB`安装包
|
||||
|
||||
然后运行:
|
||||
建议使用Debian或Ubuntu的官方预编译`deb`软件包。运行以下命令来安装包:
|
||||
|
||||
``` bash
|
||||
{% include 'install/deb.sh' %}
|
||||
```
|
||||
|
||||
你也可以从这里手动下载安装包:https://repo.clickhouse.tech/deb/stable/main/。
|
||||
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。
|
||||
|
||||
如果你想使用最新的测试版本,请使用`testing`替换`stable`。
|
||||
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/deb/stable/main/)。
|
||||
|
||||
### 来自RPM包 {#from-rpm-packages}
|
||||
安装包列表:
|
||||
|
||||
Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于CentOS,RedHat和所有其他基于rpm的Linux发行版。
|
||||
- `clickhouse-common-static` — ClickHouse编译的二进制文件。
|
||||
- `clickhouse-server` — 创建`clickhouse-server`软连接,并安装默认配置服务
|
||||
- `clickhouse-client` — 创建`clickhouse-client`客户端工具软连接,并安装客户端配置文件。
|
||||
- `clickhouse-common-static-dbg` — 带有调试信息的ClickHouse二进制文件。
|
||||
|
||||
### `RPM`安装包 {#from-rpm-packages}
|
||||
|
||||
推荐使用CentOS、RedHat和所有其他基于rpm的Linux发行版的官方预编译`rpm`包。
|
||||
|
||||
首先,您需要添加官方存储库:
|
||||
|
||||
@ -38,84 +50,120 @@ sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
|
||||
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
|
||||
```
|
||||
|
||||
如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。
|
||||
如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。
|
||||
|
||||
然后运行这些命令以实际安装包:
|
||||
然后运行命令安装:
|
||||
|
||||
``` bash
|
||||
sudo yum install clickhouse-server clickhouse-client
|
||||
```
|
||||
|
||||
您也可以从此处手动下载和安装软件包:https://repo.clickhouse.tech/rpm/stable/x86_64。
|
||||
你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/rpm/stable/x86_64)。
|
||||
|
||||
### 来自Docker {#from-docker-image}
|
||||
### `Tgz`安装包 {#from-tgz-archives}
|
||||
|
||||
要在Docker中运行ClickHouse,请遵循[码头工人中心](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。那些图像使用官方的`deb`包。
|
||||
如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。
|
||||
|
||||
所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.tech/tgz/`下载。
|
||||
|
||||
下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新版本的安装示例:
|
||||
|
||||
``` bash
|
||||
export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
|
||||
curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
|
||||
|
||||
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
|
||||
|
||||
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
|
||||
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
|
||||
```
|
||||
|
||||
对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。
|
||||
|
||||
### `Docker`安装包 {#from-docker-image}
|
||||
|
||||
要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。它是官方的`deb`安装包。
|
||||
|
||||
### 其他环境安装包 {#from-other}
|
||||
|
||||
对于非linux操作系统和Arch64 CPU架构,ClickHouse将会以`master`分支的最新提交的进行编译提供(它将会有几小时的延迟)。
|
||||
|
||||
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
|
||||
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
|
||||
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
|
||||
|
||||
下载后,您可以使用`clickhouse client`连接服务,或者使用`clickhouse local`模式处理数据,不过您必须要额外在GitHub下载[server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml)和[users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml)配置文件。
|
||||
|
||||
不建议在生产环境中使用这些构建版本,因为它们没有经过充分的测试,但是您可以自行承担这样做的风险。它们只是ClickHouse功能的一个部分。
|
||||
|
||||
### 使用源码安装 {#from-sources}
|
||||
|
||||
具体编译方式可以参考build.md。
|
||||
要手动编译ClickHouse, 请遵循[Linux](../development/build.md)或[Mac OS X](../development/build-osx.md)说明。
|
||||
|
||||
你可以编译并安装它们。
|
||||
你也可以直接使用而不进行安装。
|
||||
您可以编译并安装它们,也可以使用不安装包的程序。通过手动构建,您可以禁用`SSE 4.2`或`AArch64 cpu`。
|
||||
|
||||
``` text
|
||||
Client: programs/clickhouse-client
|
||||
Server: programs/clickhouse-server
|
||||
```
|
||||
Client: programs/clickhouse-client
|
||||
Server: programs/clickhouse-server
|
||||
|
||||
在服务器中为数据创建如下目录:
|
||||
您需要创建一个数据和元数据文件夹,并为所需的用户`chown`授权。它们的路径可以在服务器配置(`src/programs/server/config.xml`)中改变,默认情况下它们是:
|
||||
|
||||
``` text
|
||||
/opt/clickhouse/data/default/
|
||||
/opt/clickhouse/metadata/default/
|
||||
```
|
||||
/opt/clickhouse/data/default/
|
||||
/opt/clickhouse/metadata/default/
|
||||
|
||||
(它们可以在server config中配置。)
|
||||
为需要的用户运行’chown’
|
||||
|
||||
日志的路径可以在server config (src/programs/server/config.xml)中配置。
|
||||
在Gentoo上,你可以使用`emerge clickhouse`从源代码安装ClickHouse。
|
||||
|
||||
## 启动 {#qi-dong}
|
||||
|
||||
可以运行如下命令在后台启动服务:
|
||||
如果没有`service`,可以运行如下命令在后台启动服务:
|
||||
|
||||
``` bash
|
||||
sudo service clickhouse-server start
|
||||
$ sudo /etc/init.d/clickhouse-server start
|
||||
```
|
||||
|
||||
可以在`/var/log/clickhouse-server/`目录中查看日志。
|
||||
日志文件将输出在`/var/log/clickhouse-server/`文件夹。
|
||||
|
||||
如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`。
|
||||
如果服务器没有启动,检查`/etc/clickhouse-server/config.xml`中的配置。
|
||||
|
||||
你也可以在控制台中直接启动服务:
|
||||
您也可以手动从控制台启动服务器:
|
||||
|
||||
``` bash
|
||||
clickhouse-server --config-file=/etc/clickhouse-server/config.xml
|
||||
```bash
|
||||
$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
|
||||
```
|
||||
|
||||
在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。
|
||||
如果配置文件在当前目录中,你可以不指定’–config-file’参数。它默认使用’./config.xml’。
|
||||
在这种情况下,日志将被打印到控制台,这在开发过程中很方便。
|
||||
|
||||
你可以使用命令行客户端连接到服务:
|
||||
如果配置文件在当前目录中,则不需要指定`——config-file`参数。默认情况下,它的路径为`./config.xml`。
|
||||
|
||||
ClickHouse支持访问限制设置。它们位于`users.xml`文件(与`config.xml`同级目录)。
|
||||
默认情况下,允许`default`用户从任何地方访问,不需要密码。可查看`user/default/networks`。
|
||||
更多信息,请参见[Configuration Files](../operations/configuration-files.md)。
|
||||
|
||||
启动服务后,您可以使用命令行客户端连接到它:
|
||||
|
||||
``` bash
|
||||
clickhouse-client
|
||||
$ clickhouse-client
|
||||
```
|
||||
|
||||
默认情况下它使用’default’用户无密码的与localhost:9000服务建立连接。
|
||||
客户端也可以用于连接远程服务,例如:
|
||||
默认情况下,使用`default`用户并不携带密码连接到`localhost:9000`。还可以使用`--host`参数连接到指定服务器。
|
||||
|
||||
终端必须使用UTF-8编码。
|
||||
更多信息,请参阅[Command-line client](../interfaces/cli.md)。
|
||||
|
||||
示例:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --host=example.com
|
||||
```
|
||||
|
||||
有关更多信息,请参考«Command-line client»部分。
|
||||
|
||||
检查系统是否工作:
|
||||
|
||||
``` bash
|
||||
milovidov@hostname:~/work/metrica/src/src/Client$ ./clickhouse-client
|
||||
$ ./clickhouse-client
|
||||
ClickHouse client version 0.0.18749.
|
||||
Connecting to localhost:9000.
|
||||
Connected to ClickHouse server version 0.0.18749.
|
||||
@ -135,6 +183,6 @@ SELECT 1
|
||||
|
||||
**恭喜,系统已经工作了!**
|
||||
|
||||
为了继续进行实验,你可以尝试下载测试数据集。
|
||||
为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.tech/tutorial.html)。
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/install/) <!--hide-->
|
||||
|
@ -1,19 +1,19 @@
|
||||
---
|
||||
toc_priority: 12
|
||||
toc_title: "\u6559\u7A0B"
|
||||
toc_title: 使用教程
|
||||
---
|
||||
|
||||
# 点击教程 {#clickhouse-tutorial}
|
||||
# ClickHouse教程 {#clickhouse-tutorial}
|
||||
|
||||
## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial}
|
||||
## 从本教程中可以获得什么? {#what-to-expect-from-this-tutorial}
|
||||
|
||||
通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
|
||||
通过学习本教程,您将了解如何设置一个简单的ClickHouse集群。它会很小,但是可以容错和扩展。然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
|
||||
|
||||
## 单节点设置 {#single-node-setup}
|
||||
|
||||
为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image) 。
|
||||
为了延迟演示分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。ClickHouse通常是从[deb](install.md#install-from-deb-packages)或[rpm](install.md#from-rpm-packages)包安装,但对于不支持它们的操作系统也有[其他方法](install.md#from-docker-image)。
|
||||
|
||||
例如,您选择了从 `deb` 包安装,执行:
|
||||
例如,您选择`deb`安装包,执行:
|
||||
|
||||
``` bash
|
||||
{% include 'install/deb.sh' %}
|
||||
@ -21,13 +21,13 @@ toc_title: "\u6559\u7A0B"
|
||||
|
||||
在我们安装的软件中包含这些包:
|
||||
|
||||
- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序,它是交互式ClickHouse控制台客户端。
|
||||
- `clickhouse-client` 包,包含[clickhouse-client](../interfaces/cli.md)客户端,它是交互式ClickHouse控制台客户端。
|
||||
- `clickhouse-common` 包,包含一个ClickHouse可执行文件。
|
||||
- `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。
|
||||
|
||||
服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`<path>` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件,作为config.xml文件的“补丁”,用以复写配置元素。
|
||||
服务器配置文件位于`/etc/clickhouse-server/`。在继续之前,请注意`config.xml`中的`<path>`元素。它决定了数据存储的位置,因此它应该位于磁盘容量的卷上;默认值是`/var/lib/clickhouse/`。如果你想调整配置,直接编辑config是不方便的。考虑到它可能会在将来的包更新中被重写。建议重写配置元素的方法是在配置中创建[config.d文件夹](../operations/configuration-files.md),作为config.xml的重写方式。
|
||||
|
||||
你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
|
||||
你可能已经注意到了,`clickhouse-server`安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
|
||||
|
||||
``` bash
|
||||
sudo service clickhouse-server start
|
||||
@ -39,9 +39,9 @@ sudo service clickhouse-server start
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
```
|
||||
|
||||
服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。
|
||||
服务端日志的默认位置是`/var/log/clickhouse-server/`。当服务端在日志中记录`Ready for connections`消息,即表示服务端已准备好处理客户端连接。
|
||||
|
||||
一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
|
||||
一旦`clickhouse-server`启动并运行,我们可以利用`clickhouse-client`连接到服务端,并运行一些测试查询,如`SELECT "Hello, world!";`.
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
|
||||
|
||||
## 导入示例数据集 {#import-sample-dataset}
|
||||
|
||||
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。有 [多种导入Yandex.Metrica数据集的的方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
|
||||
现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。[多种导入Yandex.Metrica数据集方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
|
||||
|
||||
### 下载并提取表数据 {#download-and-extract-table-data}
|
||||
|
||||
@ -93,17 +93,17 @@ curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unx
|
||||
|
||||
### 创建表 {#create-tables}
|
||||
|
||||
与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个 `default` 数据库,但我们将创建一个新的数据库 `tutorial`:
|
||||
与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个`default`数据库,但我们将创建一个新的数据库`tutorial`:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
|
||||
```
|
||||
|
||||
与创建数据库相比,创建表的语法要复杂得多(请参阅 [参考资料](../sql-reference/statements/create.md). 一般 `CREATE TABLE` 声明必须指定三个关键的事情:
|
||||
与创建数据库相比,创建表的语法要复杂得多(请参阅[参考资料](../sql-reference/statements/create.md). 一般`CREATE TABLE`声明必须指定三个关键的事情:
|
||||
|
||||
1. 要创建的表的名称。
|
||||
2. 表结构,例如:列名和对应的[数据类型](../sql-reference/data-types/index.md)。
|
||||
3. [表引擎](../engines/table-engines/index.md) 及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
|
||||
3. [表引擎](../engines/table-engines/index.md)及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。
|
||||
|
||||
Yandex.Metrica是一个网络分析服务,样本数据集不包括其全部功能,因此只有两个表可以创建:
|
||||
|
||||
@ -455,11 +455,11 @@ SETTINGS index_granularity = 8192
|
||||
|
||||
您可以使用`clickhouse-client`的交互模式执行这些查询(只需在终端中启动它,而不需要提前指定查询)。或者如果你愿意,可以尝试一些[替代接口](../interfaces/index.md)。
|
||||
|
||||
正如我们所看到的, `hits_v1` 使用 [基本的MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而 `visits_v1` 使用 [折叠树](../engines/table-engines/mergetree-family/collapsingmergetree.md) 变体。
|
||||
正如我们所看到的, `hits_v1`使用 [MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而`visits_v1`使用 [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md)引擎。
|
||||
|
||||
### 导入数据 {#import-data}
|
||||
|
||||
数据导入到ClickHouse是通过以下方式完成的 [INSERT INTO](../sql-reference/statements/insert-into.md) 查询像许多其他SQL数据库。 然而,数据通常是在一个提供 [支持的序列化格式](../interfaces/formats.md) 而不是 `VALUES` 子句(也支持)。
|
||||
数据导入到ClickHouse是通过[INSERT INTO](../sql-reference/statements/insert-into.md)方式完成的,查询类似许多SQL数据库。然而,数据通常是在一个提供[支持序列化格式](../interfaces/formats.md)而不是`VALUES`子句(也支持)。
|
||||
|
||||
我们之前下载的文件是以制表符分隔的格式,所以这里是如何通过控制台客户端导入它们:
|
||||
|
||||
@ -468,7 +468,7 @@ clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert
|
||||
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
|
||||
```
|
||||
|
||||
ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的 `--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询 `system.settings` 表:
|
||||
ClickHouse有很多[要调整的设置](../operations/settings/index.md)在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的`--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询`system.settings` 表:
|
||||
|
||||
``` sql
|
||||
SELECT name, value, changed, description
|
||||
@ -479,14 +479,14 @@ FORMAT TSV
|
||||
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
|
||||
```
|
||||
|
||||
您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
|
||||
您也可以[OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize)导入后的表。使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
|
||||
clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
|
||||
```
|
||||
|
||||
这些查询开始一个I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。
|
||||
这些查询开始I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。
|
||||
|
||||
现在我们可以检查表导入是否成功:
|
||||
|
||||
@ -524,9 +524,9 @@ ClickHouse集群是一个同质集群。 设置步骤:
|
||||
1. 在群集的所有机器上安装ClickHouse服务端
|
||||
2. 在配置文件中设置群集配置
|
||||
3. 在每个实例上创建本地表
|
||||
4. 创建一个 [分布式表](../engines/table-engines/special/distributed.md)
|
||||
4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)
|
||||
|
||||
[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 “视图”,映射到ClickHouse集群的本地表。 从分布式表中执行 **SELECT** 查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。
|
||||
[分布式表](../engines/table-engines/special/distributed.md)实际上是一种`view`,映射到ClickHouse集群的本地表。 从分布式表中执行**SELECT**查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。
|
||||
|
||||
具有三个分片,每个分片一个副本的集群的示例配置:
|
||||
|
||||
@ -555,7 +555,7 @@ ClickHouse集群是一个同质集群。 设置步骤:
|
||||
</remote_servers>
|
||||
```
|
||||
|
||||
为了进一步演示,让我们使用和创建 `hits_v1` 表相同的 `CREATE TABLE` 语句创建一个新的本地表,但表名不同:
|
||||
为了进一步演示,让我们使用和创建`hits_v1`表相同的`CREATE TABLE`语句创建一个新的本地表,但表名不同:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
|
||||
@ -568,9 +568,9 @@ CREATE TABLE tutorial.hits_all AS tutorial.hits_local
|
||||
ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
|
||||
```
|
||||
|
||||
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表 [远程](../sql-reference/table-functions/remote.md) 表功能。
|
||||
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表[远程](../sql-reference/table-functions/remote.md)表功能。
|
||||
|
||||
让我们运行 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
|
||||
让我们运行[INSERT SELECT](../sql-reference/statements/insert-into.md)将该表传播到多个服务器。
|
||||
|
||||
``` sql
|
||||
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
|
||||
@ -609,10 +609,10 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
|
||||
</remote_servers>
|
||||
```
|
||||
|
||||
启用本机复制 [Zookeeper](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
|
||||
启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
|
||||
|
||||
!!! note "注"
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
!!! note "注意"
|
||||
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
|
||||
|
||||
ZooKeeper位置在配置文件中指定:
|
||||
|
||||
@ -653,12 +653,12 @@ ENGINE = ReplcatedMergeTree(
|
||||
...
|
||||
```
|
||||
|
||||
在这里,我们使用 [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) 表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。
|
||||
在这里,我们使用[ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md)表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。
|
||||
|
||||
``` sql
|
||||
INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
|
||||
```
|
||||
|
||||
复制在多主机模式下运行。 数据可以加载到任何副本中,然后系统会自动将其与其他实例同步。 复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。 至少应有一个副本允许数据摄取。 其他人将同步数据和修复一致性,一旦他们将再次变得活跃。 请注意,这种方法允许最近插入的数据丢失的可能性很低。
|
||||
复制在多主机模式下运行。数据可以加载到任何副本中,然后系统自动将其与其他实例同步。复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。至少应该有一个副本允许数据摄入。另一些则会在重新激活后同步数据并修复一致性。请注意,这种方法允许最近插入的数据丢失的可能性很低。
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/tutorial/) <!--hide-->
|
||||
|
@ -4,53 +4,50 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)
|
||||
|
||||
在传统的行式数据库系统中,数据按如下顺序存储:
|
||||
|
||||
| row | watchID | JavaEnable | title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|------------|-----------|---------------------|
|
||||
| #0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | 任务 | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|--------------------|-----------|---------------------|
|
||||
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
|
||||
处于同一行中的数据总是被物理的存储在一起。
|
||||
|
||||
常见的行式数据库系统有: MySQL、Postgres和MS SQL Server。
|
||||
{: .灰色 }
|
||||
常见的行式数据库系统有:`MySQL`、`Postgres`和`MS SQL Server`。
|
||||
|
||||
在列式数据库系统中,数据按如下的顺序存储:
|
||||
|
||||
| row: | #0 | #1 | #2 | #N |
|
||||
| Row: | #0 | #1 | #2 | #N |
|
||||
|-------------|---------------------|---------------------|---------------------|-----|
|
||||
| watchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| JavaEnable: | 1 | 0 | 1 | … |
|
||||
| title: | 投资者关系 | 联系我们 | 任务 | … |
|
||||
| Title: | Investor Relations | Contact us | Mission | … |
|
||||
| GoodEvent: | 1 | 1 | 1 | … |
|
||||
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
|
||||
| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
|
||||
|
||||
该示例中只展示了数据在列式数据库中数据的排列方式。
|
||||
对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。
|
||||
这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。
|
||||
|
||||
常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。
|
||||
{: .灰色 }
|
||||
|
||||
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
|
||||
不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例;每种类型的查询(行、列和字节)读取多少数据;读取数据和更新之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。
|
||||
|
||||
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
|
||||
系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率?
|
||||
|
||||
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng}
|
||||
|
||||
- 大多数是读请求
|
||||
- 数据总是以相当大的批(\> 1000 rows)进行写入
|
||||
- 不修改已添加的数据
|
||||
- 每次查询都从数据库中读取大量的行,但是同时又仅需要少量的列
|
||||
- 绝大多数是读请求
|
||||
- 数据以相当大的批次(\> 1000行)更新,而不是单行更新;或者根本没有更新。
|
||||
- 已添加到数据库的数据不能修改。
|
||||
- 对于读取,从数据库中提取相当多的行,但只提取列的一小部分。
|
||||
- 宽表,即每个表包含着大量的列
|
||||
- 较少的查询(通常每台服务器每秒数百个查询或更少)
|
||||
- 查询相对较少(通常每台服务器每秒查询数百次或更少)
|
||||
- 对于简单查询,允许延迟大约50毫秒
|
||||
- 列中的数据相对较小: 数字和短字符串(例如,每个URL 60个字节)
|
||||
- 处理单个查询时需要高吞吐量(每个服务器每秒高达数十亿行)
|
||||
- 列中的数据相对较小:数字和短字符串(例如,每个URL 60个字节)
|
||||
- 处理单个查询时需要高吞吐量(每台服务器每秒可达数十亿行)
|
||||
- 事务不是必须的
|
||||
- 对数据一致性要求低
|
||||
- 每一个查询除了一个大表外都很小
|
||||
- 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中
|
||||
- 每个查询有一个大表。除了他意以外,其他的都很小。
|
||||
- 查询结果明显小于源数据。换句话说,数据经过过滤或聚合,因此结果适合于单个服务器的RAM中
|
||||
|
||||
很容易可以看出,OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景,并不是非常完美的适用方案。例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
toc_priority: 8
|
||||
toc_title: "\u91C7\u7528\u8005"
|
||||
toc_priority: 5
|
||||
toc_title: "ClickHouse用户"
|
||||
---
|
||||
|
||||
# ClickHouse用户 {#clickhouse-adopters}
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 2
|
||||
toc_title: ClickHouse的特性
|
||||
---
|
||||
|
||||
# ClickHouse的特性 {#clickhouse-de-te-xing}
|
||||
|
||||
## 真正的列式数据库管理系统 {#zhen-zheng-de-lie-shi-shu-ju-ku-guan-li-xi-tong}
|
||||
@ -12,9 +17,13 @@
|
||||
|
||||
在一些列式数据库管理系统中(例如:InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。
|
||||
|
||||
除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外,ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs),这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
|
||||
|
||||
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}
|
||||
|
||||
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果有可以使用SSD和内存,它也会合理的利用这些资源。
|
||||
许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。
|
||||
|
||||
ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果可以使用SSD和内存,它也会合理的利用这些资源。
|
||||
|
||||
## 多核心并行处理 {#duo-he-xin-bing-xing-chu-li}
|
||||
|
||||
@ -27,9 +36,11 @@ ClickHouse会使用服务器上一切可用的资源,从而以最自然的方
|
||||
|
||||
## 支持SQL {#zhi-chi-sql}
|
||||
|
||||
ClickHouse支持基于SQL的声明式查询语言,该语言大部分情况下是与SQL标准兼容的。
|
||||
支持的查询包括 GROUP BY,ORDER BY,IN,JOIN以及非相关子查询。
|
||||
不支持窗口函数和相关子查询。
|
||||
ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同。
|
||||
|
||||
支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。
|
||||
|
||||
相关(依赖性)子查询和窗口函数暂不受支持,但将来会被实现。
|
||||
|
||||
## 向量引擎 {#xiang-liang-yin-qing}
|
||||
|
||||
@ -55,12 +66,20 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进
|
||||
2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。
|
||||
3. 不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。
|
||||
|
||||
## Adaptive Join Algorithm {#adaptive-join-algorithm}
|
||||
|
||||
ClickHouse支持自定义[JOIN](../sql-reference/statements/select/join.md)多个表,它更倾向于散列连接算法,如果有多个大表,则使用合并-连接算法
|
||||
|
||||
## 支持数据复制和数据完整性 {#zhi-chi-shu-ju-fu-zhi-he-shu-ju-wan-zheng-xing}
|
||||
|
||||
ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后,系统会在后台将数据分发给其他副本,以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复,在一些少数的复杂情况下需要手动恢复。
|
||||
|
||||
更多信息,参见 [数据复制](../engines/table-engines/mergetree-family/replication.md)。
|
||||
|
||||
## 角色的访问控制 {#role-based-access-control}
|
||||
|
||||
ClickHouse使用SQL查询实现用户帐户管理,并允许[角色的访问控制](../operations/access-rights.md),类似于ANSI SQL标准和流行的关系数据库管理系统。
|
||||
|
||||
# 限制 {#clickhouseke-xian-zhi}
|
||||
|
||||
1. 没有完整的事务支持。
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 4
|
||||
toc_title: ClickHouse历史
|
||||
---
|
||||
|
||||
# ClickHouse历史 {#clickhouseli-shi}
|
||||
|
||||
ClickHouse最初是为 [YandexMetrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 3
|
||||
toc_title: ClickHouse性能
|
||||
---
|
||||
|
||||
# 性能 {#performance}
|
||||
|
||||
根据Yandex的内部测试结果,ClickHouse表现出了比同类可比较产品更优的性能。你可以在 [这里](https://clickhouse.tech/benchmark/dbms/) 查看具体的测试结果。
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user