mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix
This commit is contained in:
commit
5c5d72379a
@ -2,8 +2,7 @@
|
||||
name: Documentation issue
|
||||
about: Report something incorrect or missing in documentation
|
||||
title: ''
|
||||
labels: documentation
|
||||
assignees: BayoNet
|
||||
labels: comp-documentation
|
||||
|
||||
---
|
||||
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -124,3 +124,5 @@ website/package-lock.json
|
||||
|
||||
# Toolchains
|
||||
/cmake/toolchain/*
|
||||
|
||||
*.iml
|
||||
|
8
.gitmodules
vendored
8
.gitmodules
vendored
@ -44,6 +44,7 @@
|
||||
[submodule "contrib/protobuf"]
|
||||
path = contrib/protobuf
|
||||
url = https://github.com/ClickHouse-Extras/protobuf.git
|
||||
branch = v3.13.0.1
|
||||
[submodule "contrib/boost"]
|
||||
path = contrib/boost
|
||||
url = https://github.com/ClickHouse-Extras/boost.git
|
||||
@ -107,6 +108,7 @@
|
||||
[submodule "contrib/grpc"]
|
||||
path = contrib/grpc
|
||||
url = https://github.com/ClickHouse-Extras/grpc.git
|
||||
branch = v1.33.2
|
||||
[submodule "contrib/aws"]
|
||||
path = contrib/aws
|
||||
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
|
||||
@ -196,7 +198,11 @@
|
||||
[submodule "contrib/rocksdb"]
|
||||
path = contrib/rocksdb
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.11.4
|
||||
branch = v6.14.5
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
[submodule "contrib/abseil-cpp"]
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
|
||||
branch = lts_2020_02_25
|
||||
|
@ -154,6 +154,7 @@ endif ()
|
||||
# Make sure the final executable has symbols exported
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
|
||||
|
||||
if (OS_LINUX)
|
||||
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
|
||||
if (OBJCOPY_PATH)
|
||||
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
|
||||
@ -166,6 +167,7 @@ if (OBJCOPY_PATH)
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find objcopy.")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (OS_DARWIN)
|
||||
set(WHOLE_ARCHIVE -all_load)
|
||||
@ -475,9 +477,6 @@ find_contrib_lib(cityhash)
|
||||
|
||||
find_contrib_lib(farmhash)
|
||||
|
||||
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
|
||||
find_contrib_lib(btrie)
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find/gtest.cmake)
|
||||
endif ()
|
||||
|
@ -1,6 +1,6 @@
|
||||
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
|
||||
|
||||
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
|
||||
|
||||
## Useful Links
|
||||
|
||||
@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
|
||||
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill out this form](https://clickhouse.tech/#meet) to meet the Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020.
|
||||
* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020.
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept> // for std::logic_error
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
@ -3,7 +3,6 @@
|
||||
/// Macros for convenient usage of Poco logger.
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Message.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
19
base/glibc-compatibility/musl/accept4.c
Normal file
19
base/glibc-compatibility/musl/accept4.c
Normal file
@ -0,0 +1,19 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/socket.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg)
|
||||
{
|
||||
if (!flg) return accept(fd, addr, len);
|
||||
int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
|
||||
if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
|
||||
ret = accept(fd, addr, len);
|
||||
if (ret<0) return ret;
|
||||
if (flg & SOCK_CLOEXEC)
|
||||
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
|
||||
if (flg & SOCK_NONBLOCK)
|
||||
__syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK);
|
||||
return ret;
|
||||
}
|
37
base/glibc-compatibility/musl/epoll.c
Normal file
37
base/glibc-compatibility/musl/epoll.c
Normal file
@ -0,0 +1,37 @@
|
||||
#include <sys/epoll.h>
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int epoll_create(int size)
|
||||
{
|
||||
return epoll_create1(0);
|
||||
}
|
||||
|
||||
int epoll_create1(int flags)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_create1, flags);
|
||||
#ifdef SYS_epoll_create
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
|
||||
{
|
||||
return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
|
||||
}
|
||||
|
||||
int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
|
||||
{
|
||||
int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
|
||||
#ifdef SYS_epoll_wait
|
||||
if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
|
||||
{
|
||||
return epoll_pwait(fd, ev, cnt, to, 0);
|
||||
}
|
23
base/glibc-compatibility/musl/eventfd.c
Normal file
23
base/glibc-compatibility/musl/eventfd.c
Normal file
@ -0,0 +1,23 @@
|
||||
#include <sys/eventfd.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int eventfd(unsigned int count, int flags)
|
||||
{
|
||||
int r = __syscall(SYS_eventfd2, count, flags);
|
||||
#ifdef SYS_eventfd
|
||||
if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
|
||||
int eventfd_read(int fd, eventfd_t *value)
|
||||
{
|
||||
return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1;
|
||||
}
|
||||
|
||||
int eventfd_write(int fd, eventfd_t value)
|
||||
{
|
||||
return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1;
|
||||
}
|
45
base/glibc-compatibility/musl/getauxval.c
Normal file
45
base/glibc-compatibility/musl/getauxval.c
Normal file
@ -0,0 +1,45 @@
|
||||
#include <sys/auxv.h>
|
||||
#include <unistd.h> // __environ
|
||||
#include <errno.h>
|
||||
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
static unsigned long * __auxv = NULL;
|
||||
static unsigned long __auxv_secure = 0;
|
||||
|
||||
static size_t __find_auxv(unsigned long type)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __auxv[i]; i += 2)
|
||||
{
|
||||
if (__auxv[i] == type)
|
||||
return i + 1;
|
||||
}
|
||||
return (size_t) -1;
|
||||
}
|
||||
|
||||
__attribute__((constructor)) static void __auxv_init()
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __environ[i]; i++);
|
||||
__auxv = (unsigned long *) (__environ + i + 1);
|
||||
|
||||
size_t secure_idx = __find_auxv(AT_SECURE);
|
||||
if (secure_idx != ((size_t) -1))
|
||||
__auxv_secure = __auxv[secure_idx];
|
||||
}
|
||||
|
||||
unsigned long getauxval(unsigned long type)
|
||||
{
|
||||
if (type == AT_SECURE)
|
||||
return __auxv_secure;
|
||||
|
||||
if (__auxv)
|
||||
{
|
||||
size_t index = __find_auxv(type);
|
||||
if (index != ((size_t) -1))
|
||||
return __auxv[index];
|
||||
}
|
||||
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
8
base/glibc-compatibility/musl/secure_getenv.c
Normal file
@ -0,0 +1,8 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
char * secure_getenv(const char * name)
|
||||
{
|
||||
return getauxval(AT_SECURE) ? NULL : getenv(name);
|
||||
}
|
@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...);
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
void *__vdsosym(const char *, const char *);
|
||||
|
||||
#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
|
||||
|
||||
#define socketcall(...) __syscall_ret(__socketcall(__VA_ARGS__))
|
||||
|
||||
#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
|
||||
|
||||
#define socketcall_cp socketcall
|
||||
|
@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
|
||||
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
|
||||
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
|
||||
|
||||
extern char** environ;
|
||||
static Ehdr *eh = NULL;
|
||||
void *__vdsosym(const char *vername, const char *name);
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
__attribute__((constructor)) static void auxv_init()
|
||||
{
|
||||
size_t i, *auxv;
|
||||
for (i=0; environ[i]; i++);
|
||||
auxv = (void *)(environ+i+1);
|
||||
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
|
||||
if (!auxv[i]) return;
|
||||
if (!auxv[i+1]) return;
|
||||
eh = (void *)auxv[i+1];
|
||||
}
|
||||
|
||||
void *__vdsosym(const char *vername, const char *name)
|
||||
{
|
||||
size_t i;
|
||||
Ehdr * eh = (void *) getauxval(AT_SYSINFO_EHDR);
|
||||
if (!eh) return 0;
|
||||
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
|
||||
size_t *dynv=0, base=-1;
|
||||
|
@ -1,44 +0,0 @@
|
||||
# - Try to find btrie headers and libraries.
|
||||
#
|
||||
# Usage of this module as follows:
|
||||
#
|
||||
# find_package(btrie)
|
||||
#
|
||||
# Variables used by this module, they can change the default behaviour and need
|
||||
# to be set before calling find_package:
|
||||
#
|
||||
# BTRIE_ROOT_DIR Set this variable to the root installation of
|
||||
# btrie if the module has problems finding
|
||||
# the proper installation path.
|
||||
#
|
||||
# Variables defined by this module:
|
||||
#
|
||||
# BTRIE_FOUND System has btrie libs/headers
|
||||
# BTRIE_LIBRARIES The btrie library/libraries
|
||||
# BTRIE_INCLUDE_DIR The location of btrie headers
|
||||
|
||||
find_path(BTRIE_ROOT_DIR
|
||||
NAMES include/btrie.h
|
||||
)
|
||||
|
||||
find_library(BTRIE_LIBRARIES
|
||||
NAMES btrie
|
||||
PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS}
|
||||
)
|
||||
|
||||
find_path(BTRIE_INCLUDE_DIR
|
||||
NAMES btrie.h
|
||||
PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS}
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(btrie DEFAULT_MSG
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
||||
|
||||
mark_as_advanced(
|
||||
BTRIE_ROOT_DIR
|
||||
BTRIE_LIBRARIES
|
||||
BTRIE_INCLUDE_DIR
|
||||
)
|
@ -6,11 +6,9 @@ Defines the following variables:
|
||||
The include directories of the gRPC framework, including the include directories of the C++ wrapper.
|
||||
``gRPC_LIBRARIES``
|
||||
The libraries of the gRPC framework.
|
||||
``gRPC_UNSECURE_LIBRARIES``
|
||||
The libraries of the gRPC framework without SSL.
|
||||
``_gRPC_CPP_PLUGIN``
|
||||
``gRPC_CPP_PLUGIN``
|
||||
The plugin for generating gRPC client and server C++ stubs from `.proto` files
|
||||
``_gRPC_PYTHON_PLUGIN``
|
||||
``gRPC_PYTHON_PLUGIN``
|
||||
The plugin for generating gRPC client and server Python stubs from `.proto` files
|
||||
|
||||
The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined:
|
||||
``grpc_cpp_plugin``
|
||||
``grpc_python_plugin``
|
||||
|
||||
Set the following variables to adjust the behaviour of this script:
|
||||
``gRPC_USE_UNSECURE_LIBRARIES``
|
||||
If set, gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL)
|
||||
instead of the secure ones.
|
||||
``gRPC_DEBUG``
|
||||
If set, debug messages will be printed.
|
||||
|
||||
Add custom commands to process ``.proto`` files to C++::
|
||||
protobuf_generate_grpc_cpp(<SRCS> <HDRS>
|
||||
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
|
||||
@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc)
|
||||
find_library(gRPC_CPP_LIBRARY NAMES grpc++)
|
||||
find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure)
|
||||
find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure)
|
||||
find_library(gRPC_CARES_LIBRARY NAMES cares)
|
||||
|
||||
set(gRPC_LIBRARIES)
|
||||
if(gRPC_USE_UNSECURE_LIBRARIES)
|
||||
@ -259,6 +265,7 @@ else()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY})
|
||||
|
||||
# Restore the original find library ordering.
|
||||
if(gRPC_USE_STATIC_LIBS)
|
||||
@ -278,11 +285,11 @@ else()
|
||||
endif()
|
||||
|
||||
# Get full path to plugin.
|
||||
find_program(_gRPC_CPP_PLUGIN
|
||||
find_program(gRPC_CPP_PLUGIN
|
||||
NAMES grpc_cpp_plugin
|
||||
DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files")
|
||||
|
||||
find_program(_gRPC_PYTHON_PLUGIN
|
||||
find_program(gRPC_PYTHON_PLUGIN
|
||||
NAMES grpc_python_plugin
|
||||
DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files")
|
||||
|
||||
@ -317,14 +324,14 @@ endif()
|
||||
|
||||
#include(FindPackageHandleStandardArgs.cmake)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN)
|
||||
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY
|
||||
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN)
|
||||
|
||||
if(gRPC_FOUND)
|
||||
if(gRPC_DEBUG)
|
||||
message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}")
|
||||
message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}")
|
||||
message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -1,9 +1,9 @@
|
||||
# These strings are autochanged by release_lib.sh:
|
||||
SET(VERSION_REVISION 54443)
|
||||
SET(VERSION_REVISION 54444)
|
||||
SET(VERSION_MAJOR 20)
|
||||
SET(VERSION_MINOR 12)
|
||||
SET(VERSION_MINOR 13)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1)
|
||||
SET(VERSION_DESCRIBE v20.12.1.1-prestable)
|
||||
SET(VERSION_STRING 20.12.1.1)
|
||||
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
|
||||
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
|
||||
SET(VERSION_STRING 20.13.1.1)
|
||||
# end of autochange
|
||||
|
@ -12,13 +12,7 @@ set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
|
||||
# Minimal supported SDK version
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.15")
|
||||
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.15")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.15")
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
|
||||
|
||||
# Global libraries
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# Needed when using Apache Avro serialization format
|
||||
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_AVRO)
|
||||
|
@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY)
|
||||
if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
elseif(NOT _gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin")
|
||||
elseif(NOT gRPC_CPP_PLUGIN)
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
|
||||
else()
|
||||
set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
|
||||
@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
else()
|
||||
set(gRPC_LIBRARIES grpc grpc++)
|
||||
endif()
|
||||
set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(_gRPC_PROTOC_EXECUTABLE $<TARGET_FILE:protobuf::protoc>)
|
||||
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
|
||||
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
|
||||
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
||||
@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
|
||||
set(USE_GRPC 1)
|
||||
endif()
|
||||
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}")
|
||||
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Needed when securely connecting to an external server, e.g.
|
||||
# clickhouse-client --host ... --secure
|
||||
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
|
||||
|
||||
if(NOT ENABLE_SSL)
|
||||
|
@ -23,7 +23,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
|
||||
|
||||
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
|
||||
# Only in release build because debug has too large stack frames.
|
||||
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE))
|
||||
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
|
||||
add_warning(frame-larger-than=32768)
|
||||
endif ()
|
||||
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
|
||||
add_subdirectory (libfarmhash)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_BTRIE_LIBRARY)
|
||||
add_subdirectory (libbtrie)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
|
||||
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
|
||||
|
1
contrib/abseil-cpp
vendored
Submodule
1
contrib/abseil-cpp
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54
|
2
contrib/cctz
vendored
2
contrib/cctz
vendored
@ -1 +1 @@
|
||||
Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841
|
||||
Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d
|
2
contrib/grpc
vendored
2
contrib/grpc
vendored
@ -1 +1 @@
|
||||
Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43
|
||||
Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93
|
@ -1,6 +1,7 @@
|
||||
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
|
||||
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
|
||||
|
||||
# Use re2 from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT RE2_INCLUDE_DIR)
|
||||
message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown")
|
||||
endif()
|
||||
@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}")
|
||||
set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}")
|
||||
|
||||
# Use zlib from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT ZLIB_INCLUDE_DIRS)
|
||||
message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown")
|
||||
endif()
|
||||
@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}")
|
||||
set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}")
|
||||
|
||||
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
|
||||
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
|
||||
message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown")
|
||||
elseif (NOT Protobuf_PROTOC_EXECUTABLE)
|
||||
@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc")
|
||||
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")
|
||||
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}")
|
||||
|
||||
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
|
||||
set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES})
|
||||
|
||||
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
|
||||
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
|
||||
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
|
||||
message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
|
||||
|
||||
# Choose to build static or shared library for c-ares.
|
||||
if (MAKE_STATIC_LIBRARIES)
|
||||
set(CARES_STATIC ON CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
|
||||
else ()
|
||||
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
|
||||
set(CARES_SHARED ON CACHE BOOL "" FORCE)
|
||||
endif ()
|
||||
|
||||
# We don't want to build C# extensions.
|
||||
set(gRPC_BUILD_CSHARP_EXT OFF)
|
||||
|
||||
# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off.
|
||||
set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING})
|
||||
set(BUILD_TESTING OFF)
|
||||
|
||||
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
|
||||
|
||||
set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING})
|
||||
|
||||
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
|
||||
# so we need to redefine it back.
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
|
||||
|
@ -1,6 +0,0 @@
|
||||
add_library(btrie
|
||||
src/btrie.c
|
||||
include/btrie.h
|
||||
)
|
||||
|
||||
target_include_directories (btrie SYSTEM PUBLIC include)
|
@ -1,23 +0,0 @@
|
||||
Copyright (c) 2013, CobbLiu
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,160 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* In btrie, each leaf means one bit in ip tree.
|
||||
* Left means 0, and right means 1.
|
||||
*/
|
||||
|
||||
#define BTRIE_NULL (uintptr_t) -1
|
||||
|
||||
#if !defined(BTRIE_MAX_PAGES)
|
||||
/// 54 ip per page. 8 bytes memory per page when empty
|
||||
#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty
|
||||
// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty
|
||||
#endif
|
||||
|
||||
typedef struct btrie_node_s btrie_node_t;
|
||||
|
||||
struct btrie_node_s {
|
||||
btrie_node_t *right;
|
||||
btrie_node_t *left;
|
||||
btrie_node_t *parent;
|
||||
uintptr_t value;
|
||||
};
|
||||
|
||||
|
||||
typedef struct btrie_s {
|
||||
btrie_node_t *root;
|
||||
|
||||
btrie_node_t *free; /* free list of btrie */
|
||||
char *start;
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* memory pool.
|
||||
* memory management(esp free) will be so easy by using this facility.
|
||||
*/
|
||||
char *pools[BTRIE_MAX_PAGES];
|
||||
size_t len;
|
||||
} btrie_t;
|
||||
|
||||
|
||||
/**
|
||||
* Create an empty btrie
|
||||
*
|
||||
* @Return:
|
||||
* An ip radix_tree created.
|
||||
* NULL if creation failed.
|
||||
*/
|
||||
|
||||
btrie_t *btrie_create();
|
||||
|
||||
/**
|
||||
* Destroy the ip radix_tree
|
||||
*
|
||||
* @Return:
|
||||
* OK if deletion succeed.
|
||||
* ERROR if error occurs while deleting.
|
||||
*/
|
||||
int btrie_destroy(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Count the nodes in the radix tree.
|
||||
*/
|
||||
size_t btrie_count(btrie_t *tree);
|
||||
|
||||
/**
|
||||
* Return the allocated number of bytes.
|
||||
*/
|
||||
size_t btrie_allocated(btrie_t *tree);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv4 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Delete an ipv4 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
|
||||
|
||||
|
||||
/**
|
||||
* Find an ipv4 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
|
||||
|
||||
|
||||
/**
|
||||
* Add an ipv6 into btrie
|
||||
*
|
||||
* @Args:
|
||||
* key: ip address
|
||||
* mask: key's mask
|
||||
* value: value of this IP, may be NULL.
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value);
|
||||
|
||||
/**
|
||||
* Delete an ipv6 from btrie
|
||||
*
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* OK for success.
|
||||
* ERROR for failure.
|
||||
*/
|
||||
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
|
||||
|
||||
/**
|
||||
* Find an ipv6 from btrie
|
||||
*
|
||||
|
||||
* @Args:
|
||||
*
|
||||
* @Return:
|
||||
* Value if succeed.
|
||||
* NULL if failed.
|
||||
*/
|
||||
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
@ -1,460 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <btrie.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
|
||||
static btrie_node_t *
|
||||
btrie_alloc(btrie_t *tree)
|
||||
{
|
||||
btrie_node_t *p;
|
||||
|
||||
if (tree->free) {
|
||||
p = tree->free;
|
||||
tree->free = tree->free->right;
|
||||
return p;
|
||||
}
|
||||
|
||||
if (tree->size < sizeof(btrie_node_t)) {
|
||||
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
|
||||
if (tree->start == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->pools[tree->len++] = tree->start;
|
||||
tree->size = PAGE_SIZE;
|
||||
}
|
||||
|
||||
p = (btrie_node_t *) tree->start;
|
||||
|
||||
tree->start += sizeof(btrie_node_t);
|
||||
tree->size -= sizeof(btrie_node_t);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
btrie_t *
|
||||
btrie_create()
|
||||
{
|
||||
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
|
||||
if (tree == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->free = NULL;
|
||||
tree->start = NULL;
|
||||
tree->size = 0;
|
||||
memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES);
|
||||
tree->len = 0;
|
||||
|
||||
tree->root = btrie_alloc(tree);
|
||||
if (tree->root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tree->root->right = NULL;
|
||||
tree->root->left = NULL;
|
||||
tree->root->parent = NULL;
|
||||
tree->root->value = BTRIE_NULL;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static size_t
|
||||
subtree_weight(btrie_node_t *node)
|
||||
{
|
||||
size_t weight = 1;
|
||||
if (node->left) {
|
||||
weight += subtree_weight(node->left);
|
||||
}
|
||||
if (node->right) {
|
||||
weight += subtree_weight(node->right);
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_count(btrie_t *tree)
|
||||
{
|
||||
if (tree->root == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return subtree_weight(tree->root);
|
||||
}
|
||||
|
||||
size_t
|
||||
btrie_allocated(btrie_t *tree)
|
||||
{
|
||||
return tree->len * PAGE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
bit = 0x80000000;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask) {
|
||||
if (key & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
|
||||
{
|
||||
uint32_t bit;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask)) {
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find(btrie_t *tree, uint32_t key)
|
||||
{
|
||||
uint32_t bit;
|
||||
uintptr_t value;
|
||||
btrie_node_t *node;
|
||||
|
||||
bit = 0x80000000;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
|
||||
uintptr_t value)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node, *next;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
|
||||
node = tree->root;
|
||||
next = tree->root;
|
||||
|
||||
while (bit & mask[i]) {
|
||||
if (key[i] & bit) {
|
||||
next = node->right;
|
||||
|
||||
} else {
|
||||
next = node->left;
|
||||
}
|
||||
|
||||
if (next == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (bit & mask[i]) {
|
||||
next = btrie_alloc(tree);
|
||||
if (next == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
next->right = NULL;
|
||||
next->left = NULL;
|
||||
next->parent = node;
|
||||
next->value = BTRIE_NULL;
|
||||
|
||||
if (key[i] & bit) {
|
||||
node->right = next;
|
||||
|
||||
} else {
|
||||
node->left = next;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
node = next;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
node->value = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
|
||||
{
|
||||
uint8_t bit;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
node = tree->root;
|
||||
|
||||
while (node && (bit & mask[i])) {
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
if (++i == 16) {
|
||||
break;
|
||||
}
|
||||
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (node->right || node->left) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
node->value = BTRIE_NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
for ( ;; ) {
|
||||
if (node->parent->right == node) {
|
||||
node->parent->right = NULL;
|
||||
|
||||
} else {
|
||||
node->parent->left = NULL;
|
||||
}
|
||||
|
||||
node->right = tree->free;
|
||||
tree->free = node;
|
||||
|
||||
node = node->parent;
|
||||
|
||||
if (node->right || node->left) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->value != BTRIE_NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->parent == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
uintptr_t
|
||||
btrie_find_a6(btrie_t *tree, const uint8_t *key)
|
||||
{
|
||||
uint8_t bit;
|
||||
uintptr_t value;
|
||||
unsigned int i;
|
||||
btrie_node_t *node;
|
||||
|
||||
i = 0;
|
||||
bit = 0x80;
|
||||
value = BTRIE_NULL;
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->value != BTRIE_NULL) {
|
||||
value = node->value;
|
||||
}
|
||||
|
||||
if (key[i] & bit) {
|
||||
node = node->right;
|
||||
|
||||
} else {
|
||||
node = node->left;
|
||||
}
|
||||
|
||||
bit >>= 1;
|
||||
|
||||
if (bit == 0) {
|
||||
i++;
|
||||
bit = 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
btrie_destroy(btrie_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
|
||||
/* free memory pools */
|
||||
for (i = 0; i < tree->len; i++) {
|
||||
free(tree->pools[i]);
|
||||
}
|
||||
|
||||
free(tree);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <btrie.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
btrie_t *it;
|
||||
int ret;
|
||||
|
||||
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
|
||||
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
|
||||
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
|
||||
|
||||
it = btrie_create();
|
||||
if (it == NULL) {
|
||||
printf("create error!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//add 101.45.69.50/16
|
||||
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 1 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.69.50/16
|
||||
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 2 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 10.45.79.50/16
|
||||
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
|
||||
if (ret == 0) {
|
||||
printf("insert 3 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
//add 102.45.79.50/24
|
||||
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 4 error.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170741554);
|
||||
if (ret == 1) {
|
||||
printf("test case 1 passed\n");
|
||||
} else {
|
||||
printf("test case 1 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 170786817);
|
||||
if (ret != 1) {
|
||||
printf("test case 2 passed\n");
|
||||
} else {
|
||||
printf("test case 2 error\n");
|
||||
}
|
||||
|
||||
ret = btrie_delete(it, 1714245426, 0xffffff00);
|
||||
if (ret != 0) {
|
||||
printf("delete 1 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find(it, 1714245426);
|
||||
if (ret != 1) {
|
||||
printf("test case 3 passed\n");
|
||||
} else {
|
||||
printf("test case 3 error\n");
|
||||
}
|
||||
|
||||
//add dead:beef::/32
|
||||
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrie_find_a6(it, ip_v6);
|
||||
if (ret == 1) {
|
||||
printf("test case 4 passed\n");
|
||||
} else {
|
||||
printf("test case 4 error\n");
|
||||
}
|
||||
|
||||
// insert 4m ips
|
||||
for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
|
||||
ret = btrie_insert(it, ip, 0xffffffff, 1);
|
||||
if (ret != 0) {
|
||||
printf("insert 5 error (%d) (%zu) .\n", ret, ip);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrie_destroy(it);
|
||||
printf("test failed\n");
|
||||
return 1;
|
||||
}
|
@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st
|
||||
set(LIBUNWIND_ASM_SOURCES
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S
|
||||
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S)
|
||||
|
||||
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
|
||||
# Work around this issue by compiling the assembly sources as C.
|
||||
#
|
||||
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
|
||||
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
|
||||
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
else()
|
||||
enable_language(ASM)
|
||||
endif()
|
||||
|
||||
set(LIBUNWIND_SOURCES
|
||||
${LIBUNWIND_CXX_SOURCES}
|
||||
|
2
contrib/protobuf
vendored
2
contrib/protobuf
vendored
@ -1 +1 @@
|
||||
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3
|
||||
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee
|
2
contrib/rocksdb
vendored
2
contrib/rocksdb
vendored
@ -1 +1 @@
|
||||
Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a
|
||||
Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd
|
@ -347,8 +347,9 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/builder.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/c.cc
|
||||
@ -394,6 +395,8 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/output_validator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
|
||||
${ROCKSDB_SOURCE_DIR}/db/repair.cc
|
||||
@ -451,12 +454,12 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options.cc
|
||||
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
|
||||
@ -507,6 +510,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
|
||||
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
|
||||
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
|
||||
@ -515,6 +519,7 @@ set(SOURCES
|
||||
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
|
||||
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
|
||||
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (20.12.1.1) unstable; urgency=low
|
||||
clickhouse (20.13.1.1) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 05 Nov 2020 21:52:47 +0300
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
|
||||
|
96
debian/clickhouse-server.init
vendored
96
debian/clickhouse-server.init
vendored
@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
|
||||
fi
|
||||
|
||||
|
||||
# Succeeds when a process recorded in $CLICKHOUSE_PIDFILE matches the program
# name.  Only the first 15 characters of $PROGRAM are handed to pgrep
# (presumably because process names are truncated to that length -- confirm).
# All pgrep output is discarded; only the exit status matters.
is_running()
{
    local short_name
    short_name=$(echo "${PROGRAM}" | cut -c1-15)
    pgrep --pidfile "$CLICKHOUSE_PIDFILE" $short_name > /dev/null 2>&1
}
|
||||
|
||||
|
||||
# Poll once per second until is_running reports that the process is gone.
#   $1 - optional limit on the number of polls; when it is given and the
#        limit is exceeded the function returns 1.  Without it the wait is
#        unbounded.
# Note: `timeout` and `attempts` are deliberately left as plain (global)
# shell variables.
wait_for_done()
{
    timeout=$1
    attempts=0
    while is_running; do
        attempts=$((attempts + 1))
        if [ -n "$timeout" ]; then
            if [ $attempts -gt $timeout ]; then
                return 1
            fi
        fi
        sleep 1
    done
}
|
||||
|
||||
|
||||
die()
|
||||
{
|
||||
echo $1 >&2
|
||||
@ -105,49 +85,7 @@ check_config()
|
||||
|
||||
initdb()
|
||||
{
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
|
||||
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
|
||||
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
|
||||
fi
|
||||
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
else
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
|
||||
fi
|
||||
|
||||
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
|
||||
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
|
||||
return
|
||||
fi
|
||||
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
|
||||
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
|
||||
return
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
|
||||
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
|
||||
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
|
||||
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
|
||||
else
|
||||
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
|
||||
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
|
||||
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
|
||||
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
|
||||
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -171,17 +109,7 @@ restart()
|
||||
|
||||
forcestop()
|
||||
{
|
||||
local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
echo -n "Stop forcefully $PROGRAM service: "
|
||||
|
||||
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
|
||||
|
||||
wait_for_done
|
||||
|
||||
echo "DONE"
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
@ -261,16 +189,16 @@ main()
|
||||
service_or_func restart
|
||||
;;
|
||||
condstart)
|
||||
is_running || service_or_func start
|
||||
service_or_func start
|
||||
;;
|
||||
condstop)
|
||||
is_running && service_or_func stop
|
||||
service_or_func stop
|
||||
;;
|
||||
condrestart)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
condreload)
|
||||
is_running && service_or_func restart
|
||||
service_or_func restart
|
||||
;;
|
||||
initdb)
|
||||
initdb
|
||||
@ -293,17 +221,7 @@ main()
|
||||
|
||||
status()
|
||||
{
|
||||
if is_running; then
|
||||
echo "$PROGRAM service is running"
|
||||
exit 0
|
||||
else
|
||||
if is_cron_disabled; then
|
||||
echo "$PROGRAM service is stopped";
|
||||
else
|
||||
echo "$PROGRAM: process unexpectedly terminated"
|
||||
fi
|
||||
exit 3
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -56,6 +56,7 @@ RUN apt-get update \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
protobuf-compiler-grpc \
|
||||
libc-ares-dev \
|
||||
rapidjson-dev \
|
||||
libsnappy-dev \
|
||||
libparquet-dev \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.12.1.*
|
||||
ARG version=20.13.1.*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
|
||||
ENV OUTPUT_DIR=/output
|
||||
ENV IGNORE='.*contrib.*'
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
|
||||
RUN apt-get update && apt-get install cmake --yes --no-install-recommends
|
||||
|
||||
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
|
||||
dpkg -i /package_folder/clickhouse-common-static_*.deb; \
|
||||
llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
|
||||
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
|
||||
genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}
|
||||
|
@ -15,6 +15,9 @@ stage=${stage:-}
|
||||
# empty parameter.
|
||||
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
|
||||
|
||||
# Run only matching tests.
|
||||
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
|
||||
|
||||
FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
|
||||
FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
|
||||
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
|
||||
@ -127,7 +130,31 @@ function clone_submodules
|
||||
(
|
||||
cd "$FASTTEST_SOURCE"
|
||||
|
||||
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring contrib/miniselect contrib/xz)
|
||||
SUBMODULES_TO_UPDATE=(
|
||||
contrib/boost
|
||||
contrib/zlib-ng
|
||||
contrib/libxml2
|
||||
contrib/poco
|
||||
contrib/libunwind
|
||||
contrib/ryu
|
||||
contrib/fmtlib
|
||||
contrib/base64
|
||||
contrib/cctz
|
||||
contrib/libcpuid
|
||||
contrib/double-conversion
|
||||
contrib/libcxx
|
||||
contrib/libcxxabi
|
||||
contrib/libc-headers
|
||||
contrib/lz4
|
||||
contrib/zstd
|
||||
contrib/fastops
|
||||
contrib/rapidjson
|
||||
contrib/re2
|
||||
contrib/sparsehash-c11
|
||||
contrib/croaring
|
||||
contrib/miniselect
|
||||
contrib/xz
|
||||
)
|
||||
|
||||
git submodule sync
|
||||
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
|
||||
@ -285,13 +312,14 @@ TESTS_TO_SKIP=(
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
01561_mann_whitney_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
@ -0,0 +1,10 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
mysql1:
|
||||
image: mysql:5.7
|
||||
restart: 'no'
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
@ -2,7 +2,7 @@ version: '2.3'
|
||||
services:
|
||||
mysql8_0:
|
||||
image: mysql:8.0
|
||||
restart: always
|
||||
restart: 'no'
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
@ -25,12 +25,13 @@ RUN apt-get update \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
rsync \
|
||||
tree \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
&& pip3 --no-cache-dir install clickhouse_driver scipy \
|
||||
&& pip3 --no-cache-dir install 'git+https://github.com/mymarilyn/clickhouse-driver.git' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
|
@ -14,10 +14,12 @@ import string
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import logging
|
||||
import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
@ -46,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
@ -139,16 +143,18 @@ reportStageEnd('before-connect')
|
||||
|
||||
# Open connections
|
||||
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
|
||||
all_connections = [clickhouse_driver.Client(**server) for server in servers]
|
||||
# Force settings_is_important to fail queries on unknown settings.
|
||||
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
|
||||
|
||||
for i, s in enumerate(servers):
|
||||
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
|
||||
|
||||
reportStageEnd('connect')
|
||||
|
||||
# Run drop queries, ignoring errors. Do this before all other activity, because
|
||||
# clickhouse_driver disconnects on error (this is not configurable), and the new
|
||||
# connection loses the changes in settings.
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
@ -162,21 +168,12 @@ for conn_index, c in enumerate(all_connections):
|
||||
reportStageEnd('drop-1')
|
||||
|
||||
# Apply settings.
|
||||
# If there are errors, report them and continue -- maybe a new test uses a setting
|
||||
# that is not in master, but the queries can still run. If we have multiple
|
||||
# settings and one of them throws an exception, all previous settings for this
|
||||
# connection will be reset, because the driver reconnects on error (not
|
||||
# configurable). So the end result is uncertain, but hopefully we'll be able to
|
||||
# run at least some queries.
|
||||
settings = root.findall('settings/*')
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
try:
|
||||
q = f"set {s.tag} = '{s.text}'"
|
||||
c.execute(q)
|
||||
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
|
||||
c.settings[s.tag] = s.text
|
||||
|
||||
reportStageEnd('settings')
|
||||
|
||||
@ -194,16 +191,18 @@ for t in tables:
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
|
||||
# Run create and fill queries. We will run them simultaneously for both servers,
|
||||
# to save time.
|
||||
# The weird search is to keep the relative order of elements, which matters, and
|
||||
# etree doesn't support the appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
|
||||
# and temporary tables are destroyed. We want to be able to continue after some
|
||||
# errors.
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
@ -215,7 +214,8 @@ def do_create(connection, index, queries):
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
|
||||
threads = [Thread(target = do_create, args = (connection, index, create_queries))
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
|
||||
for t in threads:
|
||||
@ -403,6 +403,7 @@ print(f'profile-total\t{profile_total_seconds}')
|
||||
reportStageEnd('run')
|
||||
|
||||
# Run drop queries
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
|
@ -10,6 +10,11 @@ RUN apt-get update --yes \
|
||||
gpg-agent \
|
||||
debsig-verify \
|
||||
strace \
|
||||
protobuf-compiler \
|
||||
protobuf-compiler-grpc \
|
||||
libprotoc-dev \
|
||||
libgrpc++-dev \
|
||||
libc-ares-dev \
|
||||
--yes --no-install-recommends
|
||||
|
||||
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
|
||||
@ -33,7 +38,8 @@ RUN set -x \
|
||||
&& dpkg -i "${PKG_VERSION}.deb"
|
||||
|
||||
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \
|
||||
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
|
||||
&& ninja re2_st clickhouse_grpc_protos \
|
||||
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
|
||||
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
|
||||
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log
|
||||
|
@ -1,12 +1,12 @@
|
||||
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
|
||||
FROM yandex/clickhouse-stateless-test
|
||||
FROM yandex/clickhouse-stateless-test-with-coverage
|
||||
|
||||
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
python3-requests
|
||||
python3-requests procps psmisc
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
COPY run.sh /run.sh
|
||||
|
@ -1,40 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
kill_clickhouse () {
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process"
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive"
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Launch clickhouse-server in the background as user `clickhouse`.
# LLVM_PROFILE_FILE is set for the command and `sudo -E` preserves the
# environment, so the server gets the profile file name pattern.
start_clickhouse () {
    LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' \
        sudo -Eu clickhouse \
        /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
}
|
||||
|
||||
wait_llvm_profdata () {
|
||||
while kill -0 "$(pgrep llvm-profdata-10)"
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
|
||||
sleep 3
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
# Merge every /client_*profraw file into a single timestamped
# merged_client_<epoch>.profraw and delete the inputs.
#
# The file names are collected with a glob into an array, so each file is
# passed to llvm-profdata-10 and rm as its own argument.  Does nothing when
# no client profile exists.
merge_client_files_in_background () {
    local client_files=()
    local candidate

    for candidate in /client_*profraw
    do
        # an unmatched glob stays literal; skip anything that does not exist
        if [ -e "$candidate" ]
        then
            client_files+=("$candidate")
        fi
    done

    if [ "${#client_files[@]}" -gt 0 ]
    then
        llvm-profdata-10 merge -sparse "${client_files[@]}" -o "merged_client_$(date +%s).profraw"
        rm "${client_files[@]}"
    fi
}
|
||||
|
||||
chmod 777 /
|
||||
|
||||
@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
timeout 120 service clickhouse-server start
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
start
|
||||
start_clickhouse
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
if ! /s3downloader --dataset-names $DATASETS; then
|
||||
@ -81,25 +66,20 @@ fi
|
||||
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
|
||||
while /bin/true; do
|
||||
merge_client_files_in_background
|
||||
sleep 2
|
||||
done &
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
|
||||
kill_clickhouse
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
@ -109,15 +89,10 @@ fi
|
||||
# more idiologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
wait_llvm_profdata
|
||||
|
||||
sleep 3
|
||||
|
||||
wait_llvm_profdata # 100% merged all parts
|
||||
|
||||
|
||||
cp /*.profraw /profraw ||:
|
||||
|
@ -1,4 +1,4 @@
|
||||
# docker build -t yandex/clickhouse-stateless-with-coverage-test .
|
||||
# docker build -t yandex/clickhouse-stateless-test-with-coverage .
|
||||
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
|
||||
FROM yandex/clickhouse-test-base
|
||||
|
||||
@ -28,7 +28,9 @@ RUN apt-get update -y \
|
||||
lsof \
|
||||
unixodbc \
|
||||
wget \
|
||||
qemu-user-static
|
||||
qemu-user-static \
|
||||
procps \
|
||||
psmisc
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
|
@ -2,27 +2,41 @@
|
||||
|
||||
kill_clickhouse () {
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "Will try to send second kill signal for sure"
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
sleep 5
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
@ -13,9 +13,9 @@ cmake .. \
|
||||
-DENABLE_CLICKHOUSE_SERVER=ON \
|
||||
-DENABLE_CLICKHOUSE_CLIENT=ON \
|
||||
-DUSE_STATIC_LIBRARIES=OFF \
|
||||
-DCLICKHOUSE_SPLIT_BINARY=ON \
|
||||
-DSPLIT_SHARED_LIBRARIES=ON \
|
||||
-DENABLE_LIBRARIES=OFF \
|
||||
-DUSE_UNWIND=ON \
|
||||
-DENABLE_UTILS=OFF \
|
||||
-DENABLE_TESTS=OFF
|
||||
```
|
||||
|
@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -273,13 +273,15 @@ SELECT
|
||||
sum(Duration) AS Duration
|
||||
FROM UAct
|
||||
GROUP BY UserID
|
||||
```
|
||||
|
||||
``` text
|
||||
┌──────────────UserID─┬─PageViews─┬─Duration─┐
|
||||
│ 4324182021466249494 │ 6 │ 185 │
|
||||
└─────────────────────┴───────────┴──────────┘
|
||||
```
|
||||
|
||||
``` sqk
|
||||
``` sql
|
||||
select count() FROM UAct
|
||||
```
|
||||
|
||||
|
@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster:
|
||||
</zookeeper>
|
||||
```
|
||||
|
||||
ClickHouse also supports storing replica meta information in an auxiliary ZooKeeper cluster by providing the ZooKeeper cluster name and path as engine arguments.
|
||||
In other words, it supports storing the metadata of different tables in different ZooKeeper clusters.
|
||||
|
||||
Example of setting the addresses of the auxiliary ZooKeeper cluster:
|
||||
|
||||
``` xml
|
||||
<auxiliary_zookeepers>
|
||||
<zookeeper2>
|
||||
<node index="1">
|
||||
<host>example_2_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="2">
|
||||
<host>example_2_2</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="3">
|
||||
<host>example_2_3</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper2>
|
||||
<zookeeper3>
|
||||
<node index="1">
|
||||
<host>example_3_1</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
</zookeeper3>
|
||||
</auxiliary_zookeepers>
|
||||
```
|
||||
|
||||
To store table metadata in an auxiliary ZooKeeper cluster instead of the default ZooKeeper cluster, we can use SQL to create the table with
|
||||
ReplicatedMergeTree engine as follows:
|
||||
|
||||
```
|
||||
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
|
||||
```
|
||||
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
|
||||
|
||||
If ZooKeeper isn’t set in the config file, you can’t create replicated tables, and any existing replicated tables will be read-only.
|
||||
@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co
|
||||
|
||||
```xml
|
||||
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_path>
|
||||
<default_replica_name>{replica}</default_replica_name>
|
||||
```
|
||||
|
||||
In this case, you can omit arguments when creating tables:
|
||||
|
@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse
|
||||
|
||||
## Single Node Setup {#single-node-setup}
|
||||
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
|
||||
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
|
||||
|
||||
For example, you have chosen `deb` packages and executed:
|
||||
|
||||
|
@ -5,7 +5,7 @@ toc_title: Overview
|
||||
|
||||
# What Is ClickHouse? {#what-is-clickhouse}
|
||||
|
||||
ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
|
||||
In a “normal” row-oriented DBMS, data is stored in this order:
|
||||
|
||||
|
@ -111,7 +111,7 @@ toc_title: Adopters
|
||||
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
|
||||
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
|
||||
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
|
||||
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
|
||||
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
|
||||
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
|
||||
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
|
||||
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
|
||||
|
@ -44,11 +44,10 @@ stages, such as query planning or distributed queries.
|
||||
|
||||
To be useful, the tracing information has to be exported to a monitoring system
|
||||
that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids
|
||||
a dependency on a particular monitoring system, instead only
|
||||
providing the tracing data conforming to the standard. A natural way to do so
|
||||
in an SQL RDBMS is a system table. OpenTelemetry trace span information
|
||||
a dependency on a particular monitoring system, instead only providing the
|
||||
tracing data through a system table. OpenTelemetry trace span information
|
||||
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
|
||||
is stored in the system table called `system.opentelemetry_span_log`.
|
||||
is stored in the `system.opentelemetry_span_log` table.
|
||||
|
||||
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
|
||||
element in the default config file `config.xml`. It is enabled by default.
|
||||
@ -67,3 +66,31 @@ The table has the following columns:
|
||||
|
||||
The tags or attributes are saved as two parallel arrays, containing the keys
|
||||
and values. Use `ARRAY JOIN` to work with them.
|
||||
|
||||
## Integration with monitoring systems
|
||||
|
||||
At the moment, there is no ready tool that can export the tracing data from
|
||||
ClickHouse to a monitoring system.
|
||||
|
||||
For testing, it is possible to set up the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format:
|
||||
|
||||
```sql
|
||||
CREATE MATERIALIZED VIEW default.zipkin_spans
|
||||
ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
|
||||
SETTINGS output_format_json_named_tuples_as_objects = 1,
|
||||
output_format_json_array_of_rows = 1 AS
|
||||
SELECT
|
||||
lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
|
||||
lower(hex(parent_span_id)) AS parentId,
|
||||
lower(hex(span_id)) AS id,
|
||||
operation_name AS name,
|
||||
start_time_us AS timestamp,
|
||||
finish_time_us - start_time_us AS duration,
|
||||
cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
|
||||
cast(tuple(
|
||||
attribute.values[indexOf(attribute.names, 'db.statement')]),
|
||||
'Tuple("db.statement" text)') AS tags
|
||||
FROM system.opentelemetry_span_log
|
||||
```
|
||||
|
||||
In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
|
||||
|
@ -2317,4 +2317,10 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
|
||||
|
||||
Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format.
|
||||
|
||||
Default value: `\N`.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
70
docs/en/operations/system-tables/replicated_fetches.md
Normal file
70
docs/en/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@
|
||||
# system.replicated_fetches {#system_tables-replicated_fetches}
|
||||
|
||||
Contains information about currently running background fetches.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
|
||||
|
||||
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since showing currently running background fetches started.
|
||||
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
|
||||
|
||||
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of showing currently running background fetches.
|
||||
|
||||
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of showing currently running background fetches.
|
||||
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
|
||||
|
||||
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
|
||||
|
||||
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
|
||||
|
||||
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
|
||||
|
||||
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
|
||||
|
||||
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
|
||||
|
||||
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
|
||||
|
||||
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
|
||||
|
||||
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
|
||||
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: t
|
||||
elapsed: 7.243039876
|
||||
progress: 0.41832135995612835
|
||||
result_part_name: all_0_0_0
|
||||
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
|
||||
partition_id: all
|
||||
total_size_bytes_compressed: 1052783726
|
||||
bytes_read_compressed: 440401920
|
||||
source_replica_path: /clickhouse/test/t/replicas/1
|
||||
source_replica_hostname: node1
|
||||
source_replica_port: 9009
|
||||
interserver_scheme: http
|
||||
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
|
||||
to_detached: 0
|
||||
thread_id: 54
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
|
@ -1,42 +1,42 @@
|
||||
# ClickHouse obfuscator
|
||||
|
||||
Simple tool for table data obfuscation.
|
||||
A simple tool for table data obfuscation.
|
||||
|
||||
It reads input table and produces output table, that retain some properties of input, but contains different data.
|
||||
It allows to publish almost real production data for usage in benchmarks.
|
||||
It reads an input table and produces an output table, that retains some properties of input, but contains different data.
|
||||
It allows publishing almost real production data for usage in benchmarks.
|
||||
|
||||
It is designed to retain the following properties of data:
|
||||
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
|
||||
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, NULLs;
|
||||
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
|
||||
- conditional cardinalities: number of distinct values of one column under the condition on the value of another column;
|
||||
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
|
||||
- probability distributions of the length of strings;
|
||||
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
|
||||
|
||||
- data compression ratio when compressed with LZ77 and entropy family of codecs;
|
||||
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
|
||||
- date component of DateTime values;
|
||||
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
|
||||
- date component of `DateTime` values;
|
||||
|
||||
- UTF-8 validity of string values;
|
||||
- string values continue to look somewhat natural.
|
||||
- string values look natural.
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregation and sorting will work at almost the same speed
|
||||
reading data, filtering, aggregation, and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
|
||||
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
|
||||
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.
|
||||
Some transformations are one to one and could be reversed, so you need to have a large seed and keep it secret.
|
||||
|
||||
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
|
||||
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
|
||||
It uses some cryptographic primitives to transform data but from the cryptographic point of view, it doesn't do it properly, that is why you should not consider the result as secure unless you have another reason. The result may retain some data you don't want to publish.
|
||||
|
||||
It may retain some data you don't want to publish.
|
||||
|
||||
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
|
||||
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
So, the user will be able to count exact ratio of mobile traffic.
|
||||
It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data.
|
||||
For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value.
|
||||
|
||||
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
|
||||
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
|
||||
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
|
||||
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
|
||||
So, the user will be able to count the exact ratio of mobile traffic.
|
||||
|
||||
This tool works fine only with reasonable amount of data (at least 1000s of rows).
|
||||
Let's give another example. Suppose you have some private data in your table, like user emails, and you don't want to publish any single email address.
|
||||
If your table is large enough and contains multiple different emails and no email has a much higher frequency than all others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them.
|
||||
You should look at the working algorithm of this tool and fine-tune its command-line parameters.
|
||||
|
||||
This tool works fine only with an average amount of data (at least 1000s of rows).
|
||||
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
|
||||
|
||||
Now you can use the `groupArray` function to create an array from the `y` column:
|
||||
|
||||
``` sql
|
||||
|
@ -4,4 +4,59 @@ toc_priority: 5
|
||||
|
||||
# avg {#agg_function-avg}
|
||||
|
||||
Calculates the average. Only works for numbers. The result is always Float64.
|
||||
Calculates the arithmetic mean.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
avg(x)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `x` — Values.
|
||||
|
||||
`x` must be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `NaN` if the supplied parameter is empty.
|
||||
- Mean otherwise.
|
||||
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(x)─┐
|
||||
│ 2.5 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avg(t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avg(t)─┐
|
||||
│ nan │
|
||||
└────────┘
|
||||
```
|
||||
|
@ -14,17 +14,21 @@ avgWeighted(x, weight)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
|
||||
- `x` — Values.
|
||||
- `weight` — Weights of the values.
|
||||
|
||||
Type of `x` and `weight` must be the same.
|
||||
`x` and `weight` must both be
|
||||
[Integer](../../../sql-reference/data-types/int-uint.md),
|
||||
[floating-point](../../../sql-reference/data-types/float.md), or
|
||||
[Decimal](../../../sql-reference/data-types/decimal.md),
|
||||
but may have different types.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Weighted mean.
|
||||
- `NaN`. If all the weights are equal to 0.
|
||||
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
|
||||
- Weighted mean otherwise.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -42,3 +46,54 @@ Result:
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ 8 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT avgWeighted(x, w)
|
||||
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE table test (t UInt8) ENGINE = Memory;
|
||||
SELECT avgWeighted(t, t) FROM test
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─avgWeighted(x, weight)─┐
|
||||
│ nan │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -0,0 +1,37 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
|
||||
Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — The column to pass to the function as its argument. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
Returns the result of the aggregation for your input rows. The return type will be the same as the return type of the function that `initializeAggregation` takes as its first argument.
|
||||
For example for functions with the suffix `State` the return type will be `AggregateFunction`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Result:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
|
@ -0,0 +1,53 @@
|
||||
## rankCorr {#agg_function-rankcorr}
|
||||
|
||||
Computes a rank correlation coefficient.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
rankCorr(x, y)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If fewer than two arguments are passed, the function throws an exception. A value close to +1 denotes a high linear relationship: as one random variable increases, the second random variable also increases. A value close to -1 denotes a high linear relationship: as one random variable increases, the second random variable decreases. A value close or equal to 0 denotes no relationship between the two random variables.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT rankCorr(number, number) FROM numbers(100);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─rankCorr(number, number)─┐
|
||||
│ 1 │
|
||||
└──────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
|
||||
│ -0.037 │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
**See Also**
|
||||
|
||||
- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
|
@ -67,9 +67,8 @@ Leap seconds are not accounted for.
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
|
||||
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
For Date argument: the behaviour is unspecified.
|
||||
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -535,18 +534,7 @@ dateDiff('unit', startdate, enddate, [timezone])
|
||||
|
||||
- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
|
||||
Supported values:
|
||||
|
||||
| unit |
|
||||
| ---- |
|
||||
|second |
|
||||
|minute |
|
||||
|hour |
|
||||
|day |
|
||||
|week |
|
||||
|month |
|
||||
|quarter |
|
||||
|year |
|
||||
Supported values: second, minute, hour, day, week, month, quarter, year.
|
||||
|
||||
- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
|
@ -15,10 +15,18 @@ A function that allows grouping multiple columns.
|
||||
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
|
||||
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Tuple](../../sql-reference/functions/tuple-functions.md#tuple)
|
||||
|
||||
## tupleElement(tuple, n), operator x.N {#tupleelementtuple-n-operator-x-n}
|
||||
|
||||
A function that allows getting a column from a tuple.
|
||||
‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strictly positive integer no greater than the size of the tuple.
|
||||
There is no cost to execute the function.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [TupleElement](../../sql-reference/functions/tuple-functions.md#tupleelement)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/in_functions/) <!--hide-->
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
toc_priority: 66
|
||||
toc_priority: 67
|
||||
toc_title: Other
|
||||
---
|
||||
|
||||
|
118
docs/en/sql-reference/functions/tuple-functions.md
Normal file
118
docs/en/sql-reference/functions/tuple-functions.md
Normal file
@ -0,0 +1,118 @@
|
||||
---
|
||||
toc_priority: 66
|
||||
toc_title: Tuples
|
||||
---
|
||||
|
||||
# Functions for Working with Tuples {#tuple-functions}
|
||||
|
||||
## Tuple {#tuple}
|
||||
|
||||
A function that allows grouping multiple columns.
|
||||
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
|
||||
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tuple(x, y, …)
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Operator (x, y, …)](../../sql-reference/functions/in-functions.md#tuplex-y-operator-x-y)
|
||||
|
||||
## TupleElement {#tupleelement}
|
||||
|
||||
A function that allows getting a column from a tuple.
|
||||
‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strictly positive integer no greater than the size of the tuple.
|
||||
There is no cost to execute the function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tupleElement(tuple, n)
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Operator x.N](../../sql-reference/functions/in-functions.md#tupleelementtuple-n-operator-x-n)
|
||||
|
||||
## Untuple {#untuple}
|
||||
|
||||
Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
untuple(x)
|
||||
```
|
||||
|
||||
You can use the `EXCEPT` expression to skip columns as a result of the query.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- None.
|
||||
|
||||
**Examples**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
Example of using a `Tuple`-type column as the `untuple` function parameter:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT untuple(v6) FROM kv;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─_ut_1─┬─_ut_2─┐
|
||||
│ 33 │ ab │
|
||||
│ 44 │ cd │
|
||||
│ 55 │ ef │
|
||||
│ 66 │ gh │
|
||||
│ 77 │ kl │
|
||||
└───────┴───────┘
|
||||
```
|
||||
|
||||
Example of using an `EXCEPT` expression:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Tuple](../../sql-reference/data-types/tuple.md)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-functions/) <!--hide-->
|
@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).
|
||||
|
||||
For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
|
||||
- `cutToFirstSignificantSubdomain('tr') = ''`.
|
||||
|
||||
### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
|
||||
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`.
|
||||
- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`.
|
||||
|
||||
### port(URL\[, default_port = 0\]) {#port}
|
||||
|
||||
|
@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
|
||||
|
||||
### Drawbacks {#drawbacks}
|
||||
|
||||
Queries that use `FINAL` are executed not as fast as similar queries that don’t, because:
|
||||
Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
|
||||
|
||||
- Query is executed in a single thread and data is merged during query execution.
|
||||
- Data is merged during query execution.
|
||||
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
|
||||
|
||||
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}
|
||||
|
@ -6,7 +6,7 @@ toc_title: GROUP BY
|
||||
|
||||
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
|
||||
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
|
||||
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
|
||||
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
|
||||
|
||||
@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
|
||||
|
||||
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
|
||||
|
||||
## WITH ROLLUP Modifier {#with-rollup-modifier}
|
||||
|
||||
`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression.
|
||||
|
||||
In the subtotals rows the values of already "grouped" key expressions are set to `0` or an empty string.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
As `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (and `day` column is filled with zeros);
|
||||
- `GROUP BY year` (now `month, day` columns are both filled with zeros);
|
||||
- and totals (and all three key expression columns are zeros).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## WITH CUBE Modifier {#with-cube-modifier}
|
||||
|
||||
`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
In the subtotals rows the values of all "grouped" key expressions are set to `0` or an empty string.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- and totals.
|
||||
|
||||
Columns, excluded from `GROUP BY`, are filled with zeros.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## WITH TOTALS Modifier {#with-totals-modifier}
|
||||
|
||||
If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values).
|
||||
@ -88,8 +236,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions.
|
||||
|
||||
As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’.
|
||||
|
||||
Example:
|
||||
@ -105,10 +251,6 @@ GROUP BY domain
|
||||
|
||||
For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.
|
||||
|
||||
`GROUP BY` is not supported for array columns.
|
||||
|
||||
A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
|
||||
|
@ -20,12 +20,12 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[UNION ALL ...]
|
||||
[UNION ...]
|
||||
[INTO OUTFILE filename]
|
||||
[FORMAT format]
|
||||
```
|
||||
@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
|
||||
- [SELECT clause](#select-clause)
|
||||
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
|
||||
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
|
||||
- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
|
||||
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
|
||||
- [FORMAT clause](../../../sql-reference/statements/select/format.md)
|
||||
|
||||
@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
|
||||
|
||||
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
|
||||
## SELECT modifiers {#select-modifiers}
|
||||
|
||||
You can use the following modifiers in `SELECT` queries.
|
||||
|
||||
### APPLY {#apply-modifier}
|
||||
|
||||
Allows you to invoke some function for each row returned by an outer table expression of a query.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
|
||||
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
|
||||
SELECT * APPLY(sum) FROM columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
|
||||
│ 220 │ 18 │ 347 │
|
||||
└────────┴────────┴────────┘
|
||||
```
|
||||
|
||||
### EXCEPT {#except-modifier}
|
||||
|
||||
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * EXCEPT (i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌──j─┬───k─┐
|
||||
│ 10 │ 324 │
|
||||
│ 8 │ 23 │
|
||||
└────┴─────┘
|
||||
```
|
||||
|
||||
### REPLACE {#replace-modifier}
|
||||
|
||||
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
|
||||
|
||||
This modifier does not change the names or order of columns. However, it can change the value and the value type.
|
||||
|
||||
**Syntax:**
|
||||
|
||||
``` sql
|
||||
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌───i─┬──j─┬───k─┐
|
||||
│ 101 │ 10 │ 324 │
|
||||
│ 121 │ 8 │ 23 │
|
||||
└─────┴────┴─────┘
|
||||
```
|
||||
|
||||
### Modifier Combinations {#modifier-combinations}
|
||||
|
||||
You can use each modifier separately or combine them.
|
||||
|
||||
**Examples:**
|
||||
|
||||
Using the same modifier multiple times.
|
||||
|
||||
``` sql
|
||||
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
|
||||
│ 2 │ 3 │
|
||||
└──────────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
Using multiple modifiers in a single query.
|
||||
|
||||
``` sql
|
||||
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
|
||||
```
|
||||
|
||||
```
|
||||
┌─sum(plus(i, 1))─┬─sum(k)─┐
|
||||
│ 222 │ 347 │
|
||||
└─────────────────┴────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
|
||||
<!--hide-->
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
toc_title: UNION ALL
|
||||
toc_title: UNION
|
||||
---
|
||||
|
||||
# UNION ALL Clause {#union-all-clause}
|
||||
@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined
|
||||
|
||||
Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
|
||||
|
||||
## Limitations {#limitations}
|
||||
# UNION DISTINCT Clause {#union-distinct-clause}
|
||||
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` applies a distinct transform to the union result. It is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
# UNION Clause {#union-clause}
|
||||
By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify the union mode with the `union_default_mode` setting. Its value can be 'ALL', 'DISTINCT' or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown.
|
||||
|
||||
Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
||||
Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
|
||||
Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.
|
||||
|
@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
|
||||
| googletest | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
|
||||
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
|
||||
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
|
||||
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -2187,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
|
||||
|
||||
Значение по умолчанию: `1`.
|
||||
|
||||
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
|
||||
|
||||
Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV.
|
||||
|
||||
Значение по умолчанию: `\N`.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
|
||||
|
70
docs/ru/operations/system-tables/replicated_fetches.md
Normal file
70
docs/ru/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@
|
||||
# system.replicated_fetches {#system_tables-replicated_fetches}
|
||||
|
||||
Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик.
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
|
||||
|
||||
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах.
|
||||
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1.
|
||||
|
||||
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска.
|
||||
|
||||
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску.
|
||||
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции.
|
||||
|
||||
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах.
|
||||
|
||||
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах.
|
||||
|
||||
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике.
|
||||
|
||||
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики.
|
||||
|
||||
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики.
|
||||
|
||||
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы.
|
||||
|
||||
- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса.
|
||||
|
||||
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях.
|
||||
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: t
|
||||
elapsed: 7.243039876
|
||||
progress: 0.41832135995612835
|
||||
result_part_name: all_0_0_0
|
||||
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
|
||||
partition_id: all
|
||||
total_size_bytes_compressed: 1052783726
|
||||
bytes_read_compressed: 440401920
|
||||
source_replica_path: /clickhouse/test/t/replicas/1
|
||||
source_replica_hostname: node1
|
||||
source_replica_port: 9009
|
||||
interserver_scheme: http
|
||||
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
|
||||
to_detached: 0
|
||||
thread_id: 54
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
|
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
43
docs/ru/operations/utilities/clickhouse-obfuscator.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Обфускатор ClickHouse
|
||||
|
||||
Простой инструмент для обфускации табличных данных.
|
||||
|
||||
Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные.
|
||||
|
||||
Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность.
|
||||
|
||||
Обфускатор предназначен для сохранения следующих свойств данных:
|
||||
- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов;
|
||||
- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца;
|
||||
- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой;
|
||||
- вероятностное распределение длины строк;
|
||||
- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`;
|
||||
- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков;
|
||||
|
||||
- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой;
|
||||
- дату из значений `DateTime`;
|
||||
|
||||
- кодировка UTF-8 значений строки;
|
||||
- строковые значения выглядят естественным образом.
|
||||
|
||||
|
||||
Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д.
|
||||
|
||||
Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором.
|
||||
|
||||
Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете.
|
||||
|
||||
|
||||
Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать.
|
||||
|
||||
|
||||
Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги.
|
||||
Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение.
|
||||
|
||||
Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
|
||||
|
||||
Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
|
||||
Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
|
||||
В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
|
||||
|
||||
Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`.
|
||||
|
||||
Теперь с помощью функции `groupArray` сформируем массив из столбца `y`:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,40 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Инициализирует агрегацию для введенных строчек. Предназначена для функций с суффиксом `State`.
|
||||
Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregatingMergeTree`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAggregation`.
|
||||
|
||||
Пример:
|
||||
|
||||
Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Результат:
|
||||
|
||||
┌─uniqMerge(state)─┐
|
||||
│ 3 │
|
||||
└──────────────────┘
|
@ -0,0 +1,53 @@
|
||||
## rankCorr {#agg_function-rankcorr}
|
||||
|
||||
Вычисляет коэффициент ранговой корреляции.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
rankCorr(x, y)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
- `y` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Возвращает коэффициент ранговой корреляции рангов x и y. Значение коэффициента корреляции изменяется в пределах от -1 до +1. Если передается менее двух аргументов, функция генерирует исключение. Значение, близкое к +1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины увеличивается и вторая случайная величина. Значение, близкое к -1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины вторая случайная величина уменьшается. Значение, близкое или равное 0, означает отсутствие связи между двумя случайными величинами.
|
||||
|
||||
Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT rankCorr(number, number) FROM numbers(100);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─rankCorr(number, number)─┐
|
||||
│ 1 │
|
||||
└──────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
|
||||
│ -0.037 │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
**Смотрите также**
|
||||
|
||||
- [Коэффициент ранговой корреляции Спирмена](https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D1%8F#%D0%9A%D0%BE%D1%8D%D1%84%D1%84%D0%B8%D1%86%D0%B8%D0%B5%D0%BD%D1%82_%D1%80%D0%B0%D0%BD%D0%B3%D0%BE%D0%B2%D0%BE%D0%B9_%D0%BA%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D0%B8_%D0%A1%D0%BF%D0%B8%D1%80%D0%BC%D0%B5%D0%BD%D0%B0)
|
@ -57,32 +57,31 @@ SELECT
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
|
||||
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
|
||||
For Date argument: the behaviour is unspecified.
|
||||
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию).
|
||||
|
||||
**Syntax**
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toUnixTimestamp(datetime)
|
||||
toUnixTimestamp(str, [timezone])
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Returns the unix timestamp.
|
||||
- Возвращает Unix Timestamp.
|
||||
|
||||
Type: `UInt32`.
|
||||
Тип: `UInt32`.
|
||||
|
||||
**Example**
|
||||
**Пример**
|
||||
|
||||
Query:
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp
|
||||
```
|
||||
|
||||
Result:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─unix_timestamp─┐
|
||||
|
@ -15,10 +15,18 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
|
||||
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
|
||||
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Tuple](../../sql-reference/functions/tuple-functions.md#tuple)
|
||||
|
||||
## tupleElement(tuple, n), оператор x.N {#tupleelementtuple-n-operator-x-n}
|
||||
|
||||
Функция, позволяющая достать столбец из кортежа.
|
||||
N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
|
||||
Выполнение функции ничего не стоит.
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [TupleElement](../../sql-reference/functions/tuple-functions.md#tupleelement)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/in_functions/) <!--hide-->
|
||||
|
118
docs/ru/sql-reference/functions/tuple-functions.md
Normal file
118
docs/ru/sql-reference/functions/tuple-functions.md
Normal file
@ -0,0 +1,118 @@
|
||||
---
|
||||
toc_priority: 68
|
||||
toc_title: Функции для работы с кортежами
|
||||
---
|
||||
|
||||
# Функции для работы с кортежами {#tuple-functions}
|
||||
|
||||
## Tuple {#tuple}
|
||||
|
||||
Функция, позволяющая сгруппировать несколько столбцов.
|
||||
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
|
||||
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tuple(x, y, …)
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Оператор (x, y, …)](../../sql-reference/functions/in-functions.md#tuplex-y-operator-x-y)
|
||||
|
||||
## TupleElement {#tupleelement}
|
||||
|
||||
Функция, позволяющая достать столбец из кортежа.
|
||||
N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
|
||||
Выполнение функции ничего не стоит.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tupleElement(tuple, n)
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Оператор x.N](../../sql-reference/functions/in-functions.md#tupleelementtuple-n-operator-x-n)
|
||||
|
||||
## Untuple {#untuple}
|
||||
|
||||
Выполняет синтаксическую подстановку элементов [кортежа](../../sql-reference/data-types/tuple.md#tuplet1-t2) в место вызова.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
untuple(x)
|
||||
```
|
||||
|
||||
Чтобы пропустить некоторые столбцы в результате запроса, вы можете использовать выражение `EXCEPT`.
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` - функция `tuple`, столбец или кортеж элементов. [Tuple](../../sql-reference/data-types/tuple.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Нет.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Входная таблица:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
Пример использования столбца типа `Tuple` в качестве параметра функции `untuple`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT untuple(v6) FROM kv;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─_ut_1─┬─_ut_2─┐
|
||||
│ 33 │ ab │
|
||||
│ 44 │ cd │
|
||||
│ 55 │ ef │
|
||||
│ 66 │ gh │
|
||||
│ 77 │ kl │
|
||||
└───────┴───────┘
|
||||
```
|
||||
|
||||
Пример использования выражения `EXCEPT`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
|
||||
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
|
||||
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
|
||||
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
|
||||
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
|
||||
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
|
||||
└─────┴────┴────┴────┴───────────┘
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Tuple](../../sql-reference/data-types/tuple.md)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-functions/) <!--hide-->
|
@ -27,9 +27,9 @@ toc_title: FROM
|
||||
|
||||
### Недостатки {#drawbacks}
|
||||
|
||||
Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что:
|
||||
Запросы, которые используют `FINAL`, выполняются немного медленнее, чем аналогичные запросы без него, потому что:
|
||||
|
||||
- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса.
|
||||
- Данные мёржатся во время выполнения запроса.
|
||||
- Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
|
||||
|
||||
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}
|
||||
|
@ -43,6 +43,153 @@ toc_title: GROUP BY
|
||||
|
||||
Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
|
||||
|
||||
## Модификатор WITH ROLLUP {#with-rollup-modifier}
|
||||
|
||||
Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (а колонка `day` заполнена нулями);
|
||||
- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
|
||||
- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## Модификатор WITH CUBE {#with-cube-modifier}
|
||||
|
||||
Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- и общий итог.
|
||||
|
||||
Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Модификатор WITH TOTALS {#with-totals-modifier}
|
||||
|
||||
Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения).
|
||||
@ -86,8 +233,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций.
|
||||
|
||||
В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max.
|
||||
|
||||
Пример:
|
||||
@ -103,10 +248,6 @@ GROUP BY domain
|
||||
|
||||
GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций.
|
||||
|
||||
Не поддерживается GROUP BY по столбцам-массивам.
|
||||
|
||||
Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки.
|
||||
|
@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
|
@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
|
||||
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -11,7 +11,6 @@
|
||||
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
|
||||
|
@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00"
|
||||
|
||||
# 示例数据集 {#example-datasets}
|
||||
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。
|
||||
本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集,还可以使用示例查询。
|
||||
|
||||
对于某些数据集示例查询也可用。
|
||||
|
||||
- [脱敏的Yandex.Metrica数据集](metrica.md)
|
||||
- [星型基准测试](star-schema.md)
|
||||
- [维基访问数据](wikistat.md)
|
||||
- [Criteo TB级别点击日志](criteo.md)
|
||||
- [AMPLab大数据基准测试](amplab-benchmark.md)
|
||||
- [纽约出租车数据](nyc-taxi.md)
|
||||
- [航班飞行数据](ontime.md)
|
||||
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
|
||||
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
|
||||
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
|
||||
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
|
||||
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
|
||||
- [OnTime](../../getting-started/example-datasets/ontime.md)
|
||||
|
||||
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
|
||||
|
@ -1,17 +1,17 @@
|
||||
---
|
||||
toc_priority: 21
|
||||
toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
|
||||
toc_priority: 15
|
||||
toc_title: Yandex.Metrica Data
|
||||
---
|
||||
|
||||
# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
|
||||
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
|
||||
|
||||
Dataset由两个表组成,其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`)的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科。
|
||||
数据集由两个表组成,包含关于Yandex.Metrica的hits(`hits_v1`)和visit(`visits_v1`)的匿名数据。你可以在[ClickHouse历史](../../introduction/history.md)部分阅读更多关于Yandex.Metrica的信息。
|
||||
|
||||
数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
|
||||
数据集由两个表组成,他们中的任何一个都可以下载作为一个压缩`tsv.xz`的文件或准备的分区。除此之外,一个扩展版的`hits`表包含1亿行TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz,准备分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz。
|
||||
|
||||
## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}
|
||||
|
||||
下载和导入点击表:
|
||||
下载和导入`hits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
|
||||
@ -21,7 +21,7 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
下载和导入访问:
|
||||
下载和导入`visits`表:
|
||||
|
||||
``` bash
|
||||
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
|
||||
@ -31,9 +31,9 @@ sudo service clickhouse-server restart
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
```
|
||||
|
||||
## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}
|
||||
|
||||
从压缩的TSV文件下载并导入命中:
|
||||
从TSV压缩文件下载并导入`hits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
|
||||
@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
|
||||
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
|
||||
```
|
||||
|
||||
从压缩tsv文件下载和导入访问:
|
||||
从压缩tsv文件下载和导入`visits`:
|
||||
|
||||
``` bash
|
||||
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
|
||||
@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
|
||||
|
||||
## 查询示例 {#example-queries}
|
||||
|
||||
[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
|
||||
[使用教程](../../getting-started/tutorial.md)是以Yandex.Metrica数据集开始教程。
|
||||
|
||||
查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的(它们被命名为 `test.hists` 和 `test.visits` 那里)。
|
||||
可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到对这些表的查询的其他示例(它们被命名为`test.hits`和`test.visits`)。
|
||||
|
@ -1,3 +1,10 @@
|
||||
---
|
||||
machine_translated: true
|
||||
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
|
||||
toc_folder_title: "\u5BFC\u8A00"
|
||||
toc_priority: 2
|
||||
---
|
||||
|
||||
# 入门 {#ru-men}
|
||||
|
||||
如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md).
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user