Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix

This commit is contained in:
fenglv 2020-11-28 03:38:29 +00:00
commit 5c5d72379a
795 changed files with 20297 additions and 6803 deletions

View File

@ -2,8 +2,7 @@
name: Documentation issue
about: Report something incorrect or missing in documentation
title: ''
labels: documentation
assignees: BayoNet
labels: comp-documentation
---

2
.gitignore vendored
View File

@ -124,3 +124,5 @@ website/package-lock.json
# Toolchains
/cmake/toolchain/*
*.iml

12
.gitmodules vendored
View File

@ -44,6 +44,7 @@
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/ClickHouse-Extras/protobuf.git
branch = v3.13.0.1
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse-Extras/boost.git
@ -107,6 +108,7 @@
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/ClickHouse-Extras/grpc.git
branch = v1.33.2
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
@ -196,7 +198,11 @@
[submodule "contrib/rocksdb"]
path = contrib/rocksdb
url = https://github.com/facebook/rocksdb
branch = v6.11.4
branch = v6.14.5
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz
path = contrib/xz
url = https://github.com/xz-mirror/xz
[submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
branch = lts_2020_02_25

View File

@ -154,17 +154,19 @@ endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
if (OS_LINUX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
if (ARCH_AMD64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
elseif (ARCH_AARCH64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
if (ARCH_AMD64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
elseif (ARCH_AARCH64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
endif ()
else ()
message(FATAL_ERROR "Cannot find objcopy.")
endif ()
else ()
message(FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)
@ -475,9 +477,6 @@ find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "")
find_contrib_lib(btrie)
if (ENABLE_TESTS)
include (cmake/find/gtest.cmake)
endif ()

View File

@ -1,6 +1,6 @@
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
## Useful Links
@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020.
* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020.

View File

@ -1,6 +1,7 @@
#pragma once
#include <cassert>
#include <stdexcept> // for std::logic_error
#include <string>
#include <vector>
#include <functional>

View File

@ -3,7 +3,6 @@
/// Macros for convenient usage of Poco logger.
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
#include <Common/CurrentThread.h>

View File

@ -0,0 +1,19 @@
#define _GNU_SOURCE
#include <sys/socket.h>
#include <errno.h>
#include <fcntl.h>
#include "syscall.h"
/*
 * accept4() replacement.
 *
 * With no flags this is a plain accept(). Otherwise the accept4 system call is
 * attempted first; if it fails with ENOSYS or EINVAL the connection is taken
 * with accept() and the requested SOCK_CLOEXEC / SOCK_NONBLOCK flags are then
 * applied to the new descriptor with fcntl system calls.
 *
 * Returns the new descriptor, or the negative result of the failing call.
 */
int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg)
{
    int sock;

    /* No flags requested: nothing accept4-specific to do. */
    if (flg == 0)
        return accept(fd, addr, len);

    sock = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
    if (sock >= 0)
        return sock;

    /* Only "not implemented" / "invalid argument" trigger the emulation below. */
    if (!(errno == ENOSYS || errno == EINVAL))
        return sock;

    sock = accept(fd, addr, len);
    if (sock >= 0) {
        /* The flags are applied after the descriptor already exists. */
        if (flg & SOCK_CLOEXEC)
            __syscall(SYS_fcntl, sock, F_SETFD, FD_CLOEXEC);
        if (flg & SOCK_NONBLOCK)
            __syscall(SYS_fcntl, sock, F_SETFL, O_NONBLOCK);
    }
    return sock;
}

View File

@ -0,0 +1,37 @@
#include <sys/epoll.h>
#include <signal.h>
#include <errno.h>
#include "syscall.h"
/*
 * Creates an epoll instance by delegating to epoll_create1(0).
 * The size argument is accepted for interface compatibility but is not used.
 */
int epoll_create(int size)
{
return epoll_create1(0);
}
/*
 * Creates an epoll instance through the epoll_create1 system call.
 *
 * When the platform defines SYS_epoll_create, a result of -ENOSYS combined
 * with flags == 0 makes the function retry with the older epoll_create system
 * call. The raw result is handed to __syscall_ret(), which is defined outside
 * this file (presumably it maps a negative errno result to -1 and sets errno;
 * confirm against syscall.h).
 */
int epoll_create1(int flags)
{
int r = __syscall(SYS_epoll_create1, flags);
#ifdef SYS_epoll_create
/* The legacy call has no flags argument, so it is only usable when none were requested. */
if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1);
#endif
return __syscall_ret(r);
}
/*
 * Forwards fd, op, fd2 and ev unchanged to the epoll_ctl system call and
 * returns whatever the syscall() wrapper macro produces.
 */
int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
{
return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
}
/*
 * Waits for events through the epoll_pwait system call.
 *
 * _NSIG/8 is passed as the final argument (presumably the size of the signal
 * mask in bytes expected by the kernel; confirm against the syscall ABI).
 * When the platform defines SYS_epoll_wait, a result of -ENOSYS combined with
 * a null sigs pointer makes the function retry with the plain epoll_wait
 * system call. The raw result is handed to __syscall_ret().
 */
int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
{
int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
#ifdef SYS_epoll_wait
/* epoll_wait cannot take a signal mask, so it is only a valid substitute when none was given. */
if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
#endif
return __syscall_ret(r);
}
/* Waits for events by calling epoll_pwait() with a null signal mask. */
int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
{
return epoll_pwait(fd, ev, cnt, to, 0);
}

View File

@ -0,0 +1,23 @@
#include <sys/eventfd.h>
#include <unistd.h>
#include <errno.h>
#include "syscall.h"
/*
 * Creates an eventfd object through the eventfd2 system call.
 *
 * When the platform defines SYS_eventfd, a result of -ENOSYS combined with
 * flags == 0 makes the function retry with the older eventfd system call,
 * which takes only the initial count. The raw result is handed to
 * __syscall_ret(), which is defined outside this file.
 */
int eventfd(unsigned int count, int flags)
{
int r = __syscall(SYS_eventfd2, count, flags);
#ifdef SYS_eventfd
/* The legacy call has no flags argument, so it is only usable when none were requested. */
if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count);
#endif
return __syscall_ret(r);
}
/*
 * Reads one eventfd_t from fd into *value.
 * Returns 0 when exactly sizeof(eventfd_t) bytes were read, -1 otherwise
 * (including read() errors and short reads).
 */
int eventfd_read(int fd, eventfd_t *value)
{
    const size_t want = sizeof(*value);
    ssize_t got = read(fd, value, want);

    /* A negative result converts to a huge size_t and therefore never equals want. */
    if ((size_t) got != want)
        return -1;
    return 0;
}
/*
 * Writes value to fd as one eventfd_t.
 * Returns 0 when exactly sizeof(eventfd_t) bytes were written, -1 otherwise
 * (including write() errors and short writes).
 */
int eventfd_write(int fd, eventfd_t value)
{
    const size_t want = sizeof(value);
    ssize_t put = write(fd, &value, want);

    /* A negative result converts to a huge size_t and therefore never equals want. */
    if ((size_t) put != want)
        return -1;
    return 0;
}

View File

@ -0,0 +1,45 @@
#include <sys/auxv.h>
#include <unistd.h> // __environ
#include <errno.h>
// We don't have libc struct available here. Compute aux vector manually.
static unsigned long * __auxv = NULL;
static unsigned long __auxv_secure = 0;
/*
 * Scans the auxiliary vector, stored as consecutive (type, value) pairs and
 * terminated by a zero type, for the given type.
 * Returns the index of the matching value slot, or (size_t) -1 if the type is
 * not present. The caller must make sure __auxv is non-null.
 */
static size_t __find_auxv(unsigned long type)
{
    size_t pos = 0;

    while (__auxv[pos] != 0)
    {
        if (__auxv[pos] == type)
            return pos + 1; /* the value follows its type tag */
        pos += 2;
    }

    return (size_t) -1;
}
/*
 * Constructor: locates the auxiliary vector and caches the AT_SECURE value.
 * The vector is taken to start directly after the null pointer that
 * terminates the __environ array.
 */
__attribute__((constructor)) static void __auxv_init()
{
    char ** cursor = __environ;

    /* Walk to the terminating null entry of the environment. */
    while (*cursor)
        ++cursor;

    __auxv = (unsigned long *) (cursor + 1);

    size_t secure_slot = __find_auxv(AT_SECURE);
    if (secure_slot != (size_t) -1)
        __auxv_secure = __auxv[secure_slot];
}
/*
 * Returns the auxiliary vector value for the given type.
 *
 * AT_SECURE is answered from the value cached by the constructor. Any other
 * type is looked up in the vector if it has been located. When the type is not
 * found (or the vector is not available) errno is set to ENOENT and 0 is
 * returned.
 */
unsigned long getauxval(unsigned long type)
{
    size_t slot = (size_t) -1;

    if (type == AT_SECURE)
        return __auxv_secure;

    if (__auxv)
        slot = __find_auxv(type);

    if (slot == (size_t) -1)
    {
        errno = ENOENT;
        return 0;
    }

    return __auxv[slot];
}

View File

@ -0,0 +1,8 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/auxv.h>
/*
 * getenv() variant that refuses to answer when getauxval(AT_SECURE) is
 * non-zero: returns NULL in that case, otherwise the result of getenv(name).
 */
char * secure_getenv(const char * name)
{
    if (getauxval(AT_SECURE))
        return NULL;

    return getenv(name);
}

View File

@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...);
__attribute__((visibility("hidden")))
void *__vdsosym(const char *, const char *);
#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__))
#define socketcall(...) __syscall_ret(__socketcall(__VA_ARGS__))
#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
#define socketcall_cp socketcall

View File

@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
extern char** environ;
static Ehdr *eh = NULL;
void *__vdsosym(const char *vername, const char *name);
// We don't have libc struct available here. Compute aux vector manually.
__attribute__((constructor)) static void auxv_init()
{
size_t i, *auxv;
for (i=0; environ[i]; i++);
auxv = (void *)(environ+i+1);
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
if (!auxv[i]) return;
if (!auxv[i+1]) return;
eh = (void *)auxv[i+1];
}
void *__vdsosym(const char *vername, const char *name)
{
size_t i;
Ehdr * eh = (void *) getauxval(AT_SYSINFO_EHDR);
if (!eh) return 0;
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
size_t *dynv=0, base=-1;

View File

@ -1,44 +0,0 @@
# - Try to find btrie headers and libraries.
#
# Usage of this module as follows:
#
# find_package(btrie)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# BTRIE_ROOT_DIR Set this variable to the root installation of
# btrie if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# BTRIE_FOUND System has btrie libs/headers
# BTRIE_LIBRARIES The btrie library/libraries
# BTRIE_INCLUDE_DIR The location of btrie headers
# Installation prefix: the directory that contains include/btrie.h.
find_path(BTRIE_ROOT_DIR NAMES include/btrie.h)

# Header directory, searched under the prefix and any caller-supplied hints.
find_path(BTRIE_INCLUDE_DIR
    NAMES btrie.h
    PATHS ${BTRIE_ROOT_DIR}/include ${BTRIE_INCLUDE_PATHS})

# Library, searched under the prefix and any caller-supplied hints.
find_library(BTRIE_LIBRARIES
    NAMES btrie
    PATHS ${BTRIE_ROOT_DIR}/lib ${BTRIE_LIBRARIES_PATHS})

# Sets BTRIE_FOUND and prints the standard found / not-found message.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(btrie DEFAULT_MSG BTRIE_LIBRARIES BTRIE_INCLUDE_DIR)

# Keep the result variables out of the default cache view.
mark_as_advanced(BTRIE_ROOT_DIR BTRIE_LIBRARIES BTRIE_INCLUDE_DIR)

View File

@ -6,11 +6,9 @@ Defines the following variables:
The include directories of the gRPC framework, including the include directories of the C++ wrapper.
``gRPC_LIBRARIES``
The libraries of the gRPC framework.
``gRPC_UNSECURE_LIBRARIES``
The libraries of the gRPC framework without SSL.
``_gRPC_CPP_PLUGIN``
``gRPC_CPP_PLUGIN``
The plugin for generating gRPC client and server C++ stubs from `.proto` files
``_gRPC_PYTHON_PLUGIN``
``gRPC_PYTHON_PLUGIN``
The plugin for generating gRPC client and server Python stubs from `.proto` files
The following :prop_tgt:`IMPORTED` targets are also defined:
@ -19,6 +17,13 @@ The following :prop_tgt:`IMPORTED` targets are also defined:
``grpc_cpp_plugin``
``grpc_python_plugin``
Set the following variables to adjust the behaviour of this script:
``gRPC_USE_UNSECURE_LIBRARIES``
if set gRPC_LIBRARIES will be filled with the unsecure version of the libraries (i.e. without SSL)
instead of the secure ones.
``gRPC_DEBUG``
if set the debug message will be printed.
Add custom commands to process ``.proto`` files to C++::
protobuf_generate_grpc_cpp(<SRCS> <HDRS>
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc)
find_library(gRPC_CPP_LIBRARY NAMES grpc++)
find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure)
find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure)
find_library(gRPC_CARES_LIBRARY NAMES cares)
set(gRPC_LIBRARIES)
if(gRPC_USE_UNSECURE_LIBRARIES)
@ -259,6 +265,7 @@ else()
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY})
endif()
endif()
set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY})
# Restore the original find library ordering.
if(gRPC_USE_STATIC_LIBS)
@ -278,11 +285,11 @@ else()
endif()
# Get full path to plugin.
find_program(_gRPC_CPP_PLUGIN
find_program(gRPC_CPP_PLUGIN
NAMES grpc_cpp_plugin
DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files")
find_program(_gRPC_PYTHON_PLUGIN
find_program(gRPC_PYTHON_PLUGIN
NAMES grpc_python_plugin
DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files")
@ -317,14 +324,14 @@ endif()
#include(FindPackageHandleStandardArgs.cmake)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN)
REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY
gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN)
if(gRPC_FOUND)
if(gRPC_DEBUG)
message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}")
message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}")
message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}")
message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}")
message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}")
message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}")
endif()
endif()

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54443)
SET(VERSION_REVISION 54444)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 12)
SET(VERSION_MINOR 13)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1)
SET(VERSION_DESCRIBE v20.12.1.1-prestable)
SET(VERSION_STRING 20.12.1.1)
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
SET(VERSION_STRING 20.13.1.1)
# end of autochange

View File

@ -12,13 +12,7 @@ set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Minimal supported SDK version
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.15")
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
# Global libraries

View File

@ -1,3 +1,4 @@
# Needed when using Apache Avro serialization format
option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES})
if (NOT ENABLE_AVRO)

View File

@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY)
if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library")
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
elseif(NOT _gRPC_CPP_PLUGIN)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin")
elseif(NOT gRPC_CPP_PLUGIN)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grpc_cpp_plugin")
set(EXTERNAL_GRPC_LIBRARY_FOUND 0)
else()
set(EXTERNAL_GRPC_LIBRARY_FOUND 1)
@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
else()
set(gRPC_LIBRARIES grpc grpc++)
endif()
set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
set(_gRPC_PROTOC_EXECUTABLE $<TARGET_FILE:protobuf::protoc>)
set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>)
set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>)
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")
@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY)
set(USE_GRPC 1)
endif()
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}")
message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}")

View File

@ -1,3 +1,5 @@
# Needed when securely connecting to an external server, e.g.
# clickhouse-client --host ... --secure
option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
if(NOT ENABLE_SSL)

View File

@ -23,7 +23,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
# Only in release build because debug has too large stack frames.
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE))
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
add_warning(frame-larger-than=32768)
endif ()

View File

@ -66,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
add_subdirectory (libfarmhash)
endif ()
if (USE_INTERNAL_BTRIE_LIBRARY)
add_subdirectory (libbtrie)
endif ()
if (USE_INTERNAL_ZLIB_LIBRARY)
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")

1
contrib/abseil-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54

2
contrib/cctz vendored

@ -1 +1 @@
Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841
Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43
Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93

View File

@ -1,6 +1,7 @@
set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
# Use re2 from ClickHouse contrib, not from gRPC third_party.
if(NOT RE2_INCLUDE_DIR)
message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown")
endif()
@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}")
set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}")
# Use zlib from ClickHouse contrib, not from gRPC third_party.
if(NOT ZLIB_INCLUDE_DIRS)
message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown")
endif()
@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}")
set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}")
# Use protobuf from ClickHouse contrib, not from gRPC third_party.
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown")
elseif (NOT Protobuf_PROTOC_EXECUTABLE)
@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc")
set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}")
set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}")
# Use OpenSSL from ClickHouse contrib, not from gRPC third_party.
set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR})
set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES})
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
# The check below looks at contrib/abseil-cpp, so the error message names that
# path (it used to name gRPC's own third_party/abseil-cpp, which is not used).
if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt")
    message(FATAL_ERROR " grpc: submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
endif()
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
# Choose to build static or shared library for c-ares.
if (MAKE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
# We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF)
# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off.
set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING})
set(BUILD_TESTING OFF)
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING})
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,
# so we need to redefine it back.
include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake")

View File

@ -1,6 +0,0 @@
# Builds the btrie library from its single C source file; the header is listed
# alongside it as part of the target's sources.
add_library(btrie
src/btrie.c
include/btrie.h
)
# Consumers that link btrie get the include directory, marked SYSTEM.
target_include_directories (btrie SYSTEM PUBLIC include)

View File

@ -1,23 +0,0 @@
Copyright (c) 2013, CobbLiu
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,160 +0,0 @@
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
/**
 * In btrie, each level of the tree corresponds to one bit of the IP address.
 * A 0 bit descends to the left child, a 1 bit to the right child.
 */

/*
 * Sentinel stored in a node's value field meaning "no value set here".
 * The replacement list is parenthesized so the macro behaves as a single
 * operand in any expression (e.g. `sizeof BTRIE_NULL`).
 */
#define BTRIE_NULL ((uintptr_t) -1)

#if !defined(BTRIE_MAX_PAGES)
/*
 * Capacity of the per-tree page table (btrie_t::pools), in pages.
 * Each slot is one pointer, so an empty tree already carries
 * BTRIE_MAX_PAGES pointers of bookkeeping.
 *
 * Original author's sizing notes: 54 ip per page; 8 bytes memory per page
 * when empty. The default below is quoted as 128m ips, ~16mb ram when empty.
 * Defining BTRIE_MAX_PAGES as (1024 * 65535) before including this header is
 * quoted as 4g ips (whole ipv4), ~512mb ram when empty.
 *
 * Parenthesized so that expressions such as `n / BTRIE_MAX_PAGES` evaluate
 * as intended.
 */
#define BTRIE_MAX_PAGES (1024 * 2048)
#endif
/* Forward typedef so the node structure can point to its own type. */
typedef struct btrie_node_s btrie_node_t;
/*
 * One node of the binary trie.
 */
struct btrie_node_s {
/* Child followed for a 1 bit. Also used as the "next" link while the node sits on the free list. */
btrie_node_t *right;
/* Child followed for a 0 bit. */
btrie_node_t *left;
/* Parent node; NULL for the root. */
btrie_node_t *parent;
/* Value stored for the prefix ending at this node, or BTRIE_NULL if none. */
uintptr_t value;
};
/*
 * A trie together with the memory pool its nodes are carved from.
 */
typedef struct btrie_s {
/* Root node; represents the empty (zero-length) prefix. */
btrie_node_t *root;
/* Head of the list of released nodes available for reuse, chained through ->right. */
btrie_node_t *free; /* free list of btrie */
/* Next unused byte inside the most recently allocated page. */
char *start;
/* Number of unused bytes remaining at start. */
size_t size;
/*
* memory pool.
* Every allocated page is recorded here so that btrie_destroy() can release
* all node memory by freeing the pages.
*/
char *pools[BTRIE_MAX_PAGES];
/* Number of entries of pools[] currently in use. */
size_t len;
} btrie_t;
/**
* Create an empty btrie.
*
* @Return:
* The newly created tree, containing only a root node with no value.
* NULL if allocation failed.
*/
btrie_t *btrie_create();
/**
* Destroy the tree and release all of its node pages.
*
* @Args:
* tree: must be a valid tree; it is dereferenced unconditionally.
*
* @Return:
* Always 0.
*/
int btrie_destroy(btrie_t *tree);
/**
* Count the nodes in the tree.
* Every node reachable from the root is counted, including the root and
* intermediate nodes that carry no value. Returns 0 if the tree has no root.
*/
size_t btrie_count(btrie_t *tree);
/**
* Return the number of bytes allocated for node pages
* (number of pages in use multiplied by the page size).
* The btrie_t structure itself is not included.
*/
size_t btrie_allocated(btrie_t *tree);
/**
* Add an ipv4 prefix into btrie
*
* @Args:
* key: ip address as a 32-bit number, examined from the most significant bit.
* mask: key's mask; bits are consumed from the most significant bit for as
* long as the mask bit is set.
* value: value to store for this prefix. BTRIE_NULL is reserved to mean
* "no value" and cannot be distinguished from an absent entry.
*
* @Return:
* 0 for success.
* -1 if the prefix already holds a value or a node could not be allocated.
*/
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
uintptr_t value);
/**
* Delete an ipv4 prefix from btrie
*
* @Args:
* key: ip address as a 32-bit number.
* mask: key's mask, interpreted as in btrie_insert().
*
* @Return:
* 0 for success.
* -1 if no node exists for the prefix, or the node has children but no value.
*/
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
/**
* Find an ipv4 address in btrie
*
* @Args:
* key: ip address as a 32-bit number.
*
* @Return:
* The value of the deepest node on the key's path that holds a value.
* BTRIE_NULL if no node on the path holds a value.
*/
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
/**
* Add an ipv6 prefix into btrie
*
* @Args:
* key: ip address as a byte array; up to 16 bytes are read, each from its
* most significant bit.
* mask: key's mask as a byte array; up to 16 bytes are read.
* value: value to store for this prefix. BTRIE_NULL is reserved to mean
* "no value" and cannot be distinguished from an absent entry.
*
* @Return:
* 0 for success.
* -1 if the prefix already holds a value or a node could not be allocated.
*/
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value);
/**
* Delete an ipv6 prefix from btrie
*
* @Args:
* key: ip address as a byte array; up to 16 bytes are read.
* mask: key's mask as a byte array; up to 16 bytes are read.
*
* @Return:
* 0 for success.
* -1 if no node exists for the prefix, or the node has children but no value.
*/
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
/**
* Find an ipv6 address in btrie
*
* @Args:
* key: ip address as a byte array.
*
* @Return:
* The value of the deepest node on the key's path that holds a value.
* BTRIE_NULL if no node on the path holds a value.
*/
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
#if defined (__cplusplus)
}
#endif

View File

@ -1,460 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <btrie.h>
#define PAGE_SIZE 4096
/*
 * Hands out storage for one node.
 *
 * Released nodes on the free list are reused first. Otherwise the node is
 * carved from the current page; when the page has less than one node of room
 * left, a new zero-filled page is allocated and recorded in tree->pools.
 *
 * The fields of the returned node are NOT initialised for the caller (a
 * recycled node still carries its old contents).
 *
 * Returns NULL if a new page is needed but either the page table is full or
 * the allocation fails.
 */
static btrie_node_t *
btrie_alloc(btrie_t *tree)
{
    btrie_node_t *p;
    char *page;

    /* Free nodes are chained through their ->right pointer. */
    if (tree->free) {
        p = tree->free;
        tree->free = tree->free->right;
        return p;
    }

    if (tree->size < sizeof(btrie_node_t)) {
        /* pools[] has a fixed capacity; never write past its end. */
        if (tree->len >= sizeof(tree->pools) / sizeof(tree->pools[0])) {
            return NULL;
        }

        page = (char *) calloc(sizeof(char), PAGE_SIZE);
        if (page == NULL) {
            return NULL;
        }

        tree->pools[tree->len++] = page;
        tree->start = page;
        tree->size = PAGE_SIZE;
    }

    p = (btrie_node_t *) tree->start;

    tree->start += sizeof(btrie_node_t);
    tree->size -= sizeof(btrie_node_t);

    return p;
}
/*
 * Allocates a tree and its root node.
 *
 * Returns the new tree, whose root has no children and no value, or NULL if
 * either allocation fails. On failure nothing is leaked.
 */
btrie_t *
btrie_create()
{
    btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
    if (tree == NULL) {
        return NULL;
    }

    tree->free = NULL;
    tree->start = NULL;
    tree->size = 0;
    /* Clear the whole page table, whatever its element type is. */
    memset(tree->pools, 0, sizeof(tree->pools));
    tree->len = 0;

    tree->root = btrie_alloc(tree);
    if (tree->root == NULL) {
        /* No page was recorded when the allocation failed, so only the tree itself needs freeing. */
        free(tree);
        return NULL;
    }

    tree->root->right = NULL;
    tree->root->left = NULL;
    tree->root->parent = NULL;
    tree->root->value = BTRIE_NULL;

    return tree;
}
/* Returns the number of nodes in the subtree rooted at node, node included. */
static size_t
subtree_weight(btrie_node_t *node)
{
    size_t below_left = node->left ? subtree_weight(node->left) : 0;
    size_t below_right = node->right ? subtree_weight(node->right) : 0;

    return 1 + below_left + below_right;
}
/* Returns the total number of nodes in the tree, or 0 when it has no root. */
size_t
btrie_count(btrie_t *tree)
{
    return tree->root ? subtree_weight(tree->root) : 0;
}
/* Returns the bytes held in node pages: pages in use times PAGE_SIZE. */
size_t
btrie_allocated(btrie_t *tree)
{
    size_t pages_in_use = tree->len;

    return pages_in_use * PAGE_SIZE;
}
/*
 * Stores value for the IPv4 prefix described by key/mask.
 *
 * Bits are consumed from the most significant one for as long as the mask bit
 * is set. Returns 0 on success, -1 if the prefix already holds a value or a
 * node cannot be allocated.
 */
int
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
    uintptr_t value)
{
    uint32_t probe = 0x80000000;
    btrie_node_t *cur = tree->root;
    btrie_node_t *child = tree->root;

    /* Phase 1: follow the part of the path that already exists. */
    for (; probe & mask; probe >>= 1) {
        child = (key & probe) ? cur->right : cur->left;
        if (child == NULL) {
            break;
        }
        cur = child;
    }

    /* The whole prefix was already present: only the value slot is in question. */
    if (child != NULL) {
        if (cur->value != BTRIE_NULL) {
            return -1;
        }

        cur->value = value;
        return 0;
    }

    /* Phase 2: create the missing nodes for the remaining mask bits. */
    for (; probe & mask; probe >>= 1) {
        child = btrie_alloc(tree);
        if (child == NULL) {
            return -1;
        }

        child->right = NULL;
        child->left = NULL;
        child->parent = cur;
        child->value = BTRIE_NULL;

        if (key & probe) {
            cur->right = child;
        } else {
            cur->left = child;
        }

        cur = child;
    }

    cur->value = value;

    return 0;
}
/*
 * Removes the value stored for the IPv4 prefix described by key/mask.
 *
 * A node that still has children, or the root, is kept in place and only has
 * its value cleared. A childless non-root node is unlinked and pushed on the
 * free list, and so is every ancestor that becomes an empty, valueless leaf
 * as a result (the root is never unlinked).
 *
 * Returns 0 on success, -1 if the prefix has no node or the node holds no
 * value to clear.
 */
int
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
{
    uint32_t bit;
    btrie_node_t *node;

    bit = 0x80000000;
    node = tree->root;

    while (node && (bit & mask)) {
        if (key & bit) {
            node = node->right;
        } else {
            node = node->left;
        }

        bit >>= 1;
    }

    if (node == NULL) {
        return -1;
    }

    /*
     * Interior nodes must stay to keep their subtrees reachable. The root has
     * no parent to unlink it from, so it is handled the same way; without this
     * a delete that resolves to a childless root would dereference a NULL
     * parent pointer below.
     */
    if (node->right || node->left || node->parent == NULL) {
        if (node->value != BTRIE_NULL) {
            node->value = BTRIE_NULL;
            return 0;
        }

        return -1;
    }

    for ( ;; ) {
        if (node->parent->right == node) {
            node->parent->right = NULL;
        } else {
            node->parent->left = NULL;
        }

        /* Released nodes are chained through ->right. */
        node->right = tree->free;
        tree->free = node;

        node = node->parent;

        if (node->right || node->left) {
            break;
        }

        if (node->value != BTRIE_NULL) {
            break;
        }

        if (node->parent == NULL) {
            break;
        }
    }

    return 0;
}
/*
 * Looks up an IPv4 address.
 *
 * Walks the path selected by the key's bits, most significant first, and
 * returns the value of the deepest node on that path that holds one, or
 * BTRIE_NULL if none does.
 */
uintptr_t
btrie_find(btrie_t *tree, uint32_t key)
{
    uintptr_t best = BTRIE_NULL;
    uint32_t probe = 0x80000000;
    btrie_node_t *cur;

    for (cur = tree->root; cur != NULL; probe >>= 1) {
        /* Remember the most specific value seen so far. */
        if (cur->value != BTRIE_NULL) {
            best = cur->value;
        }

        cur = (key & probe) ? cur->right : cur->left;
    }

    return best;
}
/*
 * Stores value for the IPv6 prefix described by key/mask.
 *
 * key and mask are byte arrays; i selects the current byte and bit the
 * current bit within it, starting at the most significant bit of byte 0.
 * Bits are consumed for as long as the corresponding mask bit is set, up to
 * 16 bytes. Returns 0 on success, -1 if the prefix already holds a value or a
 * node cannot be allocated.
 */
int
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value)
{
uint8_t bit;
unsigned int i;
btrie_node_t *node, *next;
i = 0;
bit = 0x80;
node = tree->root;
next = tree->root;
/* Phase 1: follow the part of the path that already exists. */
while (bit & mask[i]) {
if (key[i] & bit) {
next = node->right;
} else {
next = node->left;
}
/* Path ends here; bit and i still identify the first missing level. */
if (next == NULL) {
break;
}
bit >>= 1;
node = next;
/* Byte exhausted: move to the next one, or stop after the 16th byte. */
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
/* The whole prefix was already present: only the value slot is in question. */
if (next) {
if (node->value != BTRIE_NULL) {
return -1;
}
node->value = value;
return 0;
}
/* Phase 2: create the missing nodes for the remaining mask bits. */
while (bit & mask[i]) {
next = btrie_alloc(tree);
if (next == NULL) {
return -1;
}
next->right = NULL;
next->left = NULL;
next->parent = node;
next->value = BTRIE_NULL;
if (key[i] & bit) {
node->right = next;
} else {
node->left = next;
}
bit >>= 1;
node = next;
/* Byte exhausted: move to the next one, or stop after the 16th byte. */
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
node->value = value;
return 0;
}
/*
 * Removes the value stored for the IPv6 prefix described by key/mask.
 *
 * key and mask are byte arrays of which up to 16 bytes are read. A node that
 * still has children, or the root, is kept in place and only has its value
 * cleared. A childless non-root node is unlinked and pushed on the free list,
 * and so is every ancestor that becomes an empty, valueless leaf as a result
 * (the root is never unlinked).
 *
 * Returns 0 on success, -1 if the prefix has no node or the node holds no
 * value to clear.
 */
int
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
{
    uint8_t bit;
    unsigned int i;
    btrie_node_t *node;

    i = 0;
    bit = 0x80;
    node = tree->root;

    while (node && (bit & mask[i])) {
        if (key[i] & bit) {
            node = node->right;
        } else {
            node = node->left;
        }

        bit >>= 1;

        /* Byte exhausted: move to the next one, or stop after the 16th byte. */
        if (bit == 0) {
            if (++i == 16) {
                break;
            }

            bit = 0x80;
        }
    }

    if (node == NULL) {
        return -1;
    }

    /*
     * Interior nodes must stay to keep their subtrees reachable. The root has
     * no parent to unlink it from, so it is handled the same way; without this
     * a delete that resolves to a childless root would dereference a NULL
     * parent pointer below.
     */
    if (node->right || node->left || node->parent == NULL) {
        if (node->value != BTRIE_NULL) {
            node->value = BTRIE_NULL;
            return 0;
        }

        return -1;
    }

    for ( ;; ) {
        if (node->parent->right == node) {
            node->parent->right = NULL;
        } else {
            node->parent->left = NULL;
        }

        /* Released nodes are chained through ->right. */
        node->right = tree->free;
        tree->free = node;

        node = node->parent;

        if (node->right || node->left) {
            break;
        }

        if (node->value != BTRIE_NULL) {
            break;
        }

        if (node->parent == NULL) {
            break;
        }
    }

    return 0;
}
/*
 * Looks up an IPv6 address given as a 16-byte array.
 *
 * Walks the path selected by the key's bits (most significant bit of byte 0
 * first) and returns the value of the deepest node on that path that holds
 * one, or BTRIE_NULL if none does. At most 16 bytes of key are read.
 */
uintptr_t
btrie_find_a6(btrie_t *tree, const uint8_t *key)
{
    uint8_t bit;
    uintptr_t value;
    unsigned int i;
    btrie_node_t *node;

    i = 0;
    bit = 0x80;
    value = BTRIE_NULL;
    node = tree->root;

    while (node) {
        if (node->value != BTRIE_NULL) {
            value = node->value;
        }

        /*
         * All 128 bits have been consumed: this node sits at the maximum
         * depth the insert routine creates, so there is nothing below it.
         * Stop here instead of reading key[16], which is past the address.
         */
        if (i == 16) {
            break;
        }

        if (key[i] & bit) {
            node = node->right;
        } else {
            node = node->left;
        }

        bit >>= 1;

        if (bit == 0) {
            i++;
            bit = 0x80;
        }
    }

    return value;
}
/*
 * Releases every node page recorded in the tree and then the tree itself.
 * Always returns 0.
 */
int
btrie_destroy(btrie_t *tree)
{
    size_t remaining = tree->len;

    /* free memory pools */
    while (remaining > 0) {
        free(tree->pools[--remaining]);
    }

    free(tree);

    return 0;
}

View File

@ -1,103 +0,0 @@
#include <stdio.h>
#include <btrie.h>
/*
 * Smoke test for the btrie library.
 *
 * Inserts a few IPv4 prefixes (including one expected duplicate), checks
 * lookups and a delete, repeats a lookup for an IPv6 prefix, and finally bulk
 * inserts about four million /32 entries.
 *
 * Exit status: 0 only if every step behaved as expected; 1 if the tree could
 * not be created, an insert/delete returned an unexpected result, or any of
 * the numbered lookup checks failed.
 */
int main()
{
    btrie_t *it;
    int ret;
    int failed = 0; /* set when a lookup check prints "... error" */
    uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
    uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
    uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};

    it = btrie_create();
    if (it == NULL) {
        printf("create error!\n");
        /* Failing to create the tree is a test failure, not a success. */
        return 1;
    }

    //add 101.45.69.50/16
    ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
    if (ret != 0) {
        printf("insert 1 error.\n");
        goto error;
    }

    //add 10.45.69.50/16
    ret = btrie_insert(it, 170738994, 0xffff0000, 1);
    if (ret != 0) {
        printf("insert 2 error.\n");
        goto error;
    }

    //add 10.45.79.50/16 -- same /16 as the previous insert, so it must be rejected
    ret = btrie_insert(it, 170741554, 0xffff0000, 1);
    if (ret == 0) {
        printf("insert 3 error.\n");
        goto error;
    }

    //add 102.45.79.50/24
    ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
    if (ret != 0) {
        printf("insert 4 error.\n");
        goto error;
    }

    ret = btrie_find(it, 170741554);
    if (ret == 1) {
        printf("test case 1 passed\n");
    } else {
        printf("test case 1 error\n");
        failed = 1;
    }

    ret = btrie_find(it, 170786817);
    if (ret != 1) {
        printf("test case 2 passed\n");
    } else {
        printf("test case 2 error\n");
        failed = 1;
    }

    ret = btrie_delete(it, 1714245426, 0xffffff00);
    if (ret != 0) {
        printf("delete 1 error\n");
        goto error;
    }

    ret = btrie_find(it, 1714245426);
    if (ret != 1) {
        printf("test case 3 passed\n");
    } else {
        printf("test case 3 error\n");
        failed = 1;
    }

    //add dead:beef::/32
    ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
    if (ret != 0) {
        printf("insert 5 error\n");
        goto error;
    }

    ret = btrie_find_a6(it, ip_v6);
    if (ret == 1) {
        printf("test case 4 passed\n");
    } else {
        printf("test case 4 error\n");
        failed = 1;
    }

    // insert 4m ips
    for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) {
        ret = btrie_insert(it, ip, 0xffffffff, 1);
        if (ret != 0) {
            printf("insert 5 error (%d) (%zu) .\n", ret, ip);
            goto error;
        }
    }

    if (failed) {
        goto error;
    }

    /* Release the tree on the success path as well. */
    btrie_destroy(it);
    return 0;

error:
    btrie_destroy(it);
    printf("test failed\n");
    return 1;
}

View File

@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st
set(LIBUNWIND_ASM_SOURCES
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S
${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S)
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1]
# Work around this issue by compiling the assembly sources as C.
#
# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771
if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19)
set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C)
else()
enable_language(ASM)
endif()
set(LIBUNWIND_SOURCES
${LIBUNWIND_CXX_SOURCES}

2
contrib/protobuf vendored

@ -1 +1 @@
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3
Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a
Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd

View File

@ -347,8 +347,9 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
${ROCKSDB_SOURCE_DIR}/db/builder.cc
${ROCKSDB_SOURCE_DIR}/db/c.cc
@ -394,6 +395,8 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/db/output_validator.cc
${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc
${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
${ROCKSDB_SOURCE_DIR}/db/repair.cc
@ -451,12 +454,12 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
${ROCKSDB_SOURCE_DIR}/options/options.cc
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
@ -507,6 +510,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
@ -515,6 +519,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.12.1.1) unstable; urgency=low
clickhouse (20.13.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 05 Nov 2020 21:52:47 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300

View File

@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
fi
is_running()
{
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
}
wait_for_done()
{
timeout=$1
attempts=0
while is_running; do
attempts=$(($attempts + 1))
if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then
return 1
fi
sleep 1
done
}
die()
{
echo $1 >&2
@ -105,49 +85,7 @@ check_config()
initdb()
{
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path")
if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then
die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}";
fi
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
else
CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR
fi
if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then
echo "Can't chown to non-existing user ${CLICKHOUSE_USER}"
return
fi
if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then
echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}"
return
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then
echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]"
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then
if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then
echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown."
else
if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then
echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
fi
if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then
echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]"
chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/*
echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]"
chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
fi
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
@ -171,17 +109,7 @@ restart()
forcestop()
{
local EXIT_STATUS
EXIT_STATUS=0
echo -n "Stop forcefully $PROGRAM service: "
kill -KILL $(cat "$CLICKHOUSE_PIDFILE")
wait_for_done
echo "DONE"
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
}
@ -261,16 +189,16 @@ main()
service_or_func restart
;;
condstart)
is_running || service_or_func start
service_or_func start
;;
condstop)
is_running && service_or_func stop
service_or_func stop
;;
condrestart)
is_running && service_or_func restart
service_or_func restart
;;
condreload)
is_running && service_or_func restart
service_or_func restart
;;
initdb)
initdb
@ -293,17 +221,7 @@ main()
status()
{
if is_running; then
echo "$PROGRAM service is running"
exit 0
else
if is_cron_disabled; then
echo "$PROGRAM service is stopped";
else
echo "$PROGRAM: process unexpectedly terminated"
fi
exit 3
fi
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
}

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.12.1.*
ARG version=20.13.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -56,6 +56,7 @@ RUN apt-get update \
libprotoc-dev \
libgrpc++-dev \
protobuf-compiler-grpc \
libc-ares-dev \
rapidjson-dev \
libsnappy-dev \
libparquet-dev \

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.12.1.*
ARG version=20.13.1.*
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.12.1.*
ARG version=20.13.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
ENV OUTPUT_DIR=/output
ENV IGNORE='.*contrib.*'
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
RUN apt-get update && apt-get install cmake --yes --no-install-recommends
CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
dpkg -i /package_folder/clickhouse-common-static_*.deb; \
llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}

View File

@ -15,6 +15,9 @@ stage=${stage:-}
# empty parameter.
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
# Run only matching tests.
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
@ -101,223 +104,248 @@ function start_server
function clone_root
{
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
(
cd "$FASTTEST_SOURCE"
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
git checkout FETCH_HEAD
echo 'Clonned merge head'
else
git fetch
git checkout "$COMMIT_SHA"
echo 'Checked out to commit'
fi
else
if [ -v COMMIT_SHA ]; then
git checkout "$COMMIT_SHA"
fi
fi
)
(
cd "$FASTTEST_SOURCE"
if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
git checkout FETCH_HEAD
echo 'Clonned merge head'
else
git fetch
git checkout "$COMMIT_SHA"
echo 'Checked out to commit'
fi
else
if [ -v COMMIT_SHA ]; then
git checkout "$COMMIT_SHA"
fi
fi
)
}
function clone_submodules
{
(
cd "$FASTTEST_SOURCE"
(
cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring contrib/miniselect contrib/xz)
SUBMODULES_TO_UPDATE=(
contrib/boost
contrib/zlib-ng
contrib/libxml2
contrib/poco
contrib/libunwind
contrib/ryu
contrib/fmtlib
contrib/base64
contrib/cctz
contrib/libcpuid
contrib/double-conversion
contrib/libcxx
contrib/libcxxabi
contrib/libc-headers
contrib/lz4
contrib/zstd
contrib/fastops
contrib/rapidjson
contrib/re2
contrib/sparsehash-c11
contrib/croaring
contrib/miniselect
contrib/xz
)
git submodule sync
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
git submodule foreach git reset --hard
git submodule foreach git checkout @ -f
git submodule foreach git clean -xfd
)
git submodule sync
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
git submodule foreach git reset --hard
git submodule foreach git checkout @ -f
git submodule foreach git clean -xfd
)
}
function run_cmake
{
CMAKE_LIBS_CONFIG=(
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
)
CMAKE_LIBS_CONFIG=(
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
)
# TODO remove this? we don't use ccache anyway. An option would be to download it
# from S3 simultaneously with cloning.
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
export CCACHE_MAXSIZE=15G
# TODO remove this? we don't use ccache anyway. An option would be to download it
# from S3 simultaneously with cloning.
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
export CCACHE_MAXSIZE=15G
ccache --show-stats ||:
ccache --zero-stats ||:
ccache --show-stats ||:
ccache --zero-stats ||:
mkdir "$FASTTEST_BUILD" ||:
mkdir "$FASTTEST_BUILD" ||:
(
cd "$FASTTEST_BUILD"
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
)
(
cd "$FASTTEST_BUILD"
cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt"
)
}
function build
{
(
cd "$FASTTEST_BUILD"
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
fi
ccache --show-stats ||:
)
(
cd "$FASTTEST_BUILD"
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
fi
ccache --show-stats ||:
)
}
function configure
{
clickhouse-client --version
clickhouse-test --help
clickhouse-client --version
clickhouse-test --help
mkdir -p "$FASTTEST_DATA"{,/client-config}
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
# doesn't support SSL
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
mkdir -p "$FASTTEST_DATA"{,/client-config}
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
# doesn't support SSL
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
}
function run_tests
{
clickhouse-server --version
clickhouse-test --help
clickhouse-server --version
clickhouse-test --help
# Kill the server in case we are running locally and not in docker
stop_server ||:
start_server
TESTS_TO_SKIP=(
00105_shard_collations
00109_shard_totals_after_having
00110_external_sort
00302_http_compression
00417_kill_query
00436_convert_charset
00490_special_line_separators_and_characters_outside_of_bmp
00652_replicated_mutations_zookeeper
00682_empty_parts_merge
00701_rollup
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
00926_multimatch
00929_multi_match_edit_distance
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
01092_memory_profiler
01098_msgpack_format
01098_temporary_and_external_tables
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
01193_metadata_loading
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01251_dict_is_in_infinite_loop
01259_dictionary_custom_settings_ddl
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
01305_replica_create_drop_zookeeper
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro
base64
brotli
capnproto
client
ddl_dictionaries
h3
hashing
hdfs
java_hash
json
limit_memory
live_view
memory_leak
memory_limit
mysql
odbc
parallel_alter
parquet
protobuf
secure
sha256
xz
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
00646_url_engine
00974_query_profiler
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
01504_rocksdb
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert
01541_max_memory_usage_for_user
# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
01545_system_errors
# Checks system.errors
01563_distributed_query_finish
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
# running concurrently. If they fail even in sequential mode, we will report them.
# FIXME All tests that require exclusive access to the server must be
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
# run them in a separate group after all other tests. This is faster and also
# explicit instead of guessing.
if [[ -n "${FAILED_TESTS[*]}" ]]
then
# Kill the server in case we are running locally and not in docker
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
start_server
echo "Going to run again: ${FAILED_TESTS[*]}"
TESTS_TO_SKIP=(
00105_shard_collations
00109_shard_totals_after_having
00110_external_sort
00302_http_compression
00417_kill_query
00436_convert_charset
00490_special_line_separators_and_characters_outside_of_bmp
00652_replicated_mutations_zookeeper
00682_empty_parts_merge
00701_rollup
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
00926_multimatch
00929_multi_match_edit_distance
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
01092_memory_profiler
01098_msgpack_format
01098_temporary_and_external_tables
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
01193_metadata_loading
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01251_dict_is_in_infinite_loop
01259_dictionary_custom_settings_ddl
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
01305_replica_create_drop_zookeeper
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro
base64
brotli
capnproto
client
ddl_dictionaries
h3
hashing
hdfs
java_hash
json
limit_memory
live_view
memory_leak
memory_limit
mysql
odbc
parallel_alter
parquet
protobuf
secure
sha256
xz
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
00646_url_engine
00974_query_profiler
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
01504_rocksdb
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert
01541_max_memory_usage_for_user
# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
01561_mann_whitney_scipy
01545_system_errors
# Checks system.errors
01563_distributed_query_finish
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
# running concurrently. If they fail even in sequential mode, we will report them.
# FIXME All tests that require exclusive access to the server must be
# explicitly marked as `sequential`, and `clickhouse-test` must detect them and
# run them in a separate group after all other tests. This is faster and also
# explicit instead of guessing.
if [[ -n "${FAILED_TESTS[*]}" ]]
then
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
start_server
echo "Going to run again: ${FAILED_TESTS[*]}"
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi
}
case "$stage" in

View File

@ -7,4 +7,4 @@ services:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 3308:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency

View File

@ -0,0 +1,10 @@
version: '2.3'
services:
mysql1:
image: mysql:5.7
restart: 'no'
environment:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 3308:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency

View File

@ -2,7 +2,7 @@ version: '2.3'
services:
mysql8_0:
image: mysql:8.0
restart: always
restart: 'no'
environment:
MYSQL_ROOT_PASSWORD: clickhouse
ports:

View File

@ -25,12 +25,13 @@ RUN apt-get update \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
rsync \
tree \
tzdata \
vim \
wget \
&& pip3 --no-cache-dir install clickhouse_driver scipy \
&& pip3 --no-cache-dir install 'git+https://github.com/mymarilyn/clickhouse-driver.git' scipy \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \
&& apt-get clean \

View File

@ -14,10 +14,12 @@ import string
import sys
import time
import traceback
import logging
import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
@ -46,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
args = parser.parse_args()
reportStageEnd('start')
@ -139,44 +143,37 @@ reportStageEnd('before-connect')
# Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
all_connections = [clickhouse_driver.Client(**server) for server in servers]
# Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
pass
if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
pass
reportStageEnd('drop-1')
reportStageEnd('drop-1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for conn_index, c in enumerate(all_connections):
for s in settings:
try:
q = f"set {s.tag} = '{s.text}'"
c.execute(q)
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
print(traceback.format_exc(), file=sys.stderr)
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
c.settings[s.tag] = s.text
reportStageEnd('settings')
@ -194,37 +191,40 @@ for t in tables:
reportStageEnd('preconditions')
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
# etree doesn't support the appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
create_queries = substitute_parameters(create_query_templates)
if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*')
if q.tag in ('create_query', 'fill_query')]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
# and temporary tables are destroyed. We want to be able to continue after some
# errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
sys.exit(1)
# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
sys.exit(1)
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
threads = [Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
threads = [
Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
for t in threads:
t.start()
for t in threads:
t.start()
for t in threads:
t.join()
for t in threads:
t.join()
reportStageEnd('create')
reportStageEnd('create')
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
@ -403,10 +403,11 @@ print(f'profile-total\t{profile_total_seconds}')
reportStageEnd('run')
# Run drop queries
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
if not args.keep_created_tables and not args.use_existing_tables:
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
reportStageEnd('drop-2')
reportStageEnd('drop-2')

View File

@ -10,6 +10,11 @@ RUN apt-get update --yes \
gpg-agent \
debsig-verify \
strace \
protobuf-compiler \
protobuf-compiler-grpc \
libprotoc-dev \
libgrpc++-dev \
libc-ares-dev \
--yes --no-install-recommends
#RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add -
@ -33,7 +38,8 @@ RUN set -x \
&& dpkg -i "${PKG_VERSION}.deb"
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
&& ninja re2_st clickhouse_grpc_protos \
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \
plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log

View File

@ -1,12 +1,12 @@
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
FROM yandex/clickhouse-stateless-test
FROM yandex/clickhouse-stateless-test-with-coverage
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
python3-requests
python3-requests procps psmisc
COPY s3downloader /s3downloader
COPY run.sh /run.sh

View File

@ -1,40 +1,44 @@
#!/bin/bash
kill_clickhouse () {
kill "$(pgrep -u clickhouse)" 2>/dev/null
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
pkill -f "clickhouse-server" 2>/dev/null
for _ in {1..10}
for _ in {1..120}
do
if ! kill -0 "$(pgrep -u clickhouse)"; then
echo "No clickhouse process"
break
else
echo "Process $(pgrep -u clickhouse) still alive"
sleep 10
fi
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
sleep 1
done
if pkill -0 -f "clickhouse-server"
then
pstree -apgT
jobs
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
return 1
fi
}
start_clickhouse () {
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
}
wait_llvm_profdata () {
while kill -0 "$(pgrep llvm-profdata-10)"
counter=0
until clickhouse-client --query "SELECT 1"
do
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
sleep 3
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
sleep 0.5
counter=$((counter + 1))
done
}
merge_client_files_in_background () {
client_files=$(ls /client_*profraw 2>/dev/null)
if [ -n "$client_files" ]
then
llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
rm "$client_files"
fi
}
chmod 777 /
@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
# install test configs
/usr/share/clickhouse-test/config/install.sh
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$((counter + 1))
done
}
start
start_clickhouse
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
if ! /s3downloader --dataset-names $DATASETS; then
@ -81,25 +66,20 @@ fi
chmod 777 -R /var/lib/clickhouse
while /bin/true; do
merge_client_files_in_background
sleep 2
done &
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test"
kill_clickhouse
start_clickhouse
sleep 10
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
@ -109,15 +89,10 @@ fi
# more ideologically correct.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse
wait_llvm_profdata
sleep 3
wait_llvm_profdata # 100% merged all parts
cp /*.profraw /profraw ||:

View File

@ -1,4 +1,4 @@
# docker build -t yandex/clickhouse-stateless-with-coverage-test .
# docker build -t yandex/clickhouse-stateless-test-with-coverage .
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
FROM yandex/clickhouse-test-base
@ -28,7 +28,9 @@ RUN apt-get update -y \
lsof \
unixodbc \
wget \
qemu-user-static
qemu-user-static \
procps \
psmisc
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \

View File

@ -2,27 +2,41 @@
kill_clickhouse () {
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
kill "$(pgrep -u clickhouse)" 2>/dev/null
pkill -f "clickhouse-server" 2>/dev/null
for _ in {1..10}
for _ in {1..120}
do
if ! kill -0 "$(pgrep -u clickhouse)"; then
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
break
else
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
sleep 10
fi
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
sleep 1
done
echo "Will try to send second kill signal for sure"
kill "$(pgrep -u clickhouse)" 2>/dev/null
sleep 5
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
if pkill -0 -f "clickhouse-server"
then
pstree -apgT
jobs
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
return 1
fi
}
start_clickhouse () {
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
sleep 0.5
counter=$((counter + 1))
done
}
chmod 777 /
@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/
start_clickhouse
sleep 10
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi

View File

@ -13,9 +13,9 @@ cmake .. \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DCLICKHOUSE_SPLIT_BINARY=ON \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```

View File

@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -273,13 +273,15 @@ SELECT
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
```text
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
``` sqk
``` sql
select count() FROM UAct
```

View File

@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster:
</zookeeper>
```
ClickHouse also supports storing replica metadata in an auxiliary ZooKeeper cluster by providing the ZooKeeper cluster name and path as engine arguments.
In other words, it supports storing the metadata of different tables in different ZooKeeper clusters.
Example of setting the addresses of the auxiliary ZooKeeper cluster:
``` xml
<auxiliary_zookeepers>
<zookeeper2>
<node index="1">
<host>example_2_1</host>
<port>2181</port>
</node>
<node index="2">
<host>example_2_2</host>
<port>2181</port>
</node>
<node index="3">
<host>example_2_3</host>
<port>2181</port>
</node>
</zookeeper2>
<zookeeper3>
<node index="1">
<host>example_3_1</host>
<port>2181</port>
</node>
</zookeeper3>
</auxiliary_zookeepers>
```
To store table metadata in an auxiliary ZooKeeper cluster instead of the default ZooKeeper cluster, create the table with the
ReplicatedMergeTree engine as follows:
```
CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ...
```
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only.
@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co
```xml
<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
```
In this case, you can omit arguments when creating tables:

View File

@ -11,7 +11,7 @@ By going through this tutorial, youll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
To postpone the complexities of a distributed environment, we'll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
For example, you have chosen `deb` packages and executed:

View File

@ -5,7 +5,7 @@ toc_title: Overview
# What Is ClickHouse? {#what-is-clickhouse}
ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
In a “normal” row-oriented DBMS, data is stored in this order:

View File

@ -111,7 +111,7 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |

View File

@ -44,11 +44,10 @@ stages, such as query planning or distributed queries.
To be useful, the tracing information has to be exported to a monitoring system
that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids
a dependency on a particular monitoring system, instead only
providing the tracing data conforming to the standard. A natural way to do so
in an SQL RDBMS is a system table. OpenTelemetry trace span information
a dependency on a particular monitoring system, instead only providing the
tracing data through a system table. OpenTelemetry trace span information
[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span)
is stored in the system table called `system.opentelemetry_span_log`.
is stored in the `system.opentelemetry_span_log` table.
The table must be enabled in the server configuration, see the `opentelemetry_span_log`
element in the default config file `config.xml`. It is enabled by default.
@ -67,3 +66,31 @@ The table has the following columns:
The tags or attributes are saved as two parallel arrays, containing the keys
and values. Use `ARRAY JOIN` to work with them.
## Integration with monitoring systems
At the moment, there is no ready tool that can export the tracing data from
ClickHouse to a monitoring system.
For testing, it is possible to set up the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format:
```sql
CREATE MATERIALIZED VIEW default.zipkin_spans
ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow')
SETTINGS output_format_json_named_tuples_as_objects = 1,
output_format_json_array_of_rows = 1 AS
SELECT
lower(hex(reinterpretAsFixedString(trace_id))) AS traceId,
lower(hex(parent_span_id)) AS parentId,
lower(hex(span_id)) AS id,
operation_name AS name,
start_time_us AS timestamp,
finish_time_us - start_time_us AS duration,
cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
cast(tuple(
attribute.values[indexOf(attribute.names, 'db.statement')]),
'Tuple("db.statement" text)') AS tags
FROM system.opentelemetry_span_log
```
In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.

View File

@ -2317,4 +2317,10 @@ Possible values:
Default value: `1`.
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format.
Default value: `\N`.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -0,0 +1,70 @@
# system.replicated_fetches {#system_tables-replicated_fetches}
Contains information about currently running background fetches.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the currently running background fetch started.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the currently running background fetch.
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the currently running background fetch.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
**Example**
``` sql
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: default
table: t
elapsed: 7.243039876
progress: 0.41832135995612835
result_part_name: all_0_0_0
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
partition_id: all
total_size_bytes_compressed: 1052783726
bytes_read_compressed: 440401920
source_replica_path: /clickhouse/test/t/replicas/1
source_replica_hostname: node1
source_replica_port: 9009
interserver_scheme: http
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
to_detached: 0
thread_id: 54
```
**See Also**
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->

View File

@ -1,42 +1,42 @@
# ClickHouse obfuscator
Simple tool for table data obfuscation.
It reads input table and produces output table, that retain some properties of input, but contains different data.
It allows to publish almost real production data for usage in benchmarks.
It is designed to retain the following properties of data:
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
- probability distributions of length of strings;
- probability of zero values of numbers; empty strings and arrays, NULLs;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
- date component of DateTime values;
- UTF-8 validity of string values;
- string values continue to look somewhat natural.
Most of the properties above are viable for performance testing:
reading data, filtering, aggregation and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
It may retain some data you don't want to publish.
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
So, the user will be able to count exact ratio of mobile traffic.
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
This tool works fine only with reasonable amount of data (at least 1000s of rows).
# ClickHouse obfuscator
A simple tool for table data obfuscation.
It reads an input table and produces an output table, that retains some properties of input, but contains different data.
It allows publishing almost real production data for usage in benchmarks.
It is designed to retain the following properties of data:
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
- conditional cardinalities: number of distinct values of one column under the condition on the value of another column;
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
- probability distributions of the length of strings;
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
- date component of `DateTime` values;
- UTF-8 validity of string values;
- string values look natural.
Most of the properties above are viable for performance testing:
reading data, filtering, aggregation, and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.
Some transformations are one to one and could be reversed, so you need to have a large seed and keep it in secret.
It uses some cryptographic primitives to transform data but from the cryptographic point of view, it doesn't do it properly, that is why you should not consider the result as secure unless you have another reason. The result may retain some data you don't want to publish.
It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data.
For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value.
So, the user will be able to count the exact ratio of mobile traffic.
Here is another example. Suppose you have some private data in your table, like user emails, and you don't want to publish any single email address.
If your table is large enough and contains multiple different emails and no email has a much higher frequency than all others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them.
You should look at how the algorithm of this tool works, and fine-tune its command line parameters.
This tool works fine only with an average amount of data (at least 1000s of rows).

View File

@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
└────────┘
```
The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
Now you can use the `groupArray` function to create an array from the `y` column:
``` sql

View File

@ -4,4 +4,59 @@ toc_priority: 5
# avg {#agg_function-avg}
Calculates the average. Only works for numbers. The result is always Float64.
Calculates the arithmetic mean.
**Syntax**
``` sql
avg(x)
```
**Parameter**
- `x` — Values.
`x` must be
[Integer](../../../sql-reference/data-types/int-uint.md),
[floating-point](../../../sql-reference/data-types/float.md), or
[Decimal](../../../sql-reference/data-types/decimal.md).
**Returned value**
- `NaN` if the supplied parameter is empty.
- Mean otherwise.
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
```
Result:
``` text
┌─avg(x)─┐
│ 2.5 │
└────────┘
```
**Example**
Query:
``` sql
CREATE table test (t UInt8) ENGINE = Memory;
SELECT avg(t) FROM test
```
Result:
``` text
┌─avg(t)─┐
│ nan │
└────────┘
```

View File

@ -14,17 +14,21 @@ avgWeighted(x, weight)
**Parameters**
- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
- `x` — Values.
- `weight` — Weights of the values.
Type of `x` and `weight` must be the same.
`x` and `weight` must both be
[Integer](../../../sql-reference/data-types/int-uint.md),
[floating-point](../../../sql-reference/data-types/float.md), or
[Decimal](../../../sql-reference/data-types/decimal.md),
but may have different types.
**Returned value**
- Weighted mean.
- `NaN`. If all the weights are equal to 0.
- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
- Weighted mean otherwise.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
**Example**
@ -42,3 +46,54 @@ Result:
│ 8 │
└────────────────────────┘
```
**Example**
Query:
``` sql
SELECT avgWeighted(x, w)
FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
```
Result:
``` text
┌─avgWeighted(x, weight)─┐
│ 8 │
└────────────────────────┘
```
**Example**
Query:
``` sql
SELECT avgWeighted(x, w)
FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
```
Result:
``` text
┌─avgWeighted(x, weight)─┐
│ nan │
└────────────────────────┘
```
**Example**
Query:
``` sql
CREATE table test (t UInt8) ENGINE = Memory;
SELECT avgWeighted(t, t) FROM test
```
Result:
``` text
┌─avgWeighted(t, t)─┐
│ nan │
└───────────────────┘
```

View File

@ -0,0 +1,37 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`.
**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2);
```
**Parameters**
- `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — The column to pass into the function as its argument. [String](../../../sql-reference/data-types/string.md#string).
**Returned value(s)**
Returns the result of the aggregation for your input rows. The return type will be the same as the return type of the function that `initializeAggregation` takes as its first argument.
For example for functions with the suffix `State` the return type will be `AggregateFunction`.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```

View File

@ -0,0 +1,53 @@
## rankCorr {#agg_function-rankcorr}
Computes a rank correlation coefficient.
**Syntax**
``` sql
rankCorr(x, y)
```
**Parameters**
- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Returned value(s)**
- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If fewer than two arguments are passed, the function throws an exception. The value close to +1 denotes a high linear relationship, and with an increase of one random variable, the second random variable also increases. The value close to -1 denotes a high linear relationship, and with an increase of one random variable, the second random variable decreases. The value close or equal to 0 denotes no relationship between the two random variables.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Example**
Query:
``` sql
SELECT rankCorr(number, number) FROM numbers(100);
```
Result:
``` text
┌─rankCorr(number, number)─┐
│ 1 │
└──────────────────────────┘
```
Query:
``` sql
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
```
Result:
``` text
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
│ -0.037 │
└─────────────────────────────────────────────────────┘
```
**See Also**
- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)

View File

@ -67,9 +67,8 @@ Leap seconds are not accounted for.
## toUnixTimestamp {#to-unix-timestamp}
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
For Date argument: the behaviour is unspecified.
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
**Syntax**
@ -535,18 +534,7 @@ dateDiff('unit', startdate, enddate, [timezone])
- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
Supported values:
| unit |
| ---- |
|second |
|minute |
|hour |
|day |
|week |
|month |
|quarter |
|year |
Supported values: second, minute, hour, day, week, month, quarter, year.
- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).

View File

@ -15,10 +15,18 @@ A function that allows grouping multiple columns.
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can't be written to a table.
**See Also**
- [Tuple](../../sql-reference/functions/tuple-functions.md#tuple)
## tupleElement(tuple, n), operator x.N {#tupleelementtuple-n-operator-x-n}
A function that allows getting a column from a tuple.
N is the column index, starting from 1. N must be a constant. N must be a strictly positive integer no greater than the size of the tuple.
There is no cost to execute the function.
**See Also**
- [TupleElement](../../sql-reference/functions/tuple-functions.md#tupleelement)
[Original article](https://clickhouse.tech/docs/en/query_language/functions/in_functions/) <!--hide-->

View File

@ -1,5 +1,5 @@
---
toc_priority: 66
toc_priority: 67
toc_title: Other
---

View File

@ -0,0 +1,118 @@
---
toc_priority: 66
toc_title: Tuples
---
# Functions for Working with Tuples {#tuple-functions}
## Tuple {#tuple}
A function that allows grouping multiple columns.
For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function.
Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table.
**Syntax**
``` sql
tuple(x, y, …)
```
**See Also**
- [Operator (x, y, …)](../../sql-reference/functions/in-functions.md#tuplex-y-operator-x-y)
## TupleElement {#tupleelement}
A function that allows getting a column from a tuple.
N is the column index, starting from 1. N must be a constant. N must be a strictly positive integer no greater than the size of the tuple.
There is no cost to execute the function.
**Syntax**
``` sql
tupleElement(tuple, n)
```
**See Also**
- [Operator x.N](../../sql-reference/functions/in-functions.md#tupleelementtuple-n-operator-x-n)
## Untuple {#untuple}
Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location.
**Syntax**
``` sql
untuple(x)
```
You can use the `EXCEPT` expression to skip columns as a result of the query.
**Parameters**
- `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- None.
**Examples**
Input table:
``` text
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
└─────┴────┴────┴────┴────┴────┴───────────┘
```
Example of using a `Tuple`-type column as the `untuple` function parameter:
Query:
``` sql
SELECT untuple(v6) FROM kv;
```
Result:
``` text
┌─_ut_1─┬─_ut_2─┐
│ 33 │ ab │
│ 44 │ cd │
│ 55 │ ef │
│ 66 │ gh │
│ 77 │ kl │
└───────┴───────┘
```
Example of using an `EXCEPT` expression:
Query:
``` sql
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
```
Result:
``` text
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
└─────┴────┴────┴────┴───────────┘
```
**See Also**
- [Tuple](../../sql-reference/data-types/tuple.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-functions/) <!--hide-->

View File

@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).
For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
For example:
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
- `cutToFirstSignificantSubdomain('tr') = ''`.
### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
For example:
- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`.
- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`.
### port(URL\[, default_port = 0\]) {#port}

View File

@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
### Drawbacks {#drawbacks}
Queries that use `FINAL` are executed not as fast as similar queries that dont, because:
Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
- Query is executed in a single thread and data is merged during query execution.
- Data is merged during query execution.
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}

View File

@ -6,7 +6,7 @@ toc_title: GROUP BY
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
## WITH ROLLUP Modifier {#with-rollup-modifier}
`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression.
In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line.
!!! note "Note"
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
**Example**
Consider the table t:
```text
┌─year─┬─month─┬─day─┐
│ 2019 │ 1 │ 5 │
│ 2019 │ 1 │ 15 │
│ 2020 │ 1 │ 5 │
│ 2020 │ 1 │ 15 │
│ 2020 │ 10 │ 5 │
│ 2020 │ 10 │ 15 │
└──────┴───────┴─────┘
```
Query:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
```
As `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left:
- `GROUP BY year, month, day`;
- `GROUP BY year, month` (and `day` column is filled with zeros);
- `GROUP BY year` (now `month, day` columns are both filled with zeros);
- and totals (and all three key expression columns are zeros).
```text
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 10 │ 15 │ 1 │
│ 2020 │ 1 │ 5 │ 1 │
│ 2019 │ 1 │ 5 │ 1 │
│ 2020 │ 1 │ 15 │ 1 │
│ 2019 │ 1 │ 15 │ 1 │
│ 2020 │ 10 │ 5 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 1 │ 0 │ 2 │
│ 2020 │ 1 │ 0 │ 2 │
│ 2020 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 0 │ 0 │ 2 │
│ 2020 │ 0 │ 0 │ 4 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
## WITH CUBE Modifier {#with-cube-modifier}
`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line.
!!! note "Note"
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
**Example**
Consider the table t:
```text
┌─year─┬─month─┬─day─┐
│ 2019 │ 1 │ 5 │
│ 2019 │ 1 │ 15 │
│ 2020 │ 1 │ 5 │
│ 2020 │ 1 │ 15 │
│ 2020 │ 10 │ 5 │
│ 2020 │ 10 │ 15 │
└──────┴───────┴─────┘
```
Query:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
```
As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
- `GROUP BY year, month, day`
- `GROUP BY year, month`
- `GROUP BY year, day`
- `GROUP BY year`
- `GROUP BY month, day`
- `GROUP BY month`
- `GROUP BY day`
- and totals.
Columns, excluded from `GROUP BY`, are filled with zeros.
```text
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 10 │ 15 │ 1 │
│ 2020 │ 1 │ 5 │ 1 │
│ 2019 │ 1 │ 5 │ 1 │
│ 2020 │ 1 │ 15 │ 1 │
│ 2019 │ 1 │ 15 │ 1 │
│ 2020 │ 10 │ 5 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 1 │ 0 │ 2 │
│ 2020 │ 1 │ 0 │ 2 │
│ 2020 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 0 │ 5 │ 2 │
│ 2019 │ 0 │ 5 │ 1 │
│ 2020 │ 0 │ 15 │ 2 │
│ 2019 │ 0 │ 15 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 0 │ 0 │ 2 │
│ 2020 │ 0 │ 0 │ 4 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 1 │ 5 │ 2 │
│ 0 │ 10 │ 15 │ 1 │
│ 0 │ 10 │ 5 │ 1 │
│ 0 │ 1 │ 15 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 1 │ 0 │ 4 │
│ 0 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 5 │ 3 │
│ 0 │ 0 │ 15 │ 3 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
## WITH TOTALS Modifier {#with-totals-modifier}
If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values).
@ -88,8 +236,6 @@ SELECT
FROM hits
```
However, in contrast to standard SQL, if the table doesnt have any rows (either there arent any at all, or there arent any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions.
As opposed to MySQL (and conforming to standard SQL), you cant get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the any aggregate function (get the first encountered value) or min/max.
Example:
@ -105,10 +251,6 @@ GROUP BY domain
For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.
`GROUP BY` is not supported for array columns.
A constant cant be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`.
## Implementation Details {#implementation-details}
Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.

View File

@ -20,12 +20,12 @@ SELECT [DISTINCT] expr_list
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr]
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[UNION ...]
[INTO OUTFILE filename]
[FORMAT format]
```
@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
- [SELECT clause](#select-clause)
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
- [UNION clause](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)
@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
## SELECT modifiers {#select-modifiers}
You can use the following modifiers in `SELECT` queries.
### APPLY {#apply-modifier}
Allows you to invoke some function for each row returned by an outer table expression of a query.
**Syntax:**
``` sql
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
```
**Example:**
``` sql
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i);
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
SELECT * APPLY(sum) FROM columns_transformers;
```
```
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
│ 220 │ 18 │ 347 │
└────────┴────────┴────────┘
```
### EXCEPT {#except-modifier}
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
**Syntax:**
``` sql
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
```
**Example:**
``` sql
SELECT * EXCEPT (i) from columns_transformers;
```
```
┌──j─┬───k─┐
│ 10 │ 324 │
│ 8 │ 23 │
└────┴─────┘
```
### REPLACE {#replace-modifier}
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
This modifier does not change the names or order of columns. However, it can change the value and the value type.
**Syntax:**
``` sql
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
```
**Example:**
``` sql
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
```
```
┌───i─┬──j─┬───k─┐
│ 101 │ 10 │ 324 │
│ 121 │ 8 │ 23 │
└─────┴────┴─────┘
```
### Modifier Combinations {#modifier-combinations}
You can use each modifier separately or combine them.
**Examples:**
Using the same modifier multiple times.
``` sql
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
```
```
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
│ 2 │ 3 │
└──────────────────────────┴──────────────────────────┘
```
Using multiple modifiers in a single query.
``` sql
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
```
```
┌─sum(plus(i, 1))─┬─sum(k)─┐
│ 222 │ 347 │
└─────────────────┴────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
<!--hide-->

View File

@ -1,5 +1,5 @@
---
toc_title: UNION ALL
toc_title: UNION
---
# UNION ALL Clause {#union-all-clause}
@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined
Queries that are parts of `UNION ALL` cant be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
## Limitations {#limitations}
# UNION DISTINCT Clause {#union-distinct-clause}
The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` applies a distinct transform to the union result; it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
# UNION Clause {#union-clause}
By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify the union mode with the `union_default_mode` setting, whose value can be 'ALL', 'DISTINCT', or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown.
Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.
## Implementation Details {#implementation-details}
Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.

View File

@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas
| Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) |
| H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC
| googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) |
| اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es"
| googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u
| googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -2187,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
Значение по умолчанию: `1`.
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV.
Значение по умолчанию: `\N`.
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

View File

@ -0,0 +1,70 @@
# system.replicated_fetches {#system_tables-replicated_fetches}
Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик.
Столбцы:
- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных.
- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1.
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска.
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции.
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах.
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах.
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике.
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики.
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики.
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы.
- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса.
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока.
**Пример**
``` sql
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
database: default
table: t
elapsed: 7.243039876
progress: 0.41832135995612835
result_part_name: all_0_0_0
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
partition_id: all
total_size_bytes_compressed: 1052783726
bytes_read_compressed: 440401920
source_replica_path: /clickhouse/test/t/replicas/1
source_replica_hostname: node1
source_replica_port: 9009
interserver_scheme: http
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
to_detached: 0
thread_id: 54
```
**Смотрите также**
- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated)
[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->

View File

@ -0,0 +1,43 @@
# Обфускатор ClickHouse
Простой инструмент для обфускации табличных данных.
Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные.
Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность.
Обфускатор предназначен для сохранения следующих свойств данных:
- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов;
- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца;
- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой;
- вероятностное распределение длины строк;
- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`;
- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков;
- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой;
- дату из значений `DateTime`;
- кодировка UTF-8 значений строки;
- строковые значения выглядят естественным образом.
Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д.
Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором.
Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете.
Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать.
Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги.
Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение.
Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).

View File

@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
└────────┘
```
Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`.
Теперь с помощью функции `groupArray` сформируем массив из столбца `y`:
``` sql

View File

@ -0,0 +1,40 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Инициализирует агрегацию для введенных строчек. Предназначена для функций с суффиксом `State`.
Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregatingMergeTree`.
**Синтаксис**
``` sql
initializeAggregation (aggregate_function, column_1, column_2);
```
**Параметры**
- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
**Возвращаемое значение**
Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAggregation`.
Пример:
Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
**Пример**
Запрос:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Результат:
``` text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```

View File

@ -0,0 +1,53 @@
## rankCorr {#agg_function-rankcorr}
Вычисляет коэффициент ранговой корреляции.
**Синтаксис**
``` sql
rankCorr(x, y)
```
**Параметры**
- `x` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
- `y` — Произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Возвращаемое значение**
- Возвращает коэффициент ранговой корреляции рангов x и y. Значение коэффициента корреляции изменяется в пределах от -1 до +1. Если передается менее двух аргументов, функция возвращает исключение. Значение, близкое к +1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины увеличивается и вторая случайная величина. Значение, близкое к -1, указывает на высокую линейную зависимость, и с увеличением одной случайной величины вторая случайная величина уменьшается. Значение, близкое или равное 0, означает отсутствие связи между двумя случайными величинами.
Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Пример**
Запрос:
``` sql
SELECT rankCorr(number, number) FROM numbers(100);
```
Результат:
``` text
┌─rankCorr(number, number)─┐
│ 1 │
└──────────────────────────┘
```
Запрос:
``` sql
SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
```
Результат:
``` text
┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
│ -0.037 │
└─────────────────────────────────────────────────────┘
```
**Смотрите также**
- [Коэффициент ранговой корреляции Спирмена](https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D1%8F#%D0%9A%D0%BE%D1%8D%D1%84%D1%84%D0%B8%D1%86%D0%B8%D0%B5%D0%BD%D1%82_%D1%80%D0%B0%D0%BD%D0%B3%D0%BE%D0%B2%D0%BE%D0%B9_%D0%BA%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D0%B8_%D0%A1%D0%BF%D0%B8%D1%80%D0%BC%D0%B5%D0%BD%D0%B0)

View File

@ -57,32 +57,31 @@ SELECT
## toUnixTimestamp {#to-unix-timestamp}
For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
For Date argument: the behaviour is unspecified.
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию).
**Syntax**
**Синтаксис**
``` sql
toUnixTimestamp(datetime)
toUnixTimestamp(str, [timezone])
```
**Returned value**
**Возвращаемое значение**
- Returns the unix timestamp.
- Возвращает Unix Timestamp.
Type: `UInt32`.
Тип: `UInt32`.
**Example**
**Пример**
Query:
Запрос:
``` sql
SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp
```
Result:
Результат:
``` text
┌─unix_timestamp─┐
@ -490,4 +489,4 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g')
└────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->

View File

@ -15,10 +15,18 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
**Смотрите также**
- [Tuple](../../sql-reference/functions/tuple-functions.md#tuple)
## tupleElement(tuple, n), оператор x.N {#tupleelementtuple-n-operator-x-n}
Функция, позволяющая достать столбец из кортежа.
N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
Выполнение функции ничего не стоит.
**Смотрите также**
- [TupleElement](../../sql-reference/functions/tuple-functions.md#tupleelement)
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/in_functions/) <!--hide-->

View File

@ -0,0 +1,118 @@
---
toc_priority: 68
toc_title: Функции для работы с кортежами
---
# Функции для работы с кортежами {#tuple-functions}
## Tuple {#tuple}
Функция, позволяющая сгруппировать несколько столбцов.
Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
**Синтаксис**
``` sql
tuple(x, y, …)
```
**Смотрите также**
- [Оператор (x, y, …)](../../sql-reference/functions/in-functions.md#tuplex-y-operator-x-y)
## TupleElement {#tupleelement}
Функция, позволяющая достать столбец из кортежа.
N - индекс столбца, начиная с 1. N должно быть константой. N должно быть целым строго положительным числом, не большим размера кортежа.
Выполнение функции ничего не стоит.
**Синтаксис**
``` sql
tupleElement(tuple, n)
```
**Смотрите также**
- [Оператор x.N](../../sql-reference/functions/in-functions.md#tupleelementtuple-n-operator-x-n)
## Untuple {#untuple}
Выполняет синтаксическую подстановку элементов [кортежа](../../sql-reference/data-types/tuple.md#tuplet1-t2) в место вызова.
**Синтаксис**
``` sql
untuple(x)
```
Чтобы пропустить некоторые столбцы в результате запроса, вы можете использовать выражение `EXCEPT`.
**Параметры**
- `x` - функция `tuple`, столбец или кортеж элементов. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Нет.
**Примеры**
Входная таблица:
``` text
┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
└─────┴────┴────┴────┴────┴────┴───────────┘
```
Пример использования столбца типа `Tuple` в качестве параметра функции `untuple`:
Запрос:
``` sql
SELECT untuple(v6) FROM kv;
```
Результат:
``` text
┌─_ut_1─┬─_ut_2─┐
│ 33 │ ab │
│ 44 │ cd │
│ 55 │ ef │
│ 66 │ gh │
│ 77 │ kl │
└───────┴───────┘
```
Пример использования выражения `EXCEPT`:
Запрос:
``` sql
SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
```
Результат:
``` text
┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
└─────┴────┴────┴────┴───────────┘
```
**Смотрите также**
- [Tuple](../../sql-reference/data-types/tuple.md)
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-functions/) <!--hide-->

View File

@ -27,9 +27,9 @@ toc_title: FROM
### Недостатки {#drawbacks}
Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что:
Запросы, которые используют `FINAL`, выполняются немного медленнее, чем аналогичные запросы без него, потому что:
- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса.
- Данные мёржатся во время выполнения запроса.
- Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}

View File

@ -43,6 +43,153 @@ toc_title: GROUP BY
Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
## Модификатор WITH ROLLUP {#with-rollup-modifier}
Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
!!! note "Примечание"
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
**Пример**
Рассмотрим таблицу t:
```text
┌─year─┬─month─┬─day─┐
│ 2019 │ 1 │ 5 │
│ 2019 │ 1 │ 15 │
│ 2020 │ 1 │ 5 │
│ 2020 │ 1 │ 15 │
│ 2020 │ 10 │ 5 │
│ 2020 │ 10 │ 15 │
└──────┴───────┴─────┘
```
Запрос:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
```
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
- `GROUP BY year, month, day`;
- `GROUP BY year, month` (а колонка `day` заполнена нулями);
- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
```text
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 10 │ 15 │ 1 │
│ 2020 │ 1 │ 5 │ 1 │
│ 2019 │ 1 │ 5 │ 1 │
│ 2020 │ 1 │ 15 │ 1 │
│ 2019 │ 1 │ 15 │ 1 │
│ 2020 │ 10 │ 5 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 1 │ 0 │ 2 │
│ 2020 │ 1 │ 0 │ 2 │
│ 2020 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 0 │ 0 │ 2 │
│ 2020 │ 0 │ 0 │ 4 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
## Модификатор WITH CUBE {#with-cube-modifier}
Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
!!! note "Примечание"
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
**Пример**
Рассмотрим таблицу t:
```text
┌─year─┬─month─┬─day─┐
│ 2019 │ 1 │ 5 │
│ 2019 │ 1 │ 15 │
│ 2020 │ 1 │ 5 │
│ 2020 │ 1 │ 15 │
│ 2020 │ 10 │ 5 │
│ 2020 │ 10 │ 15 │
└──────┴───────┴─────┘
```
Запрос:
```sql
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
```
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
- `GROUP BY year, month, day`
- `GROUP BY year, month`
- `GROUP BY year, day`
- `GROUP BY year`
- `GROUP BY month, day`
- `GROUP BY month`
- `GROUP BY day`
- и общий итог.
Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
```text
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 10 │ 15 │ 1 │
│ 2020 │ 1 │ 5 │ 1 │
│ 2019 │ 1 │ 5 │ 1 │
│ 2020 │ 1 │ 15 │ 1 │
│ 2019 │ 1 │ 15 │ 1 │
│ 2020 │ 10 │ 5 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 1 │ 0 │ 2 │
│ 2020 │ 1 │ 0 │ 2 │
│ 2020 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │ 0 │ 5 │ 2 │
│ 2019 │ 0 │ 5 │ 1 │
│ 2020 │ 0 │ 15 │ 2 │
│ 2019 │ 0 │ 15 │ 1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │ 0 │ 0 │ 2 │
│ 2020 │ 0 │ 0 │ 4 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 1 │ 5 │ 2 │
│ 0 │ 10 │ 15 │ 1 │
│ 0 │ 10 │ 5 │ 1 │
│ 0 │ 1 │ 15 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 1 │ 0 │ 4 │
│ 0 │ 10 │ 0 │ 2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 5 │ 3 │
│ 0 │ 0 │ 15 │ 3 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 0 │ 0 │ 0 │ 6 │
└──────┴───────┴─────┴─────────┘
```
## Модификатор WITH TOTALS {#with-totals-modifier}
Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения).
@ -86,8 +233,6 @@ SELECT
FROM hits
```
Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций.
В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max.
Пример:
@ -103,10 +248,6 @@ GROUP BY domain
GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций.
Не поддерживается GROUP BY по столбцам-массивам.
Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`.
## Детали реализации {#implementation-details}
Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки.

View File

@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr]
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]

View File

@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri"
| googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -11,7 +11,6 @@
| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
| googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) |
| hyperscan | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) |
| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |

View File

@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00"
# 示例数据集 {#example-datasets}
本节介绍如何获取示例数据集并将其导入ClickHouse。
本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集还可以使用示例查询。
对于某些数据集示例查询也可用。
- [脱敏的Yandex.Metrica数据集](metrica.md)
- [星型基准测试](star-schema.md)
- [维基访问数据](wikistat.md)
- [Criteo TB级别点击日志](criteo.md)
- [AMPLab大数据基准测试](amplab-benchmark.md)
- [纽约出租车数据](nyc-taxi.md)
- [航班飞行数据](ontime.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -1,17 +1,17 @@
---
toc_priority: 21
toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
toc_priority: 15
toc_title: Yandex.Metrica Data
---
# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
Dataset由两个表组成其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科
数据集由两个表组成,包含关于Yandex.Metrica的hits(`hits_v1`)和visits(`visits_v1`)的匿名数据。你可以在[ClickHouse历史](../../introduction/history.md)部分阅读更多关于Yandex.Metrica的信息。
数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
数据集由两个表组成,其中任何一个都可以作为压缩的`tsv.xz`文件或准备好的分区下载。除此之外,包含1亿行的`hits`表扩展版本可作为TSV文件从 https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 下载,也可作为准备好的分区从 https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz 下载。
## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}
下载和导入点击表:
下载和导入`hits`表:
``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
@ -21,7 +21,7 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
下载和导入访问:
下载和导入`visits`表:
``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
@ -31,9 +31,9 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
```
## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}
压缩的TSV文件下载并导入命中:
TSV压缩文件下载并导入`hits`:
``` bash
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
从压缩tsv文件下载和导入访问:
从压缩tsv文件下载和导入`visits`:
``` bash
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
## 查询示例 {#example-queries}
[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
[使用教程](../../getting-started/tutorial.md)基于Yandex.Metrica数据集,开始使用该数据集的推荐方式就是学习该教程。
查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的它们被命名为 `test.hists``test.visits` 那里)
可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到对这些表的查询的其他示例(它们被命名为`test.hits`和`test.visits`)。

Some files were not shown because too many files have changed in this diff Show More