mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge branch 'master' into fixes_for_replicated_merge_tree2
This commit is contained in:
commit
ea77ff9022
@ -1,7 +1,7 @@
|
||||
# To run clang-tidy from CMake, build ClickHouse with -DENABLE_CLANG_TIDY=1. To show all warnings, it is
|
||||
# recommended to pass "-k0" to Ninja.
|
||||
|
||||
# Enable all checks + disale selected checks. Feel free to remove disabled checks from below list if
|
||||
# Enable all checks + disable selected checks. Feel free to remove disabled checks from below list if
|
||||
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
|
||||
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
|
||||
|
||||
|
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1,3 +1,4 @@
|
||||
contrib/* linguist-vendored
|
||||
*.h linguist-language=C++
|
||||
tests/queries/0_stateless/data_json/* binary
|
||||
tests/queries/0_stateless/*.reference -crlf
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -284,3 +284,6 @@
|
||||
[submodule "contrib/llvm-project"]
|
||||
path = contrib/llvm-project
|
||||
url = https://github.com/ClickHouse/llvm-project.git
|
||||
[submodule "contrib/corrosion"]
|
||||
path = contrib/corrosion
|
||||
url = https://github.com/corrosion-rs/corrosion.git
|
||||
|
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(ClickHouse LANGUAGES C CXX ASM)
|
||||
|
||||
@ -495,6 +495,14 @@ endif ()
|
||||
|
||||
enable_testing() # Enable for tests without binary
|
||||
|
||||
option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended for any occasions. If it is enabled, it allows building with alternative OpenSSL library. By default, ClickHouse is using BoringSSL, which is better. Do not use this option." OFF)
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
message (STATUS "Build and uses OpenSSL library instead of BoringSSL. This is strongly discouraged. Your build of ClickHouse will be unsupported.")
|
||||
set(ENABLE_SSL 1)
|
||||
target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations")
|
||||
endif ()
|
||||
|
||||
# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
|
||||
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
|
||||
set (CLICKHOUSE_ETC_DIR "/etc")
|
||||
@ -567,13 +575,17 @@ function (add_native_target)
|
||||
set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV})
|
||||
endfunction (add_native_target)
|
||||
|
||||
set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
|
||||
include_directories(${ConfigIncludePath})
|
||||
set(CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
|
||||
include_directories(${CONFIG_INCLUDE_PATH})
|
||||
|
||||
# Add as many warnings as possible for our own code.
|
||||
include (cmake/warnings.cmake)
|
||||
include (cmake/print_flags.cmake)
|
||||
|
||||
if (ENABLE_RUST)
|
||||
add_subdirectory (rust)
|
||||
endif()
|
||||
|
||||
add_subdirectory (base)
|
||||
add_subdirectory (src)
|
||||
add_subdirectory (programs)
|
||||
|
@ -5,6 +5,7 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
## Useful Links
|
||||
|
||||
* [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page.
|
||||
* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers.
|
||||
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
|
||||
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
|
||||
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
|
||||
@ -15,5 +16,5 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Upcoming events
|
||||
* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in person meetup hosted by our friends at Barracuda in Bay Area.
|
||||
* [**v22.10 Release Webinar**](https://clickhouse.com/company/events/v22-10-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [**Introducing ClickHouse Cloud**](https://clickhouse.com/company/events/cloud-beta) Introducing ClickHouse as a service, built by creators and maintainers of the fastest OLAP database on earth. Join Tanya Bragin for a detailed walkthrough of ClickHouse Cloud capabilities, as well as a peek behind the curtain to understand the unique architecture that makes our service tick.
|
||||
|
@ -23,7 +23,7 @@ namespace
|
||||
{
|
||||
|
||||
/// Trim ending whitespace inplace
|
||||
void trim(String & s)
|
||||
void rightTrim(String & s)
|
||||
{
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
|
||||
}
|
||||
@ -441,7 +441,7 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
|
||||
return (errno != EAGAIN) ? ABORT : RESET_LINE;
|
||||
input = cinput;
|
||||
|
||||
trim(input);
|
||||
rightTrim(input);
|
||||
return INPUT_LINE;
|
||||
}
|
||||
|
||||
@ -512,6 +512,9 @@ void ReplxxLineReader::openInteractiveHistorySearch()
|
||||
/// NOTE: You can use one of the following to configure the behaviour additionally:
|
||||
/// - SKIM_DEFAULT_OPTIONS
|
||||
/// - FZF_DEFAULT_OPTS
|
||||
///
|
||||
/// And also note, that fzf and skim is 95% compatible (at least option
|
||||
/// that is used here)
|
||||
std::string fuzzy_finder_command = fmt::format(
|
||||
"{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}",
|
||||
fuzzy_finder, history_file.getPath(), output_file.getPath());
|
||||
@ -521,7 +524,8 @@ void ReplxxLineReader::openInteractiveHistorySearch()
|
||||
{
|
||||
if (executeCommand(argv) == 0)
|
||||
{
|
||||
const std::string & new_query = readFile(output_file.getPath());
|
||||
std::string new_query = readFile(output_file.getPath());
|
||||
rightTrim(new_query);
|
||||
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
|
||||
}
|
||||
}
|
||||
|
@ -123,11 +123,15 @@
|
||||
/// - tries to print failed assertion into server log
|
||||
/// It can be used for all assertions except heavy ones.
|
||||
/// Heavy assertions (that run loops or call complex functions) are allowed in debug builds only.
|
||||
/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds,
|
||||
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
|
||||
#if !defined(chassert)
|
||||
#if defined(ABORT_ON_LOGICAL_ERROR)
|
||||
#define chassert(x) static_cast<bool>(x) ? void(0) : abortOnFailedAssertion(#x)
|
||||
#define UNREACHABLE() abort()
|
||||
#else
|
||||
#define chassert(x) ((void)0)
|
||||
#define UNREACHABLE() __builtin_unreachable()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -142,7 +146,9 @@
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
|
||||
|
||||
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
|
||||
/// Consider adding a comment before using these macros.
|
||||
/// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of
|
||||
/// suppressing them in the whole function
|
||||
/// Consider adding a comment when using these macros.
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }())
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }())
|
||||
|
||||
@ -159,9 +165,9 @@
|
||||
# define TSA_REQUIRES_SHARED(...)
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x)
|
||||
# define TSA_READ_ONE_THREAD(x)
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x)
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x)
|
||||
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
|
@ -1,6 +1,7 @@
|
||||
#if defined(OS_LINUX)
|
||||
# include <sys/syscall.h>
|
||||
#endif
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <base/safeExit.h>
|
||||
#include <base/defines.h>
|
||||
@ -11,7 +12,7 @@
|
||||
/// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately,
|
||||
/// while connection handling threads are still run.
|
||||
(void)syscall(SYS_exit_group, code);
|
||||
__builtin_unreachable();
|
||||
UNREACHABLE();
|
||||
#else
|
||||
_exit(code);
|
||||
#endif
|
||||
|
@ -176,6 +176,249 @@ void __explicit_bzero_chk(void * buf, size_t len, size_t unused)
|
||||
}
|
||||
|
||||
|
||||
#include <unistd.h>
|
||||
#include "syscall.h"
|
||||
|
||||
ssize_t copy_file_range(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags)
|
||||
{
|
||||
return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
|
||||
}
|
||||
|
||||
|
||||
long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags)
|
||||
{
|
||||
return syscall(SYS_splice, fd_in, off_in, fd_out, off_out, len, flags);
|
||||
}
|
||||
|
||||
|
||||
#define _BSD_SOURCE
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(__aarch64__)
|
||||
struct statx {
|
||||
uint32_t stx_mask;
|
||||
uint32_t stx_blksize;
|
||||
uint64_t stx_attributes;
|
||||
uint32_t stx_nlink;
|
||||
uint32_t stx_uid;
|
||||
uint32_t stx_gid;
|
||||
uint16_t stx_mode;
|
||||
uint16_t pad1;
|
||||
uint64_t stx_ino;
|
||||
uint64_t stx_size;
|
||||
uint64_t stx_blocks;
|
||||
uint64_t stx_attributes_mask;
|
||||
struct {
|
||||
int64_t tv_sec;
|
||||
uint32_t tv_nsec;
|
||||
int32_t pad;
|
||||
} stx_atime, stx_btime, stx_ctime, stx_mtime;
|
||||
uint32_t stx_rdev_major;
|
||||
uint32_t stx_rdev_minor;
|
||||
uint32_t stx_dev_major;
|
||||
uint32_t stx_dev_minor;
|
||||
uint64_t spare[14];
|
||||
};
|
||||
#endif
|
||||
|
||||
int statx(int fd, const char *restrict path, int flag,
|
||||
unsigned int mask, struct statx *restrict statxbuf)
|
||||
{
|
||||
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
|
||||
}
|
||||
|
||||
|
||||
#include <syscall.h>
|
||||
|
||||
ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
|
||||
{
|
||||
/// There was cancellable syscall (syscall_cp), but I don't care too.
|
||||
return syscall(SYS_getrandom, buf, buflen, flags);
|
||||
}
|
||||
|
||||
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define ALIGN (sizeof(size_t))
|
||||
#define ONES ((size_t)-1/UCHAR_MAX)
|
||||
#define HIGHS (ONES * (UCHAR_MAX/2+1))
|
||||
#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
|
||||
|
||||
char *__strchrnul(const char *s, int c)
|
||||
{
|
||||
c = (unsigned char)c;
|
||||
if (!c) return (char *)s + strlen(s);
|
||||
|
||||
#ifdef __GNUC__
|
||||
typedef size_t __attribute__((__may_alias__)) word;
|
||||
const word *w;
|
||||
for (; (uintptr_t)s % ALIGN; s++)
|
||||
if (!*s || *(unsigned char *)s == c) return (char *)s;
|
||||
size_t k = ONES * c;
|
||||
for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++);
|
||||
s = (void *)w;
|
||||
#endif
|
||||
for (; *s && *(unsigned char *)s != c; s++);
|
||||
return (char *)s;
|
||||
}
|
||||
|
||||
int __execvpe(const char *file, char *const argv[], char *const envp[])
|
||||
{
|
||||
const char *p, *z, *path = getenv("PATH");
|
||||
size_t l, k;
|
||||
int seen_eacces = 0;
|
||||
|
||||
errno = ENOENT;
|
||||
if (!*file) return -1;
|
||||
|
||||
if (strchr(file, '/'))
|
||||
return execve(file, argv, envp);
|
||||
|
||||
if (!path) path = "/usr/local/bin:/bin:/usr/bin";
|
||||
k = strnlen(file, NAME_MAX+1);
|
||||
if (k > NAME_MAX) {
|
||||
errno = ENAMETOOLONG;
|
||||
return -1;
|
||||
}
|
||||
l = strnlen(path, PATH_MAX-1)+1;
|
||||
|
||||
for(p=path; ; p=z) {
|
||||
char b[l+k+1];
|
||||
z = __strchrnul(p, ':');
|
||||
if (z-p >= l) {
|
||||
if (!*z++) break;
|
||||
continue;
|
||||
}
|
||||
memcpy(b, p, z-p);
|
||||
b[z-p] = '/';
|
||||
memcpy(b+(z-p)+(z>p), file, k+1);
|
||||
execve(b, argv, envp);
|
||||
switch (errno) {
|
||||
case EACCES:
|
||||
seen_eacces = 1;
|
||||
case ENOENT:
|
||||
case ENOTDIR:
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
if (!*z++) break;
|
||||
}
|
||||
if (seen_eacces) errno = EACCES;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
#include "spawn.h"
|
||||
|
||||
int posix_spawnp(pid_t *restrict res, const char *restrict file,
|
||||
const posix_spawn_file_actions_t *fa,
|
||||
const posix_spawnattr_t *restrict attr,
|
||||
char *const argv[restrict], char *const envp[restrict])
|
||||
{
|
||||
posix_spawnattr_t spawnp_attr = { 0 };
|
||||
if (attr) spawnp_attr = *attr;
|
||||
spawnp_attr.__fn = (void *)__execvpe;
|
||||
return posix_spawn(res, file, fa, &spawnp_attr, argv, envp);
|
||||
}
|
||||
|
||||
#define FDOP_CLOSE 1
|
||||
#define FDOP_DUP2 2
|
||||
#define FDOP_OPEN 3
|
||||
#define FDOP_CHDIR 4
|
||||
#define FDOP_FCHDIR 5
|
||||
|
||||
#define ENOMEM 12
|
||||
#define EBADF 9
|
||||
|
||||
struct fdop {
|
||||
struct fdop *next, *prev;
|
||||
int cmd, fd, srcfd, oflag;
|
||||
mode_t mode;
|
||||
char path[];
|
||||
};
|
||||
|
||||
int posix_spawn_file_actions_init(posix_spawn_file_actions_t *fa) {
|
||||
fa->__actions = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_addchdir_np(posix_spawn_file_actions_t *restrict fa, const char *restrict path) {
|
||||
struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
|
||||
if (!op) return ENOMEM;
|
||||
op->cmd = FDOP_CHDIR;
|
||||
op->fd = -1;
|
||||
strcpy(op->path, path);
|
||||
if ((op->next = fa->__actions)) op->next->prev = op;
|
||||
op->prev = 0;
|
||||
fa->__actions = op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *fa, int fd) {
|
||||
if (fd < 0) return EBADF;
|
||||
struct fdop *op = malloc(sizeof *op);
|
||||
if (!op) return ENOMEM;
|
||||
op->cmd = FDOP_CLOSE;
|
||||
op->fd = fd;
|
||||
if ((op->next = fa->__actions)) op->next->prev = op;
|
||||
op->prev = 0;
|
||||
fa->__actions = op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int srcfd, int fd) {
|
||||
if (srcfd < 0 || fd < 0) return EBADF;
|
||||
struct fdop *op = malloc(sizeof *op);
|
||||
if (!op) return ENOMEM;
|
||||
op->cmd = FDOP_DUP2;
|
||||
op->srcfd = srcfd;
|
||||
op->fd = fd;
|
||||
if ((op->next = fa->__actions)) op->next->prev = op;
|
||||
op->prev = 0;
|
||||
fa->__actions = op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *fa, int fd) {
|
||||
if (fd < 0) return EBADF;
|
||||
struct fdop *op = malloc(sizeof *op);
|
||||
if (!op) return ENOMEM;
|
||||
op->cmd = FDOP_FCHDIR;
|
||||
op->fd = fd;
|
||||
if ((op->next = fa->__actions)) op->next->prev = op;
|
||||
op->prev = 0;
|
||||
fa->__actions = op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *restrict fa, int fd, const char *restrict path, int flags, mode_t mode) {
|
||||
if (fd < 0) return EBADF;
|
||||
struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
|
||||
if (!op) return ENOMEM;
|
||||
op->cmd = FDOP_OPEN;
|
||||
op->fd = fd;
|
||||
op->oflag = flags;
|
||||
op->mode = mode;
|
||||
strcpy(op->path, path);
|
||||
if ((op->next = fa->__actions)) op->next->prev = op;
|
||||
op->prev = 0;
|
||||
fa->__actions = op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *fa) {
|
||||
struct fdop *op = fa->__actions, *next;
|
||||
while (op) {
|
||||
next = op->next;
|
||||
free(op);
|
||||
op = next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
32
base/glibc-compatibility/spawn.h
Normal file
32
base/glibc-compatibility/spawn.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef _SPAWN_H
|
||||
#define _SPAWN_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <features.h>
|
||||
|
||||
typedef struct {
|
||||
int __flags;
|
||||
pid_t __pgrp;
|
||||
sigset_t __def, __mask;
|
||||
int __prio, __pol;
|
||||
void *__fn;
|
||||
char __pad[64-sizeof(void *)];
|
||||
} posix_spawnattr_t;
|
||||
|
||||
typedef struct {
|
||||
int __pad0[2];
|
||||
void *__actions;
|
||||
int __pad[16];
|
||||
} posix_spawn_file_actions_t;
|
||||
|
||||
int posix_spawn(pid_t *__restrict, const char *__restrict, const posix_spawn_file_actions_t *,
|
||||
const posix_spawnattr_t *__restrict, char *const *__restrict, char *const *__restrict);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
2
contrib/AMQP-CPP
vendored
2
contrib/AMQP-CPP
vendored
@ -1 +1 @@
|
||||
Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a
|
||||
Subproject commit 818c2d8ad96a08a5d20fece7d1e1e8855a2b0860
|
8
contrib/CMakeLists.txt
vendored
8
contrib/CMakeLists.txt
vendored
@ -74,7 +74,11 @@ add_contrib (re2-cmake re2)
|
||||
add_contrib (xz-cmake xz)
|
||||
add_contrib (brotli-cmake brotli)
|
||||
add_contrib (double-conversion-cmake double-conversion)
|
||||
add_contrib (boringssl-cmake boringssl)
|
||||
if (NOT ENABLE_EXTERNAL_OPENSSL)
|
||||
add_contrib (boringssl-cmake boringssl)
|
||||
else ()
|
||||
add_contrib (openssl-cmake openssl)
|
||||
endif ()
|
||||
add_contrib (poco-cmake poco)
|
||||
add_contrib (croaring-cmake croaring)
|
||||
add_contrib (zstd-cmake zstd)
|
||||
@ -92,6 +96,8 @@ add_contrib (openldap-cmake openldap)
|
||||
add_contrib (grpc-cmake grpc)
|
||||
add_contrib (msgpack-c-cmake msgpack-c)
|
||||
|
||||
add_contrib (corrosion-cmake corrosion)
|
||||
|
||||
if (ENABLE_FUZZING)
|
||||
add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator)
|
||||
endif()
|
||||
|
@ -4,6 +4,11 @@ if (NOT ENABLE_AMQPCPP)
|
||||
message(STATUS "Not using AMQP-CPP")
|
||||
return()
|
||||
endif()
|
||||
if (OS_FREEBSD)
|
||||
message(STATUS "Not using AMQP-CPP because libuv is disabled")
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
||||
# can be removed once libuv build on MacOS with GCC is possible
|
||||
if (NOT TARGET ch_contrib::uv)
|
||||
|
2
contrib/cctz
vendored
2
contrib/cctz
vendored
@ -1 +1 @@
|
||||
Subproject commit 49c656c62fbd36a1bc20d64c476853bdb7cf7bb9
|
||||
Subproject commit 7a454c25c7d16053bcd327cdd16329212a08fa4a
|
1
contrib/corrosion
vendored
Submodule
1
contrib/corrosion
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit d9dfdefaa3d9ec4ba1245c7070727359c65c7869
|
46
contrib/corrosion-cmake/CMakeLists.txt
Normal file
46
contrib/corrosion-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,46 @@
|
||||
if (NOT ENABLE_LIBRARIES)
|
||||
set(DEFAULT_ENABLE_RUST FALSE)
|
||||
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
|
||||
message(STATUS "Rust is not available on aarch64-apple-darwin")
|
||||
set(DEFAULT_ENABLE_RUST FALSE)
|
||||
else()
|
||||
list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
|
||||
find_package(Rust)
|
||||
set(DEFAULT_ENABLE_RUST ${Rust_FOUND})
|
||||
endif()
|
||||
|
||||
option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST})
|
||||
|
||||
message(STATUS ${ENABLE_RUST})
|
||||
|
||||
if(NOT ENABLE_RUST)
|
||||
message(STATUS "Not using rust")
|
||||
return()
|
||||
endif()
|
||||
|
||||
message(STATUS "Checking Rust toolchain for current target")
|
||||
|
||||
if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
|
||||
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
|
||||
endif()
|
||||
|
||||
if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
|
||||
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
|
||||
endif()
|
||||
|
||||
if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
|
||||
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
|
||||
endif()
|
||||
|
||||
if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
|
||||
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
|
||||
endif()
|
||||
|
||||
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
|
||||
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
|
||||
endif()
|
||||
|
||||
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
|
||||
|
||||
# Define function corrosion_import_crate()
|
||||
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")
|
@ -578,6 +578,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c")
|
||||
endif()
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c")
|
||||
list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c")
|
||||
endif ()
|
||||
|
||||
|
||||
target_sources(_krb5 PRIVATE
|
||||
${ALL_SRCS}
|
||||
)
|
||||
|
@ -59,6 +59,12 @@ set(SRCS
|
||||
|
||||
add_library(_libpq ${SRCS})
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
add_definitions(-DHAVE_BIO_METH_NEW)
|
||||
add_definitions(-DHAVE_HMAC_CTX_NEW)
|
||||
add_definitions(-DHAVE_HMAC_CTX_FREE)
|
||||
endif ()
|
||||
|
||||
target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
|
||||
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
|
||||
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
|
||||
|
2
contrib/llvm-project
vendored
2
contrib/llvm-project
vendored
@ -1 +1 @@
|
||||
Subproject commit 6ca2b5b3927226f6bcf6c656f502ff5d012ad9b6
|
||||
Subproject commit 3a39038345a400e7e767811b142a94355d511215
|
@ -1,4 +1,4 @@
|
||||
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined" OR NOT USE_STATIC_LIBRARIES)
|
||||
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
|
||||
else()
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
|
||||
@ -6,15 +6,16 @@ endif()
|
||||
|
||||
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
|
||||
|
||||
# If USE_STATIC_LIBRARIES=0 was passed to CMake, we'll still build LLVM statically to keep complexity minimal.
|
||||
|
||||
if (NOT ENABLE_EMBEDDED_COMPILER)
|
||||
message(STATUS "Not using LLVM")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# TODO: Enable shared library build
|
||||
# TODO: Enable compilation on AArch64
|
||||
|
||||
set (LLVM_VERSION "13.0.0bundled")
|
||||
set (LLVM_VERSION "15.0.0bundled")
|
||||
set (LLVM_INCLUDE_DIRS
|
||||
"${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include"
|
||||
"${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include"
|
||||
@ -62,9 +63,6 @@ set (REQUIRED_LLVM_LIBRARIES
|
||||
# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
|
||||
# endif ()
|
||||
|
||||
# ld: unknown option: --color-diagnostics
|
||||
# set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
|
||||
|
||||
set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind
|
||||
set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection
|
||||
set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling
|
||||
@ -80,6 +78,7 @@ set(LLVM_ENABLE_LIBXML2 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_LIBEDIT 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_LIBPFM 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_ZLIB 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_ZSTD 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_Z3_SOLVER 0 CACHE INTERNAL "")
|
||||
set(LLVM_INCLUDE_TOOLS 0 CACHE INTERNAL "")
|
||||
set(LLVM_BUILD_TOOLS 0 CACHE INTERNAL "")
|
||||
@ -96,9 +95,6 @@ set(LLVM_INCLUDE_DOCS 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_OCAMLDOC 0 CACHE INTERNAL "")
|
||||
set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
|
||||
|
||||
# C++20 is currently not supported due to ambiguous operator != etc.
|
||||
set (CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
|
||||
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
|
||||
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
|
||||
|
@ -3,6 +3,33 @@
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/test-util:$FROM_TAG
|
||||
|
||||
# Rust toolchain and libraries
|
||||
ENV RUSTUP_HOME=/rust/rustup
|
||||
ENV CARGO_HOME=/rust/cargo
|
||||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
|
||||
RUN chmod 777 -R /rust
|
||||
ENV PATH="/rust/cargo/env:${PATH}"
|
||||
ENV PATH="/rust/cargo/bin:${PATH}"
|
||||
RUN rustup target add aarch64-unknown-linux-gnu && \
|
||||
rustup target add x86_64-apple-darwin && \
|
||||
rustup target add x86_64-unknown-freebsd && \
|
||||
rustup target add aarch64-apple-darwin && \
|
||||
rustup target add powerpc64le-unknown-linux-gnu
|
||||
RUN apt-get install \
|
||||
gcc-aarch64-linux-gnu \
|
||||
build-essential \
|
||||
libc6 \
|
||||
libc6-dev \
|
||||
libc6-dev-arm64-cross \
|
||||
--yes
|
||||
|
||||
# Install CMake 3.20+ for Rust compilation
|
||||
# Used https://askubuntu.com/a/1157132 as reference
|
||||
RUN apt purge cmake --yes
|
||||
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
|
||||
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
|
||||
RUN apt update && apt install cmake --yes
|
||||
|
||||
ENV CC=clang-${LLVM_VERSION}
|
||||
ENV CXX=clang++-${LLVM_VERSION}
|
||||
|
||||
|
@ -19,6 +19,12 @@ RUN apt-get update \
|
||||
pv \
|
||||
--yes --no-install-recommends
|
||||
|
||||
# Install CMake 3.20+ for Rust compilation
|
||||
RUN apt purge cmake --yes
|
||||
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
|
||||
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
|
||||
RUN apt update && apt install cmake --yes
|
||||
|
||||
RUN pip3 install numpy scipy pandas Jinja2
|
||||
|
||||
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
|
||||
|
6
docker/test/fuzzer/allow-nullable-key.xml
Normal file
6
docker/test/fuzzer/allow-nullable-key.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<clickhouse>
|
||||
<!-- Allow nullable key to avoid errors while fuzzing definitions of tables -->
|
||||
<merge_tree>
|
||||
<allow_nullable_key>1</allow_nullable_key>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
@ -94,6 +94,7 @@ function configure
|
||||
# TODO figure out which ones are needed
|
||||
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
|
||||
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
|
||||
cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d
|
||||
|
||||
cat > db/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
@ -240,6 +241,7 @@ quit
|
||||
--receive_data_timeout_ms=10000 \
|
||||
--stacktrace \
|
||||
--query-fuzzer-runs=1000 \
|
||||
--create-query-fuzzer-runs=50 \
|
||||
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
|
||||
$NEW_TESTS_OPT \
|
||||
> >(tail -n 100000 > fuzzer.log) \
|
||||
|
@ -35,6 +35,8 @@ RUN apt-get update \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
rustc \
|
||||
cargo \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
|
@ -11,6 +11,7 @@ RUN apt-get update -y \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
awscli \
|
||||
brotli \
|
||||
lz4 \
|
||||
expect \
|
||||
golang \
|
||||
lsof \
|
||||
@ -35,12 +36,13 @@ RUN apt-get update -y \
|
||||
tree \
|
||||
unixodbc \
|
||||
wget \
|
||||
rustc \
|
||||
cargo \
|
||||
zstd \
|
||||
file \
|
||||
pv \
|
||||
&& apt-get clean
|
||||
|
||||
|
||||
RUN pip3 install numpy scipy pandas Jinja2
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
|
23
docs/changelogs/v22.6.9.11-stable.md
Normal file
23
docs/changelogs/v22.6.9.11-stable.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2022
|
||||
---
|
||||
|
||||
# 2022 Changelog
|
||||
|
||||
### ClickHouse release v22.6.9.11-stable (9ec61dcac49) FIXME as compared to v22.6.8.35-stable (b91dc59a565)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#42089](https://github.com/ClickHouse/ClickHouse/issues/42089): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#41558](https://github.com/ClickHouse/ClickHouse/issues/41558): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#41504](https://github.com/ClickHouse/ClickHouse/issues/41504): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -38,13 +38,13 @@ For other Linux distribution - check the availability of the [prebuild packages]
|
||||
#### Use the latest clang for Builds
|
||||
|
||||
``` bash
|
||||
export CC=clang-14
|
||||
export CXX=clang++-14
|
||||
export CC=clang-15
|
||||
export CXX=clang++-15
|
||||
```
|
||||
|
||||
In this example we use version 14 that is the latest as of Feb 2022.
|
||||
In this example we use version 15 that is the latest as of Sept 2022.
|
||||
|
||||
Gcc can also be used though it is discouraged.
|
||||
Gcc cannot be used.
|
||||
|
||||
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
|
||||
|
||||
|
@ -122,7 +122,7 @@ If you use Arch or Gentoo, you probably know it yourself how to install CMake.
|
||||
|
||||
## C++ Compiler {#c-compiler}
|
||||
|
||||
Compilers Clang starting from version 12 is supported for building ClickHouse.
|
||||
Compilers Clang starting from version 15 is supported for building ClickHouse.
|
||||
|
||||
Clang should be used instead of gcc. Though, our continuous integration (CI) platform runs checks for about a dozen of build combinations.
|
||||
|
||||
@ -146,7 +146,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
|
||||
export CC=clang CXX=clang++
|
||||
cmake ..
|
||||
|
||||
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-14 CXX=clang++-14`. The clang version will be in the script output.
|
||||
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output.
|
||||
|
||||
The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.
|
||||
|
||||
@ -178,7 +178,7 @@ If you get the message: `ninja: error: loading 'build.ninja': No such file or di
|
||||
|
||||
Upon the successful start of the building process, you’ll see the build progress - the number of processed tasks and the total number of tasks.
|
||||
|
||||
While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored.
|
||||
While building messages about LLVM library may show up. They affect nothing and are safe to be ignored.
|
||||
|
||||
Upon successful build you get an executable file `ClickHouse/<build_dir>/programs/clickhouse`:
|
||||
|
||||
@ -272,15 +272,10 @@ Most probably some of the builds will fail at first times. This is due to the fa
|
||||
|
||||
You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily.
|
||||
|
||||
You can use GitHub integrated code browser [here](https://github.dev/ClickHouse/ClickHouse).
|
||||
|
||||
Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
|
||||
|
||||
## Faster builds for development: Split build configuration {#split-build}
|
||||
|
||||
ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. As an alternative, you can instead build dynamically linked shared libraries, allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
|
||||
```
|
||||
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1
|
||||
```
|
||||
|
||||
If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options
|
||||
```
|
||||
-DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
|
||||
|
@ -1,8 +1,7 @@
|
||||
position: 10
|
||||
position: 1
|
||||
label: 'Example Datasets'
|
||||
collapsible: true
|
||||
collapsed: true
|
||||
link:
|
||||
type: generated-index
|
||||
title: Example Datasets
|
||||
slug: /en/getting-started/example-datasets
|
||||
type: doc
|
||||
id: en/getting-started/example-datasets/
|
||||
|
@ -1,9 +1,16 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/cell-towers
|
||||
sidebar_label: Cell Towers
|
||||
sidebar_position: 3
|
||||
title: "Cell Towers"
|
||||
---
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
import CodeBlock from '@theme/CodeBlock';
|
||||
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
|
||||
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
|
||||
|
||||
This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
|
||||
|
||||
As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).
|
||||
@ -13,6 +20,26 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4
|
||||
|
||||
## Get the Dataset {#get-the-dataset}
|
||||
|
||||
<Tabs groupId="deployMethod">
|
||||
<TabItem value="serverless" label="ClickHouse Cloud" default>
|
||||
|
||||
ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
|
||||
<ActionsMenu menu="Load Data" />
|
||||
|
||||
Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data**:
|
||||
|
||||
![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
|
||||
|
||||
Examine the schema of the cell_towers table:
|
||||
```sql
|
||||
DESCRIBE TABLE cell_towers
|
||||
```
|
||||
|
||||
<SQLConsoleDetail />
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="selfmanaged" label="Self-managed">
|
||||
|
||||
1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).
|
||||
|
||||
2. Validate the integrity (optional step):
|
||||
@ -56,7 +83,10 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
|
||||
clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv
|
||||
```
|
||||
|
||||
## Examples {#examples}
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Example queries {#examples}
|
||||
|
||||
1. A number of cell towers by type:
|
||||
|
||||
@ -101,18 +131,31 @@ So, the top countries are: the USA, Germany, and Russia.
|
||||
|
||||
You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
|
||||
|
||||
|
||||
## Use case {#use-case}
|
||||
## Use case: Incorporate geo data {#use-case}
|
||||
|
||||
Using `pointInPolygon` function.
|
||||
|
||||
1. Create a table where we will store polygons:
|
||||
|
||||
<Tabs groupId="deployMethod">
|
||||
<TabItem value="serverless" label="ClickHouse Cloud" default>
|
||||
|
||||
```sql
|
||||
CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64)))
|
||||
ORDER BY polygon;
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="selfmanaged" label="Self-managed">
|
||||
|
||||
```sql
|
||||
CREATE TEMPORARY TABLE
|
||||
moscow (polygon Array(Tuple(Float64, Float64)));
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
2. This is a rough shape of Moscow (without "new Moscow"):
|
||||
|
||||
```sql
|
||||
|
File diff suppressed because one or more lines are too long
@ -13,16 +13,6 @@ Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
|
||||
|
||||
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
|
||||
|
||||
## Download the Dataset {#download-dataset}
|
||||
|
||||
Run the command:
|
||||
|
||||
```bash
|
||||
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
|
||||
```
|
||||
|
||||
Download will take about 2 minutes with good internet connection.
|
||||
|
||||
## Create the Table {#create-table}
|
||||
|
||||
```sql
|
||||
@ -41,31 +31,49 @@ CREATE TABLE uk_price_paid
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String),
|
||||
category UInt8
|
||||
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
```
|
||||
|
||||
## Preprocess and Import Data {#preprocess-import-data}
|
||||
## Preprocess and Insert the Data {#preprocess-import-data}
|
||||
|
||||
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
|
||||
We will use the `url` function to stream the data into ClickHouse. We need to preprocess some of the incoming data first, which includes:
|
||||
- splitting the `postcode` to two different columns - `postcode1` and `postcode2`, which is better for storage and queries
|
||||
- converting the `time` field to date as it only contains 00:00 time
|
||||
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis
|
||||
- transforming `type` and `duration` to more readable `Enum` fields using the [transform](../../sql-reference/functions/other-functions.md#transform) function
|
||||
- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 or 1
|
||||
- drop the last two columns since they all have the same value (which is 0)
|
||||
|
||||
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
|
||||
The `url` function streams the data from the web server into your ClickHouse table. The following command inserts 5 million rows into the `uk_price_paid` table:
|
||||
|
||||
The preprocessing is:
|
||||
- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries;
|
||||
- coverting the `time` field to date as it only contains 00:00 time;
|
||||
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
|
||||
- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform);
|
||||
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1.
|
||||
|
||||
Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
|
||||
|
||||
```bash
|
||||
clickhouse-local --input-format CSV --structure '
|
||||
uuid String,
|
||||
price UInt32,
|
||||
time DateTime,
|
||||
```sql
|
||||
INSERT INTO uk_price_paid
|
||||
WITH
|
||||
splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
toUInt32(price_string) AS price,
|
||||
parseDateTimeBestEffortUS(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county
|
||||
FROM url(
|
||||
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
|
||||
'CSV',
|
||||
'uuid_string String,
|
||||
price_string String,
|
||||
time String,
|
||||
postcode String,
|
||||
a String,
|
||||
b String,
|
||||
@ -78,154 +86,136 @@ clickhouse-local --input-format CSV --structure '
|
||||
district String,
|
||||
county String,
|
||||
d String,
|
||||
e String
|
||||
' --query "
|
||||
WITH splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
price,
|
||||
toDate(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county,
|
||||
d = 'B' AS category
|
||||
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
|
||||
e String'
|
||||
) SETTINGS max_http_get_redirects=10;
|
||||
```
|
||||
|
||||
It will take about 40 seconds.
|
||||
Wait for the data to insert - it will take a minute or two depending on the network speed.
|
||||
|
||||
## Validate the Data {#validate-data}
|
||||
|
||||
Query:
|
||||
Let's verify it worked by seeing how many rows were inserted:
|
||||
|
||||
```sql
|
||||
SELECT count() FROM uk_price_paid;
|
||||
SELECT count()
|
||||
FROM uk_price_paid
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──count()─┐
|
||||
│ 26321785 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
The size of dataset in ClickHouse is just 278 MiB, check it.
|
||||
|
||||
Query:
|
||||
At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
|
||||
SELECT formatReadableSize(total_bytes)
|
||||
FROM system.tables
|
||||
WHERE name = 'uk_price_paid'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─formatReadableSize(total_bytes)─┐
|
||||
│ 278.80 MiB │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
Notice the size of the table is just 221.43 MiB!
|
||||
|
||||
## Run Some Queries {#run-queries}
|
||||
|
||||
Let's run some queries to analyze the data:
|
||||
|
||||
### Query 1. Average Price Per Year {#average-price}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 1000000, 80
|
||||
)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85436 │ ██████▋ │
|
||||
│ 1999 │ 96037 │ ███████▋ │
|
||||
│ 2000 │ 107479 │ ████████▌ │
|
||||
│ 2001 │ 118885 │ █████████▌ │
|
||||
│ 2002 │ 137941 │ ███████████ │
|
||||
│ 2003 │ 155889 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189351 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219378 │ █████████████████▌ │
|
||||
│ 1995 │ 67934 │ █████▍ │
|
||||
│ 1996 │ 71508 │ █████▋ │
|
||||
│ 1997 │ 78536 │ ██████▎ │
|
||||
│ 1998 │ 85441 │ ██████▋ │
|
||||
│ 1999 │ 96038 │ ███████▋ │
|
||||
│ 2000 │ 107487 │ ████████▌ │
|
||||
│ 2001 │ 118888 │ █████████▌ │
|
||||
│ 2002 │ 137948 │ ███████████ │
|
||||
│ 2003 │ 155893 │ ████████████▍ │
|
||||
│ 2004 │ 178888 │ ██████████████▎ │
|
||||
│ 2005 │ 189359 │ ███████████████▏ │
|
||||
│ 2006 │ 203532 │ ████████████████▎ │
|
||||
│ 2007 │ 219375 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236109 │ ██████████████████▊ │
|
||||
│ 2010 │ 236110 │ ██████████████████▊ │
|
||||
│ 2011 │ 232805 │ ██████████████████▌ │
|
||||
│ 2012 │ 238367 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279915 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297266 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313201 │ █████████████████████████ │
|
||||
│ 2017 │ 346097 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350116 │ ████████████████████████████ │
|
||||
│ 2019 │ 351013 │ ████████████████████████████ │
|
||||
│ 2020 │ 369420 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 386903 │ ██████████████████████████████▊ │
|
||||
│ 2012 │ 238381 │ ███████████████████ │
|
||||
│ 2013 │ 256927 │ ████████████████████▌ │
|
||||
│ 2014 │ 280008 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297263 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313518 │ █████████████████████████ │
|
||||
│ 2017 │ 346371 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350556 │ ████████████████████████████ │
|
||||
│ 2019 │ 352184 │ ████████████████████████████▏ │
|
||||
│ 2020 │ 375808 │ ██████████████████████████████ │
|
||||
│ 2021 │ 381105 │ ██████████████████████████████▍ │
|
||||
│ 2022 │ 362572 │ █████████████████████████████ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Query 2. Average Price per Year in London {#average-price-london}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 2000000, 100
|
||||
)
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109116 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180637 │ █████████ │
|
||||
│ 2000 │ 215838 │ ██████████▋ │
|
||||
│ 2001 │ 232994 │ ███████████▋ │
|
||||
│ 2002 │ 263670 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304666 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356191 │ █████████████████▋ │
|
||||
│ 2007 │ 404054 │ ████████████████████▏ │
|
||||
│ 1995 │ 109110 │ █████▍ │
|
||||
│ 1996 │ 118659 │ █████▊ │
|
||||
│ 1997 │ 136526 │ ██████▋ │
|
||||
│ 1998 │ 153002 │ ███████▋ │
|
||||
│ 1999 │ 180633 │ █████████ │
|
||||
│ 2000 │ 215849 │ ██████████▋ │
|
||||
│ 2001 │ 232987 │ ███████████▋ │
|
||||
│ 2002 │ 263668 │ █████████████▏ │
|
||||
│ 2003 │ 278424 │ █████████████▊ │
|
||||
│ 2004 │ 304664 │ ███████████████▏ │
|
||||
│ 2005 │ 322887 │ ████████████████▏ │
|
||||
│ 2006 │ 356195 │ █████████████████▋ │
|
||||
│ 2007 │ 404062 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427753 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519442 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616212 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
|
||||
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
|
||||
│ 2009 │ 427754 │ █████████████████████▍ │
|
||||
│ 2010 │ 480322 │ ████████████████████████ │
|
||||
│ 2011 │ 496278 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519482 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616195 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724121 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
|
||||
│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
|
||||
│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
|
||||
│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
|
||||
Something happened to home prices in 2020! But that is probably not a surprise...
|
||||
|
||||
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
town,
|
||||
@ -240,124 +230,123 @@ GROUP BY
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100;
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
|
||||
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
|
||||
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
|
||||
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
|
||||
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
|
||||
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
|
||||
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
|
||||
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
|
||||
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
|
||||
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
|
||||
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
|
||||
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
|
||||
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
|
||||
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
|
||||
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
|
||||
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
|
||||
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
|
||||
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
|
||||
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
|
||||
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
|
||||
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
|
||||
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
|
||||
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
|
||||
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
|
||||
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
|
||||
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
|
||||
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
|
||||
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
|
||||
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
|
||||
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
|
||||
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
|
||||
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
|
||||
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
```response
|
||||
┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
|
||||
│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
|
||||
│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
|
||||
│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
|
||||
│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
|
||||
│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
|
||||
│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
|
||||
│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
|
||||
│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
|
||||
│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
|
||||
│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
|
||||
│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
|
||||
│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
|
||||
│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
|
||||
│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
|
||||
│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
|
||||
│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
|
||||
│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
|
||||
│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
|
||||
│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
|
||||
│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
|
||||
│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
|
||||
│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
|
||||
│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
|
||||
│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
|
||||
│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
|
||||
│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
|
||||
│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
|
||||
│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
|
||||
│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
|
||||
│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
|
||||
│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
|
||||
│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
|
||||
│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
|
||||
│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
|
||||
│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
|
||||
│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
|
||||
│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
|
||||
│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
|
||||
│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
|
||||
│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
|
||||
│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
|
||||
│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
|
||||
│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
|
||||
│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
|
||||
│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
|
||||
│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
|
||||
│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
|
||||
│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
|
||||
└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
|
||||
|
||||
[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data.
|
||||
[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
|
||||
|
||||
### Build a Projection {#build-projection}
|
||||
|
||||
Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`:
|
||||
Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`:
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
@ -374,25 +363,23 @@ ALTER TABLE uk_price_paid
|
||||
toYear(date),
|
||||
district,
|
||||
town
|
||||
);
|
||||
)
|
||||
```
|
||||
|
||||
Populate the projection for existing data (without it projection will be created for only newly inserted data):
|
||||
Populate the projection for existing data. (Without materializing it, the projection will be created for only newly inserted data):
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
MATERIALIZE PROJECTION projection_by_year_district_town
|
||||
SETTINGS mutations_sync = 1;
|
||||
SETTINGS mutations_sync = 1
|
||||
```
|
||||
|
||||
## Test Performance {#test-performance}
|
||||
|
||||
Let's run the same 3 queries.
|
||||
Let's run the same 3 queries again:
|
||||
|
||||
### Query 1. Average Price Per Year {#average-price-projections}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
@ -400,47 +387,18 @@ SELECT
|
||||
bar(price, 0, 1000000, 80)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year ASC;
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85436 │ ██████▋ │
|
||||
│ 1999 │ 96037 │ ███████▋ │
|
||||
│ 2000 │ 107479 │ ████████▌ │
|
||||
│ 2001 │ 118885 │ █████████▌ │
|
||||
│ 2002 │ 137941 │ ███████████ │
|
||||
│ 2003 │ 155889 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189351 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219378 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236109 │ ██████████████████▊ │
|
||||
│ 2011 │ 232805 │ ██████████████████▌ │
|
||||
│ 2012 │ 238367 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279915 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297266 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313201 │ █████████████████████████ │
|
||||
│ 2017 │ 346097 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350116 │ ████████████████████████████ │
|
||||
│ 2019 │ 351013 │ ████████████████████████████ │
|
||||
│ 2020 │ 369420 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 386903 │ ██████████████████████████████▊ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
The result is the same, but the performance is better!
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
|
||||
```
|
||||
|
||||
|
||||
### Query 2. Average Price Per Year in London {#average-price-london-projections}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
@ -449,48 +407,19 @@ SELECT
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year ASC;
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
Same result, but notice the improvement in query performance:
|
||||
|
||||
```text
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109116 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180637 │ █████████ │
|
||||
│ 2000 │ 215838 │ ██████████▋ │
|
||||
│ 2001 │ 232994 │ ███████████▋ │
|
||||
│ 2002 │ 263670 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304666 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356191 │ █████████████████▋ │
|
||||
│ 2007 │ 404054 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427753 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519442 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616212 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
|
||||
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
|
||||
```
|
||||
|
||||
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
|
||||
|
||||
The condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020).
|
||||
|
||||
Query:
|
||||
The condition (date >= '2020-01-01') needs to be modified so that it matches the projection dimension (`toYear(date) >= 2020)`:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
@ -506,138 +435,16 @@ GROUP BY
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100;
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
Result:
|
||||
Again, the result is the same but notice the improvement in query performance:
|
||||
|
||||
```text
|
||||
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
|
||||
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
|
||||
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
|
||||
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
|
||||
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
|
||||
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
|
||||
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
|
||||
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
|
||||
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
|
||||
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
|
||||
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
|
||||
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
|
||||
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
|
||||
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
|
||||
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
|
||||
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
|
||||
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
|
||||
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
|
||||
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
|
||||
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
|
||||
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
|
||||
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
|
||||
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
|
||||
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
|
||||
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
|
||||
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
|
||||
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
|
||||
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
|
||||
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
|
||||
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
|
||||
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
|
||||
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
```response
|
||||
No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
|
||||
With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
|
||||
```
|
||||
|
||||
### Summary {#summary}
|
||||
|
||||
All 3 queries work much faster and read fewer rows.
|
||||
|
||||
```text
|
||||
Query 1
|
||||
|
||||
no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
|
||||
projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
|
||||
|
||||
|
||||
Query 2
|
||||
|
||||
no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
|
||||
projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
|
||||
|
||||
Query 3
|
||||
|
||||
no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
|
||||
projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
|
||||
```
|
||||
|
||||
### Test It in Playground {#playground}
|
||||
### Test it in the Playground {#playground}
|
||||
|
||||
The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
|
||||
|
26
docs/en/getting-started/index.md
Normal file
26
docs/en/getting-started/index.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/
|
||||
sidebar_position: 0
|
||||
sidebar_label: Overview
|
||||
keywords: [clickhouse, install, tutorial, sample, datasets]
|
||||
pagination_next: 'en/tutorial'
|
||||
---
|
||||
|
||||
# Tutorials and Example Datasets
|
||||
|
||||
We have a lot of resources for helping you get started and learn how ClickHouse works:
|
||||
|
||||
- If you need to get ClickHouse up and running, check out our [Quick Start](../quick-start.mdx)
|
||||
- The [ClickHouse Tutorial](../tutorial.md) analyzes a dataset of New York City taxi rides
|
||||
|
||||
In addition, the sample datasets provide a great experience on working with ClickHouse,
|
||||
learning important techniques and tricks, and seeing how to take advantage of the many powerful
|
||||
functions in ClickHouse. The sample datasets include:
|
||||
|
||||
- The [UK Property Price Paid dataset](../getting-started/example-datasets/uk-price-paid.md) is a good starting point with some interesting SQL queries
|
||||
- The [New York Taxi Data](../getting-started/example-datasets/nyc-taxi.md) has an example of how to insert data from S3 into ClickHouse
|
||||
- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
|
||||
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
|
||||
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
|
||||
|
||||
View the **Tutorials and Datasets** menu for a complete list of sample datasets.
|
@ -1,13 +1,34 @@
|
||||
---
|
||||
sidebar_label: Installation
|
||||
sidebar_position: 1
|
||||
keywords: [clickhouse, install, installation, docs]
|
||||
description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
slug: /en/getting-started/install
|
||||
title: Installation
|
||||
sidebar_label: Install
|
||||
keywords: [clickhouse, install, getting started, quick start]
|
||||
slug: /en/install
|
||||
---
|
||||
|
||||
## System Requirements {#system-requirements}
|
||||
# Installing ClickHouse
|
||||
|
||||
You have two options for getting up and running with ClickHouse:
|
||||
|
||||
- **[ClickHouse Cloud](https://clickhouse.cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse
|
||||
- **Self-managed ClickHouse:** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
|
||||
|
||||
## ClickHouse Cloud
|
||||
|
||||
The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/):
|
||||
|
||||
<div class="eighty-percent">
|
||||
|
||||
![Create a ClickHouse Cloud service](@site/docs/en/_snippets/images/createservice1.png)
|
||||
</div>
|
||||
|
||||
Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md).
|
||||
|
||||
:::note
|
||||
The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data.
|
||||
:::
|
||||
|
||||
## Self-Managed Requirements
|
||||
|
||||
### CPU Architecture
|
||||
|
||||
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
|
||||
@ -19,6 +40,55 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
|
||||
|
||||
To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments.
|
||||
|
||||
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
|
||||
|
||||
It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload.
|
||||
|
||||
### RAM {#ram}
|
||||
|
||||
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
|
||||
|
||||
The required volume of RAM depends on:
|
||||
|
||||
- The complexity of queries.
|
||||
- The amount of data that is processed in queries.
|
||||
|
||||
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
|
||||
|
||||
ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
|
||||
|
||||
### Swap File {#swap-file}
|
||||
|
||||
Disable the swap file for production environments.
|
||||
|
||||
### Storage Subsystem {#storage-subsystem}
|
||||
|
||||
You need to have 2GB of free disk space to install ClickHouse.
|
||||
|
||||
The volume of storage required for your data should be calculated separately. Assessment should include:
|
||||
|
||||
- Estimation of the data volume.
|
||||
|
||||
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
|
||||
|
||||
- The data compression coefficient.
|
||||
|
||||
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
|
||||
|
||||
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
|
||||
|
||||
### Network {#network}
|
||||
|
||||
If possible, use networks of 10G or higher class.
|
||||
|
||||
The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.
|
||||
|
||||
### Software {#software}
|
||||
|
||||
ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
|
||||
|
||||
## Self-Managed Install
|
||||
|
||||
## Available Installation Options {#available-installation-options}
|
||||
|
||||
### From DEB Packages {#install-from-deb-packages}
|
||||
@ -58,9 +128,9 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
</details>
|
||||
|
||||
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
|
||||
You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
|
||||
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).
|
||||
|
||||
#### Packages {#packages}
|
||||
|
||||
@ -105,7 +175,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
</details>
|
||||
|
||||
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
|
||||
You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
|
||||
|
||||
Then run these commands to install packages:
|
||||
|
||||
@ -226,7 +296,7 @@ Use the `clickhouse client` to connect to the server, or `clickhouse local` to p
|
||||
|
||||
### From Sources {#from-sources}
|
||||
|
||||
To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md).
|
||||
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md).
|
||||
|
||||
You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs.
|
||||
|
||||
@ -281,7 +351,7 @@ If the configuration file is in the current directory, you do not need to specif
|
||||
|
||||
ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`).
|
||||
By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`.
|
||||
For more information, see the section [“Configuration Files”](../operations/configuration-files.md).
|
||||
For more information, see the section [“Configuration Files”](/docs/en/operations/configuration-files.md).
|
||||
|
||||
After launching server, you can use the command-line client to connect to it:
|
||||
|
||||
@ -292,7 +362,7 @@ $ clickhouse-client
|
||||
By default, it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument.
|
||||
|
||||
The terminal must use UTF-8 encoding.
|
||||
For more information, see the section [“Command-line client”](../interfaces/cli.md).
|
||||
For more information, see the section [“Command-line client”](/docs/en/interfaces/cli.md).
|
||||
|
||||
Example:
|
||||
|
||||
@ -317,6 +387,5 @@ SELECT 1
|
||||
|
||||
**Congratulations, the system works!**
|
||||
|
||||
To continue experimenting, you can download one of the test data sets or go through [tutorial](./../tutorial.md).
|
||||
To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md).
|
||||
|
||||
[Original article](https://clickhouse.com/docs/en/getting_started/install/) <!--hide-->
|
||||
|
@ -3,6 +3,7 @@ slug: /en/interfaces/cli
|
||||
sidebar_position: 17
|
||||
sidebar_label: Command-Line Client
|
||||
---
|
||||
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md';
|
||||
|
||||
# Command-line Client
|
||||
|
||||
@ -24,26 +25,76 @@ Connected to ClickHouse server version 20.13.1 revision 54442.
|
||||
Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a client of the older version, then the server, `clickhouse-client` displays the message:
|
||||
|
||||
```response
|
||||
ClickHouse client version is older than ClickHouse server. It may lack support for new features.
|
||||
ClickHouse client version is older than ClickHouse server.
|
||||
It may lack support for new features.
|
||||
```
|
||||
|
||||
## Usage {#cli_usage}
|
||||
|
||||
The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
|
||||
The client can be used in interactive and non-interactive (batch) mode.
|
||||
|
||||
Example of using the client to insert data:
|
||||
### Gather your connection details
|
||||
<ConnectionDetails />
|
||||
|
||||
### Interactive
|
||||
|
||||
To connect to your ClickHouse Cloud service, or any ClickHouse server using TLS and passwords, interactively use `--secure`, port 9440, and provide your username and password:
|
||||
|
||||
```bash
|
||||
clickhouse-client --host <HOSTNAME> \
|
||||
--secure \
|
||||
--port 9440 \
|
||||
--user <USERNAME> \
|
||||
--password <PASSWORD>
|
||||
```
|
||||
|
||||
To connect to a self-managed ClickHouse server you will need the details for that server. Whether or not TLS is used, port numbers, and passwords are all configurable. Use the above example for ClickHouse Cloud as a starting point.
|
||||
|
||||
|
||||
### Batch
|
||||
|
||||
To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
|
||||
|
||||
Examples of using the client to insert data:
|
||||
|
||||
#### Inserting a CSV file into a remote ClickHouse service
|
||||
|
||||
This example is appropriate for ClickHouse Cloud, or any ClickHouse server using TLS and a password. In this example a sample dataset CSV file, `cell_towers.csv` is inserted into an existing table `cell_towers` in the `default` database:
|
||||
|
||||
```bash
|
||||
clickhouse-client --host HOSTNAME.clickhouse.cloud \
|
||||
--secure \
|
||||
--port 9440 \
|
||||
--user default \
|
||||
--password PASSWORD \
|
||||
--query "INSERT INTO cell_towers FORMAT CSVWithNames" \
|
||||
< cell_towers.csv
|
||||
```
|
||||
|
||||
:::note
|
||||
To concentrate on the query syntax, the rest of the examples leave off the connection details (`--host`, `--port`, etc.). Add them in when you try the commands.
|
||||
:::
|
||||
|
||||
#### Three different ways of inserting data
|
||||
|
||||
``` bash
|
||||
$ echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | \
|
||||
clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
$ cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```bash
|
||||
cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
3, 'some text', '2016-08-14 00:00:00'
|
||||
4, 'some more text', '2016-08-14 00:00:01'
|
||||
_EOF
|
||||
|
||||
$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
```bash
|
||||
cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query.
|
||||
|
||||
By default, you can only process a single query in batch mode. To make multiple queries from a “script,” use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run ‘clickhouse-client’ for each query. Note that it may take tens of milliseconds to launch the ‘clickhouse-client’ program.
|
||||
|
@ -1020,6 +1020,62 @@ Example:
|
||||
}
|
||||
```
|
||||
|
||||
To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object.
|
||||
Examples:
|
||||
|
||||
For output:
|
||||
|
||||
Let's say we have table `test` with two columns:
|
||||
```
|
||||
┌─object_name─┬─number─┐
|
||||
│ first_obj │ 1 │
|
||||
│ second_obj │ 2 │
|
||||
│ third_obj │ 3 │
|
||||
└─────────────┴────────┘
|
||||
```
|
||||
Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting:
|
||||
|
||||
```sql
|
||||
select * from test settings format_json_object_each_row_column_for_object_name='object_name'
|
||||
```
|
||||
|
||||
The output:
|
||||
```json
|
||||
{
|
||||
"first_obj": {"number": 1},
|
||||
"second_obj": {"number": 2},
|
||||
"third_obj": {"number": 3}
|
||||
}
|
||||
```
|
||||
|
||||
For input:
|
||||
|
||||
Let's say we stored output from previous example in a file with name `data.json`:
|
||||
```sql
|
||||
select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name'
|
||||
```
|
||||
|
||||
```
|
||||
┌─object_name─┬─number─┐
|
||||
│ first_obj │ 1 │
|
||||
│ second_obj │ 2 │
|
||||
│ third_obj │ 3 │
|
||||
└─────────────┴────────┘
|
||||
```
|
||||
|
||||
It also works in schema inference:
|
||||
|
||||
```sql
|
||||
desc file('data.json', JSONObjectEachRow) settings format_json_object_each_row_column_for_object_name='object_name'
|
||||
```
|
||||
|
||||
```
|
||||
┌─name────────┬─type────────────┐
|
||||
│ object_name │ String │
|
||||
│ number │ Nullable(Int64) │
|
||||
└─────────────┴─────────────────┘
|
||||
```
|
||||
|
||||
|
||||
### Inserting Data {#json-inserting-data}
|
||||
|
||||
|
@ -6,16 +6,32 @@ sidebar_label: MySQL Interface
|
||||
|
||||
# MySQL Interface
|
||||
|
||||
ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file:
|
||||
ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
|
||||
|
||||
``` xml
|
||||
<mysql_port>9004</mysql_port>
|
||||
<clickhouse>
|
||||
<mysql_port>9004</mysql_port>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Example of connecting using command-line tool `mysql`:
|
||||
Startup your ClickHouse server and look for a log message similar to the following that mentions Listening for MySQL compatibility protocol:
|
||||
|
||||
```
|
||||
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
|
||||
```
|
||||
|
||||
## Connect mysql to ClickHouse
|
||||
|
||||
The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:
|
||||
|
||||
```bash
|
||||
mysql --protocol tcp -h [hostname] -u [username] -P [port_number] [database_name]
|
||||
```
|
||||
|
||||
For example:
|
||||
|
||||
``` bash
|
||||
$ mysql --protocol tcp -u default -P 9004
|
||||
$ mysql --protocol tcp -h 127.0.0.1 -u default -P 9004 default
|
||||
```
|
||||
|
||||
Output if a connection succeeded:
|
||||
|
@ -41,6 +41,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don
|
||||
- [node-clickhouse](https://github.com/apla/node-clickhouse)
|
||||
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
|
||||
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
|
||||
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
|
||||
- Perl
|
||||
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
|
||||
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: ClickHouse Keeper
|
||||
---
|
||||
|
||||
# ClickHouse Keeper
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper.
|
||||
|
||||
|
@ -3,7 +3,11 @@ slug: /en/operations/external-authenticators/
|
||||
sidebar_position: 48
|
||||
sidebar_label: External User Authenticators and Directories
|
||||
title: "External User Authenticators and Directories"
|
||||
pagination_next: 'en/operations/external-authenticators/kerberos'
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse supports authenticating and managing users using external services.
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/kerberos
|
||||
---
|
||||
# Kerberos
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/ldap
|
||||
title: "LDAP"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/ssl-x509
|
||||
title: "SSL X.509 certificate authentication"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
|
||||
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: Monitoring
|
||||
---
|
||||
|
||||
# Monitoring
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You can monitor:
|
||||
|
||||
|
@ -3,9 +3,12 @@ slug: /en/operations/optimizing-performance/sampling-query-profiler
|
||||
sidebar_position: 54
|
||||
sidebar_label: Query Profiling
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
# Sampling Query Profiler
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time.
|
||||
|
||||
To use profiler:
|
||||
|
@ -5,6 +5,10 @@ sidebar_label: Testing Hardware
|
||||
title: "How to Test Your Hardware with ClickHouse"
|
||||
---
|
||||
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You can run a basic ClickHouse performance test on any server without installation of ClickHouse packages.
|
||||
|
||||
|
||||
|
@ -1,60 +0,0 @@
|
||||
---
|
||||
slug: /en/operations/requirements
|
||||
sidebar_position: 44
|
||||
sidebar_label: Requirements
|
||||
---
|
||||
|
||||
# Requirements
|
||||
|
||||
## CPU
|
||||
|
||||
For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
|
||||
|
||||
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
|
||||
|
||||
It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload.
|
||||
|
||||
## RAM {#ram}
|
||||
|
||||
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
|
||||
|
||||
The required volume of RAM depends on:
|
||||
|
||||
- The complexity of queries.
|
||||
- The amount of data that is processed in queries.
|
||||
|
||||
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) and other operations you use.
|
||||
|
||||
ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
|
||||
|
||||
## Swap File {#swap-file}
|
||||
|
||||
Disable the swap file for production environments.
|
||||
|
||||
## Storage Subsystem {#storage-subsystem}
|
||||
|
||||
You need to have 2GB of free disk space to install ClickHouse.
|
||||
|
||||
The volume of storage required for your data should be calculated separately. Assessment should include:
|
||||
|
||||
- Estimation of the data volume.
|
||||
|
||||
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
|
||||
|
||||
- The data compression coefficient.
|
||||
|
||||
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
|
||||
|
||||
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
|
||||
|
||||
## Network {#network}
|
||||
|
||||
If possible, use networks of 10G or higher class.
|
||||
|
||||
The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.
|
||||
|
||||
## Software {#software}
|
||||
|
||||
ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
|
||||
|
||||
ClickHouse can also work in other operating system families. See details in the [install guide](../getting-started/install.md) section of the documentation.
|
@ -2,6 +2,7 @@
|
||||
slug: /en/operations/server-configuration-parameters/
|
||||
sidebar_position: 54
|
||||
sidebar_label: Server Configuration Parameters
|
||||
pagination_next: en/operations/server-configuration-parameters/settings
|
||||
---
|
||||
|
||||
# Server Configuration Parameters
|
||||
|
@ -666,6 +666,7 @@ Keys:
|
||||
- `http_proxy` - Configure HTTP proxy for sending crash reports.
|
||||
- `debug` - Sets the Sentry client into debug mode.
|
||||
- `tmp_path` - Filesystem path for temporary crash report state.
|
||||
- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse.
|
||||
|
||||
**Recommended way to use**
|
||||
|
||||
@ -1501,6 +1502,21 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
|
||||
- Policy should have exactly one volume with local disks.
|
||||
:::
|
||||
|
||||
## max_temporary_data_on_disk_size {#max_temporary_data_on_disk_size}
|
||||
|
||||
Limit the amount of disk space consumed by temporary files in `tmp_path` for the server.
|
||||
Queries that exceed this limit will fail with an exception.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**See also**
|
||||
|
||||
- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user)
|
||||
- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query)
|
||||
- [tmp_path](#tmp-path)
|
||||
- [tmp_policy](#tmp-policy)
|
||||
- [max_server_memory_usage](#max_server_memory_usage)
|
||||
|
||||
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
|
||||
|
||||
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
@ -2,6 +2,7 @@
|
||||
sidebar_label: Settings
|
||||
sidebar_position: 51
|
||||
slug: /en/operations/settings/
|
||||
pagination_next: en/operations/settings/settings
|
||||
---
|
||||
|
||||
# Settings Overview
|
||||
|
@ -313,4 +313,19 @@ When inserting data, ClickHouse calculates the number of partitions in the inser
|
||||
|
||||
> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
|
||||
## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user}
|
||||
|
||||
The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries.
|
||||
Zero means unlimited.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
|
||||
## max_temporary_data_on_disk_size_for_query {#settings_max_temporary_data_on_disk_size_for_query}
|
||||
|
||||
The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries.
|
||||
Zero means unlimited.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
[Original article](https://clickhouse.com/docs/en/operations/settings/query_complexity/) <!--hide-->
|
||||
|
@ -35,7 +35,7 @@ Structure of the `users` section:
|
||||
<database_name>
|
||||
<table_name>
|
||||
<filter>expression</filter>
|
||||
<table_name>
|
||||
</table_name>
|
||||
</database_name>
|
||||
</databases>
|
||||
</user_name>
|
||||
|
@ -668,7 +668,7 @@ log_query_views=1
|
||||
|
||||
## log_formatted_queries {#settings-log-formatted-queries}
|
||||
|
||||
Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table.
|
||||
Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1599,7 +1599,7 @@ Right now it requires `optimize_skip_unused_shards` (the reason behind this is t
|
||||
|
||||
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
|
||||
|
||||
Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didn’t perform a merge.
|
||||
Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didn’t perform a merge.
|
||||
|
||||
By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message.
|
||||
|
||||
@ -2629,12 +2629,6 @@ Sets the maximum number of inserted blocks after which mergeable blocks are drop
|
||||
|
||||
Default value: `64`.
|
||||
|
||||
## temporary_live_view_timeout {#temporary-live-view-timeout}
|
||||
|
||||
Sets the interval in seconds after which [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted.
|
||||
|
||||
Default value: `5`.
|
||||
|
||||
## periodic_live_view_refresh {#periodic-live-view-refresh}
|
||||
|
||||
Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh.
|
||||
@ -3908,6 +3902,13 @@ Controls validation of UTF-8 sequences in JSON output formats, doesn't impact fo
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name}
|
||||
|
||||
The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md#jsonobjecteachrow) format.
|
||||
Column type should be String. If value is empty, default names `row_{i}`will be used for object names.
|
||||
|
||||
Default value: ''.
|
||||
|
||||
## TSV format settings {#tsv-format-settings}
|
||||
|
||||
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: Secured Communication with Zookeeper
|
||||
---
|
||||
|
||||
# Optional secured communication between ClickHouse and Zookeeper
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with ClickHouse client over SSL. These options are available from Zookeeper version 3.5.2.
|
||||
|
||||
|
@ -5,7 +5,7 @@ slug: /en/operations/system-tables/columns
|
||||
|
||||
Contains information about columns in all the tables.
|
||||
|
||||
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
|
||||
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) query, but for multiple tables at once.
|
||||
|
||||
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field.
|
||||
|
||||
|
@ -11,6 +11,7 @@ Columns:
|
||||
- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
|
||||
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes.
|
||||
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes.
|
||||
- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running).
|
||||
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
|
||||
|
||||
**Example**
|
||||
|
@ -4,6 +4,9 @@ sidebar_position: 58
|
||||
sidebar_label: Usage Recommendations
|
||||
title: "Usage Recommendations"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
## CPU Scaling Governor
|
||||
|
||||
|
@ -294,6 +294,53 @@ Result:
|
||||
|
||||
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
|
||||
|
||||
## tryDecrypt
|
||||
|
||||
Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key.
|
||||
|
||||
**Examples**
|
||||
|
||||
Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, `iv` is an initial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field:
|
||||
|
||||
```sql
|
||||
CREATE TABLE decrypt_null (
|
||||
dt DateTime,
|
||||
user_id UInt32,
|
||||
encrypted String,
|
||||
iv String
|
||||
) ENGINE = Memory;
|
||||
```
|
||||
|
||||
Insert some data:
|
||||
|
||||
```sql
|
||||
INSERT INTO decrypt_null VALUES
|
||||
('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'),
|
||||
('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'),
|
||||
('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
dt,
|
||||
user_id,
|
||||
tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value
|
||||
FROM decrypt_null
|
||||
ORDER BY user_id ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌──────────────────dt─┬─user_id─┬─value──┐
|
||||
│ 2022-08-02 00:00:00 │ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2022-09-02 00:00:00 │ 2 │ value2 │
|
||||
│ 2022-09-02 00:00:01 │ 3 │ ᴺᵁᴸᴸ │
|
||||
└─────────────────────┴─────────┴────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
@ -127,7 +127,7 @@ Adds a comment to the column. If the `IF EXISTS` clause is specified, the query
|
||||
|
||||
Each column can have one comment. If a comment already exists for the column, a new comment overwrites the previous comment.
|
||||
|
||||
Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/misc.md#misc-describe-table) query.
|
||||
Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/describe-table.md) query.
|
||||
|
||||
Example:
|
||||
|
||||
@ -253,7 +253,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest
|
||||
|
||||
There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`).
|
||||
|
||||
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/misc.md#misc_operations-rename) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
|
||||
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/rename.md#rename-table) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
|
||||
|
||||
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.
|
||||
|
||||
|
@ -44,7 +44,7 @@ For `*MergeTree` tables mutations execute by **rewriting whole data parts**. The
|
||||
|
||||
Mutations are totally ordered by their creation order and are applied to each part in that order. Mutations are also partially ordered with `INSERT INTO` queries: data that was inserted into the table before the mutation was submitted will be mutated and data that was inserted after that will not be mutated. Note that mutations do not block inserts in any way.
|
||||
|
||||
A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/misc.md#kill-mutation) query.
|
||||
A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/kill.md#kill-mutation) query.
|
||||
|
||||
Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted.
|
||||
|
||||
|
@ -319,7 +319,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif
|
||||
|
||||
Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.
|
||||
|
||||
All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example:
|
||||
All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example:
|
||||
|
||||
``` sql
|
||||
OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/check-table
|
||||
sidebar_position: 41
|
||||
sidebar_label: CHECK
|
||||
sidebar_label: CHECK TABLE
|
||||
title: "CHECK TABLE Statement"
|
||||
---
|
||||
|
||||
|
@ -10,7 +10,7 @@ Creates new [roles](../../../operations/access-rights.md#role-management). Role
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
|
@ -13,7 +13,7 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
|
||||
```
|
||||
|
||||
Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
|
||||
@ -166,23 +166,6 @@ SELECT * FROM [db.]live_view WHERE ...
|
||||
|
||||
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
|
||||
|
||||
### WITH TIMEOUT Clause
|
||||
|
||||
When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
|
||||
```
|
||||
|
||||
### WITH REFRESH Clause
|
||||
|
||||
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
|
||||
@ -212,20 +195,6 @@ WATCH lv
|
||||
└─────────────────────┴──────────┘
|
||||
```
|
||||
|
||||
You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
|
||||
```
|
||||
|
||||
After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
|
||||
|
||||
```sql
|
||||
WATCH lv
|
||||
```
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/describe-table
|
||||
sidebar_position: 42
|
||||
sidebar_label: DESCRIBE
|
||||
sidebar_label: DESCRIBE TABLE
|
||||
title: "DESCRIBE TABLE"
|
||||
---
|
||||
|
||||
|
@ -221,7 +221,7 @@ By default, a user account or a role has no privileges.
|
||||
|
||||
If a user or a role has no privileges, it is displayed as [NONE](#grant-none) privilege.
|
||||
|
||||
Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
|
||||
Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/optimize.md) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
|
||||
|
||||
### SELECT
|
||||
|
||||
@ -304,11 +304,11 @@ Examples of how this hierarchy is treated:
|
||||
- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters.
|
||||
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
|
||||
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
|
||||
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
|
||||
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
|
||||
|
||||
### CREATE
|
||||
|
||||
Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/misc.md#attach) DDL-queries according to the following hierarchy of privileges:
|
||||
Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges:
|
||||
|
||||
- `CREATE`. Level: `GROUP`
|
||||
- `CREATE DATABASE`. Level: `DATABASE`
|
||||
@ -323,7 +323,7 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
|
||||
|
||||
### DROP
|
||||
|
||||
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
|
||||
Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges:
|
||||
|
||||
- `DROP`. Level: `GROUP`
|
||||
- `DROP DATABASE`. Level: `DATABASE`
|
||||
@ -333,13 +333,13 @@ Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH
|
||||
|
||||
### TRUNCATE
|
||||
|
||||
Allows executing [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) queries.
|
||||
Allows executing [TRUNCATE](../../sql-reference/statements/truncate.md) queries.
|
||||
|
||||
Privilege level: `TABLE`.
|
||||
|
||||
### OPTIMIZE
|
||||
|
||||
Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) queries.
|
||||
Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/optimize.md) queries.
|
||||
|
||||
Privilege level: `TABLE`.
|
||||
|
||||
@ -359,7 +359,7 @@ A user has the `SHOW` privilege if it has any other privilege concerning the spe
|
||||
|
||||
### KILL QUERY
|
||||
|
||||
Allows executing [KILL](../../sql-reference/statements/misc.md#kill-query-statement) queries according to the following hierarchy of privileges:
|
||||
Allows executing [KILL](../../sql-reference/statements/kill.md#kill-query) queries according to the following hierarchy of privileges:
|
||||
|
||||
Privilege level: `GLOBAL`.
|
||||
|
||||
|
@ -430,9 +430,9 @@ FROM
|
||||
### Cumulative sum.
|
||||
|
||||
```sql
|
||||
CREATE TABLE events
|
||||
CREATE TABLE warehouse
|
||||
(
|
||||
`metric` String,
|
||||
`item` String,
|
||||
`ts` DateTime,
|
||||
`value` Float
|
||||
)
|
||||
|
@ -488,7 +488,7 @@ FORMAT TSV
|
||||
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
|
||||
```
|
||||
|
||||
Optionally you can [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later:
|
||||
Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
|
||||
|
@ -34,6 +34,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
|
||||
- [node-clickhouse](https://github.com/apla/node-clickhouse)
|
||||
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
|
||||
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
|
||||
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
|
||||
- Perl
|
||||
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
|
||||
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
|
||||
|
@ -64,7 +64,7 @@ ClickHouse поддерживает управление доступом на
|
||||
|
||||
- [CREATE USER](../sql-reference/statements/create/user.md#create-user-statement)
|
||||
- [ALTER USER](../sql-reference/statements/alter/user.md)
|
||||
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
|
||||
- [DROP USER](../sql-reference/statements/drop.md#drop-user)
|
||||
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
|
||||
|
||||
### Применение настроек {#access-control-settings-applying}
|
||||
@ -91,9 +91,9 @@ ClickHouse поддерживает управление доступом на
|
||||
|
||||
- [CREATE ROLE](../sql-reference/statements/create/index.md#create-role-statement)
|
||||
- [ALTER ROLE](../sql-reference/statements/alter/role.md)
|
||||
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
|
||||
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
|
||||
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
|
||||
- [DROP ROLE](../sql-reference/statements/drop.md#drop-role)
|
||||
- [SET ROLE](../sql-reference/statements/set-role.md)
|
||||
- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role)
|
||||
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
|
||||
|
||||
Привилегии можно присвоить роли с помощью запроса [GRANT](../sql-reference/statements/grant.md). Для отзыва привилегий у роли ClickHouse предоставляет запрос [REVOKE](../sql-reference/statements/revoke.md).
|
||||
@ -106,7 +106,7 @@ ClickHouse поддерживает управление доступом на
|
||||
|
||||
- [CREATE ROW POLICY](../sql-reference/statements/create/index.md#create-row-policy-statement)
|
||||
- [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md)
|
||||
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
|
||||
- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy)
|
||||
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
|
||||
|
||||
|
||||
@ -118,7 +118,7 @@ ClickHouse поддерживает управление доступом на
|
||||
|
||||
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create/index.md#create-settings-profile-statement)
|
||||
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md)
|
||||
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
|
||||
- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile)
|
||||
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
|
||||
|
||||
|
||||
@ -132,7 +132,7 @@ ClickHouse поддерживает управление доступом на
|
||||
|
||||
- [CREATE QUOTA](../sql-reference/statements/create/index.md#create-quota-statement)
|
||||
- [ALTER QUOTA](../sql-reference/statements/alter/quota.md)
|
||||
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
|
||||
- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota)
|
||||
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
|
||||
|
||||
|
||||
|
@ -624,6 +624,7 @@ ClickHouse поддерживает динамическое изменение
|
||||
- `http_proxy` - Настройка HTTP proxy для отсылки отчетов о сбоях.
|
||||
- `debug` - Настроить клиентскую библиотеку Sentry в debug режим.
|
||||
- `tmp_path` - Путь в файловой системе для временного хранения состояния отчетов о сбоях перед отправкой на сервер Sentry.
|
||||
- `environment` - Произвольное название среды, в которой запущен сервер ClickHouse, которое будет упомянуто в каждом отчете от сбое. По умолчанию имеет значение `test` или `prod` в зависимости от версии ClickHouse.
|
||||
|
||||
**Рекомендованные настройки**
|
||||
|
||||
|
@ -1986,7 +1986,7 @@ SELECT * FROM test_table
|
||||
|
||||
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
|
||||
|
||||
Включает или отключает генерирование исключения в случаях, когда запрос [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) не выполняет мёрж.
|
||||
Включает или отключает генерирование исключения в случаях, когда запрос [OPTIMIZE](../../sql-reference/statements/optimize.md) не выполняет мёрж.
|
||||
|
||||
По умолчанию, `OPTIMIZE` завершается успешно и в тех случаях, когда он ничего не сделал. Настройка позволяет отделить подобные случаи и включает генерирование исключения с поясняющим сообщением.
|
||||
|
||||
@ -3258,12 +3258,6 @@ SELECT * FROM test2;
|
||||
|
||||
Значение по умолчанию: `64`.
|
||||
|
||||
## temporary_live_view_timeout {#temporary-live-view-timeout}
|
||||
|
||||
Задает время в секундах, после которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) удаляется.
|
||||
|
||||
Значение по умолчанию: `5`.
|
||||
|
||||
## periodic_live_view_refresh {#periodic-live-view-refresh}
|
||||
|
||||
Задает время в секундах, по истечении которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) с установленным автообновлением обновляется.
|
||||
|
@ -5,7 +5,7 @@ slug: /ru/operations/system-tables/columns
|
||||
|
||||
Содержит информацию о столбцах всех таблиц.
|
||||
|
||||
С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.
|
||||
С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md), но для многих таблиц сразу.
|
||||
|
||||
Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое.
|
||||
|
||||
|
@ -11,5 +11,6 @@ Cодержит информацию о дисках, заданных в [ко
|
||||
- `path` ([String](../../sql-reference/data-types/string.md)) — путь к точке монтирования в файловой системе.
|
||||
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — свободное место на диске в байтах.
|
||||
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — объём диска в байтах.
|
||||
- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — не зарезервированное cвободное место в байтах (`free_space` минус размер места, зарезервированного на выполняемые в данный момент фоновые слияния, вставки и другие операции записи на диск).
|
||||
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — место, которое должно остаться свободным на диске в байтах. Задаётся значением параметра `keep_free_space_bytes` конфигурации дисков.
|
||||
|
||||
|
@ -1053,6 +1053,7 @@ formatDateTime(Time, Format[, Timezone])
|
||||
| %w | номер дня недели, начиная с воскресенья (0-6) | 2 |
|
||||
| %y | год, последние 2 цифры (00-99) | 18 |
|
||||
| %Y | год, 4 цифры | 2018 |
|
||||
| %z | Смещение времени от UTC +HHMM или -HHMM | -0500 |
|
||||
| %% | символ % | % |
|
||||
|
||||
**Пример**
|
||||
|
@ -10,5 +10,4 @@ sidebar_position: 28
|
||||
- [INSERT INTO](statements/insert-into.md)
|
||||
- [CREATE](statements/create/index.md)
|
||||
- [ALTER](statements/alter/index.md#query_language_queries_alter)
|
||||
- [Прочие виды запросов](statements/misc.md)
|
||||
|
||||
|
@ -128,7 +128,7 @@ COMMENT COLUMN [IF EXISTS] name 'Text comment'
|
||||
|
||||
Каждый столбец может содержать только один комментарий. При выполнении запроса существующий комментарий заменяется на новый.
|
||||
|
||||
Посмотреть комментарии можно в столбце `comment_expression` из запроса [DESCRIBE TABLE](../misc.md#misc-describe-table).
|
||||
Посмотреть комментарии можно в столбце `comment_expression` из запроса [DESCRIBE TABLE](../describe-table.md).
|
||||
|
||||
Пример:
|
||||
|
||||
@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp;
|
||||
|
||||
Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`).
|
||||
|
||||
Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../misc.md#misc_operations-rename), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md).
|
||||
Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md).
|
||||
|
||||
Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`.
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /ru/sql-reference/statements/check-table
|
||||
sidebar_position: 41
|
||||
sidebar_label: CHECK
|
||||
sidebar_label: CHECK TABLE
|
||||
---
|
||||
|
||||
# CHECK TABLE Statement {#check-table}
|
||||
|
@ -11,19 +11,19 @@ sidebar_label: "Роль"
|
||||
Синтаксис:
|
||||
|
||||
```sql
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
|
||||
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
|
||||
```
|
||||
|
||||
## Управление ролями {#managing-roles}
|
||||
|
||||
Одному пользователю можно назначить несколько ролей. Пользователи могут применять назначенные роли в произвольных комбинациях с помощью выражения [SET ROLE](../misc.md#set-role-statement). Конечный объем привилегий — это комбинация всех привилегий всех примененных ролей. Если у пользователя имеются привилегии, присвоенные его аккаунту напрямую, они также прибавляются к привилегиям, присвоенным через роли.
|
||||
Одному пользователю можно назначить несколько ролей. Пользователи могут применять назначенные роли в произвольных комбинациях с помощью выражения [SET ROLE](../set-role.md). Конечный объем привилегий — это комбинация всех привилегий всех примененных ролей. Если у пользователя имеются привилегии, присвоенные его аккаунту напрямую, они также прибавляются к привилегиям, присвоенным через роли.
|
||||
|
||||
Роли по умолчанию применяются при входе пользователя в систему. Установить роли по умолчанию можно с помощью выражений [SET DEFAULT ROLE](../misc.md#set-default-role-statement) или [ALTER USER](../alter/index.md#alter-user-statement).
|
||||
Роли по умолчанию применяются при входе пользователя в систему. Установить роли по умолчанию можно с помощью выражений [SET DEFAULT ROLE](../set-role.md#set-default-role) или [ALTER USER](../alter/index.md#alter-user-statement).
|
||||
|
||||
Для отзыва роли используется выражение [REVOKE](../../../sql-reference/statements/revoke.md).
|
||||
|
||||
Для удаления роли используется выражение [DROP ROLE](../misc.md#drop-role-statement). Удаленная роль автоматически отзывается у всех пользователей, которым была назначена.
|
||||
Для удаления роли используется выражение [DROP ROLE](../drop.md#drop-role). Удаленная роль автоматически отзывается у всех пользователей, которым была назначена.
|
||||
|
||||
## Примеры {#create-role-examples}
|
||||
|
||||
|
@ -11,7 +11,7 @@ sidebar_label: "Представление"
|
||||
## Обычные представления {#normal}
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
|
||||
```
|
||||
|
||||
Обычные представления не хранят никаких данных, они выполняют чтение данных из другой таблицы при каждом доступе. Другими словами, обычное представление — это не что иное, как сохраненный запрос. При чтении данных из представления этот сохраненный запрос используется как подзапрос в секции [FROM](../../../sql-reference/statements/select/from.md).
|
||||
@ -156,23 +156,6 @@ SELECT * FROM [db.]live_view WHERE ...
|
||||
|
||||
Чтобы принудительно обновить LIVE-представление, используйте запрос `ALTER LIVE VIEW [db.]table_name REFRESH`.
|
||||
|
||||
### Секция WITH TIMEOUT {#live-view-with-timeout}
|
||||
|
||||
LIVE-представление, созданное с параметром `WITH TIMEOUT`, будет автоматически удалено через определенное количество секунд с момента предыдущего запроса [WATCH](../../../sql-reference/statements/watch.md), примененного к данному LIVE-представлению.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
Если временной промежуток не указан, используется значение настройки [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout).
|
||||
|
||||
**Пример:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
|
||||
```
|
||||
|
||||
### Секция WITH REFRESH {#live-view-with-refresh}
|
||||
|
||||
LIVE-представление, созданное с параметром `WITH REFRESH`, будет автоматически обновляться через указанные промежутки времени, начиная с момента последнего обновления.
|
||||
@ -202,20 +185,6 @@ WATCH lv;
|
||||
└─────────────────────┴──────────┘
|
||||
```
|
||||
|
||||
Параметры `WITH TIMEOUT` и `WITH REFRESH` можно сочетать с помощью `AND`.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
**Пример:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
|
||||
```
|
||||
|
||||
По истечении 15 секунд представление будет автоматически удалено, если нет активного запроса `WATCH`.
|
||||
|
||||
```sql
|
||||
WATCH lv;
|
||||
```
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /ru/sql-reference/statements/describe-table
|
||||
sidebar_position: 42
|
||||
sidebar_label: DESCRIBE
|
||||
sidebar_label: DESCRIBE TABLE
|
||||
---
|
||||
|
||||
# DESCRIBE TABLE {#misc-describe-table}
|
||||
|
@ -221,7 +221,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
|
||||
|
||||
Отсутствие привилегий у пользователя или роли отображается как привилегия [NONE](#grant-none).
|
||||
|
||||
Выполнение некоторых запросов требует определенного набора привилегий. Например, чтобы выполнить запрос [RENAME](misc.md#misc_operations-rename), нужны следующие привилегии: `SELECT`, `CREATE TABLE`, `INSERT` и `DROP TABLE`.
|
||||
Выполнение некоторых запросов требует определенного набора привилегий. Например, чтобы выполнить запрос [RENAME](rename.md#rename-table), нужны следующие привилегии: `SELECT`, `CREATE TABLE`, `INSERT` и `DROP TABLE`.
|
||||
|
||||
|
||||
### SELECT {#grant-select}
|
||||
@ -309,7 +309,7 @@ GRANT INSERT(x,y) ON db.table TO john
|
||||
|
||||
### CREATE {#grant-create}
|
||||
|
||||
Разрешает выполнять DDL-запросы [CREATE](../../sql-reference/statements/create/index.md) и [ATTACH](misc.md#attach) в соответствии со следующей иерархией привилегий:
|
||||
Разрешает выполнять DDL-запросы [CREATE](../../sql-reference/statements/create/index.md) и [ATTACH](attach.md) в соответствии со следующей иерархией привилегий:
|
||||
|
||||
- `CREATE`. Уровень: `GROUP`
|
||||
- `CREATE DATABASE`. Уровень: `DATABASE`
|
||||
@ -324,7 +324,7 @@ GRANT INSERT(x,y) ON db.table TO john
|
||||
|
||||
### DROP {#grant-drop}
|
||||
|
||||
Разрешает выполнять запросы [DROP](misc.md#drop) и [DETACH](misc.md#detach-statement) в соответствии со следующей иерархией привилегий:
|
||||
Разрешает выполнять запросы [DROP](drop.md) и [DETACH](detach.md) в соответствии со следующей иерархией привилегий:
|
||||
|
||||
- `DROP`. Уровень: `GROUP`
|
||||
- `DROP DATABASE`. Уровень: `DATABASE`
|
||||
@ -340,7 +340,7 @@ GRANT INSERT(x,y) ON db.table TO john
|
||||
|
||||
### OPTIMIZE {#grant-optimize}
|
||||
|
||||
Разрешает выполнять запросы [OPTIMIZE TABLE](misc.md#misc_operations-optimize).
|
||||
Разрешает выполнять запросы [OPTIMIZE TABLE](optimize.md).
|
||||
|
||||
Уровень: `TABLE`.
|
||||
|
||||
|
@ -1,10 +1,460 @@
|
||||
---
|
||||
slug: /zh/getting-started/example-datasets/brown-benchmark
|
||||
sidebar_label: Brown University Benchmark
|
||||
description: A new analytical benchmark for machine-generated log data
|
||||
title: "Brown University Benchmark"
|
||||
sidebar_label: 布朗大学基准
|
||||
description: 机器生成日志数据的新分析基准
|
||||
title: "布朗大学基准"
|
||||
---
|
||||
|
||||
import Content from '@site/docs/en/getting-started/example-datasets/brown-benchmark.md';
|
||||
`MgBench` 是机器生成的日志数据的新分析基准,[Andrew Crotty](http://cs.brown.edu/people/acrotty/)。
|
||||
|
||||
<Content />
|
||||
下载数据:
|
||||
|
||||
```bash
|
||||
wget https://datasets.clickhouse.com/mgbench{1..3}.csv.xz
|
||||
```
|
||||
|
||||
解压数据:
|
||||
|
||||
```bash
|
||||
xz -v -d mgbench{1..3}.csv.xz
|
||||
```
|
||||
|
||||
创建数据库和表:
|
||||
|
||||
```sql
|
||||
CREATE DATABASE mgbench;
|
||||
```
|
||||
|
||||
```sql
|
||||
USE mgbench;
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE mgbench.logs1 (
|
||||
log_time DateTime,
|
||||
machine_name LowCardinality(String),
|
||||
machine_group LowCardinality(String),
|
||||
cpu_idle Nullable(Float32),
|
||||
cpu_nice Nullable(Float32),
|
||||
cpu_system Nullable(Float32),
|
||||
cpu_user Nullable(Float32),
|
||||
cpu_wio Nullable(Float32),
|
||||
disk_free Nullable(Float32),
|
||||
disk_total Nullable(Float32),
|
||||
part_max_used Nullable(Float32),
|
||||
load_fifteen Nullable(Float32),
|
||||
load_five Nullable(Float32),
|
||||
load_one Nullable(Float32),
|
||||
mem_buffers Nullable(Float32),
|
||||
mem_cached Nullable(Float32),
|
||||
mem_free Nullable(Float32),
|
||||
mem_shared Nullable(Float32),
|
||||
swap_free Nullable(Float32),
|
||||
bytes_in Nullable(Float32),
|
||||
bytes_out Nullable(Float32)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (machine_group, machine_name, log_time);
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
CREATE TABLE mgbench.logs2 (
|
||||
log_time DateTime,
|
||||
client_ip IPv4,
|
||||
request String,
|
||||
status_code UInt16,
|
||||
object_size UInt64
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY log_time;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
CREATE TABLE mgbench.logs3 (
|
||||
log_time DateTime64,
|
||||
device_id FixedString(15),
|
||||
device_name LowCardinality(String),
|
||||
device_type LowCardinality(String),
|
||||
device_floor UInt8,
|
||||
event_type LowCardinality(String),
|
||||
event_unit FixedString(1),
|
||||
event_value Nullable(Float32)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (event_type, log_time);
|
||||
```
|
||||
|
||||
插入数据:
|
||||
|
||||
```
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
|
||||
```
|
||||
|
||||
## 运行基准查询:
|
||||
|
||||
```sql
|
||||
USE mgbench;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q1.1: 自午夜以来每个 Web 服务器的 CPU/网络利用率是多少?
|
||||
|
||||
SELECT machine_name,
|
||||
MIN(cpu) AS cpu_min,
|
||||
MAX(cpu) AS cpu_max,
|
||||
AVG(cpu) AS cpu_avg,
|
||||
MIN(net_in) AS net_in_min,
|
||||
MAX(net_in) AS net_in_max,
|
||||
AVG(net_in) AS net_in_avg,
|
||||
MIN(net_out) AS net_out_min,
|
||||
MAX(net_out) AS net_out_max,
|
||||
AVG(net_out) AS net_out_avg
|
||||
FROM (
|
||||
SELECT machine_name,
|
||||
COALESCE(cpu_user, 0.0) AS cpu,
|
||||
COALESCE(bytes_in, 0.0) AS net_in,
|
||||
COALESCE(bytes_out, 0.0) AS net_out
|
||||
FROM logs1
|
||||
WHERE machine_name IN ('anansi','aragog','urd')
|
||||
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
|
||||
) AS r
|
||||
GROUP BY machine_name;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q1.2:最近一天有哪些机房的机器离线?
|
||||
|
||||
SELECT machine_name,
|
||||
log_time
|
||||
FROM logs1
|
||||
WHERE (machine_name LIKE 'cslab%' OR
|
||||
machine_name LIKE 'mslab%')
|
||||
AND load_one IS NULL
|
||||
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
|
||||
ORDER BY machine_name,
|
||||
log_time;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q1.3:特定工作站过去 10 天的每小时的平均指标是多少?
|
||||
|
||||
SELECT dt,
|
||||
hr,
|
||||
AVG(load_fifteen) AS load_fifteen_avg,
|
||||
AVG(load_five) AS load_five_avg,
|
||||
AVG(load_one) AS load_one_avg,
|
||||
AVG(mem_free) AS mem_free_avg,
|
||||
AVG(swap_free) AS swap_free_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
load_fifteen,
|
||||
load_five,
|
||||
load_one,
|
||||
mem_free,
|
||||
swap_free
|
||||
FROM logs1
|
||||
WHERE machine_name = 'babbage'
|
||||
AND load_fifteen IS NOT NULL
|
||||
AND load_five IS NOT NULL
|
||||
AND load_one IS NOT NULL
|
||||
AND mem_free IS NOT NULL
|
||||
AND swap_free IS NOT NULL
|
||||
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr
|
||||
ORDER BY dt,
|
||||
hr;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q1.4: 1 个月内,每台服务器的磁盘 I/O 阻塞的频率是多少?
|
||||
|
||||
SELECT machine_name,
|
||||
COUNT(*) AS spikes
|
||||
FROM logs1
|
||||
WHERE machine_group = 'Servers'
|
||||
AND cpu_wio > 0.99
|
||||
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
|
||||
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
|
||||
GROUP BY machine_name
|
||||
ORDER BY spikes DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q1.5:哪些外部可访问的虚拟机的运行内存不足?
|
||||
|
||||
SELECT machine_name,
|
||||
dt,
|
||||
MIN(mem_free) AS mem_free_min
|
||||
FROM (
|
||||
SELECT machine_name,
|
||||
CAST(log_time AS DATE) AS dt,
|
||||
mem_free
|
||||
FROM logs1
|
||||
WHERE machine_group = 'DMZ'
|
||||
AND mem_free IS NOT NULL
|
||||
) AS r
|
||||
GROUP BY machine_name,
|
||||
dt
|
||||
HAVING MIN(mem_free) < 10000
|
||||
ORDER BY machine_name,
|
||||
dt;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q1.6: 每小时所有文件服务器的总网络流量是多少?
|
||||
|
||||
SELECT dt,
|
||||
hr,
|
||||
SUM(net_in) AS net_in_sum,
|
||||
SUM(net_out) AS net_out_sum,
|
||||
SUM(net_in) + SUM(net_out) AS both_sum
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
|
||||
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
|
||||
FROM logs1
|
||||
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
|
||||
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
|
||||
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
|
||||
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
|
||||
'tootsie','trident','wrigley','york')
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr
|
||||
ORDER BY both_sum DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q2.1:过去 2 周内哪些请求导致了服务器错误?
|
||||
|
||||
SELECT *
|
||||
FROM logs2
|
||||
WHERE status_code >= 500
|
||||
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
|
||||
ORDER BY log_time;
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q2.2:在特定的某 2 周内,用户密码文件是否被泄露了?
|
||||
|
||||
SELECT *
|
||||
FROM logs2
|
||||
WHERE status_code >= 200
|
||||
AND status_code < 300
|
||||
AND request LIKE '%/etc/passwd%'
|
||||
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
|
||||
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q2.3:过去一个月顶级请求的平均路径深度是多少?
|
||||
|
||||
SELECT top_level,
|
||||
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
|
||||
FROM (
|
||||
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
|
||||
request
|
||||
FROM (
|
||||
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
|
||||
request
|
||||
FROM logs2
|
||||
WHERE status_code >= 200
|
||||
AND status_code < 300
|
||||
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
|
||||
) AS r
|
||||
WHERE len > 0
|
||||
) AS s
|
||||
WHERE top_level IN ('/about','/courses','/degrees','/events',
|
||||
'/grad','/industry','/news','/people',
|
||||
'/publications','/research','/teaching','/ugrad')
|
||||
GROUP BY top_level
|
||||
ORDER BY top_level;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q2.4:在过去的 3 个月里,哪些客户端发出了过多的请求?
|
||||
|
||||
SELECT client_ip,
|
||||
COUNT(*) AS num_requests
|
||||
FROM logs2
|
||||
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
|
||||
GROUP BY client_ip
|
||||
HAVING COUNT(*) >= 100000
|
||||
ORDER BY num_requests DESC;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q2.5:每天的独立访问者数量是多少?
|
||||
|
||||
SELECT dt,
|
||||
COUNT(DISTINCT client_ip)
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
client_ip
|
||||
FROM logs2
|
||||
) AS r
|
||||
GROUP BY dt
|
||||
ORDER BY dt;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q2.6:平均和最大数据传输速率(Gbps)是多少?
|
||||
|
||||
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
|
||||
MAX(transfer) / 125000000.0 AS transfer_max
|
||||
FROM (
|
||||
SELECT log_time,
|
||||
SUM(object_size) AS transfer
|
||||
FROM logs2
|
||||
GROUP BY log_time
|
||||
) AS r;
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q3.1:自 2019/11/29 17:00 以来,室温是否达到过冰点?
|
||||
|
||||
SELECT *
|
||||
FROM logs3
|
||||
WHERE event_type = 'temperature'
|
||||
AND event_value <= 32.0
|
||||
AND log_time >= '2019-11-29 17:00:00.000';
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q3.4:在过去的 6 个月里,每扇门打开的频率是多少?
|
||||
|
||||
SELECT device_name,
|
||||
device_floor,
|
||||
COUNT(*) AS ct
|
||||
FROM logs3
|
||||
WHERE event_type = 'door_open'
|
||||
AND log_time >= '2019-06-01 00:00:00.000'
|
||||
GROUP BY device_name,
|
||||
device_floor
|
||||
ORDER BY ct DESC;
|
||||
```
|
||||
|
||||
下面的查询 3.5 使用了 UNION 关键词。设置该模式以便组合 SELECT 的查询结果。该设置仅在未明确指定 UNION ALL 或 UNION DISTINCT 但使用了 UNION 进行共享时使用。
|
||||
|
||||
```sql
|
||||
SET union_default_mode = 'DISTINCT'
|
||||
```
|
||||
|
||||
```sql
|
||||
-- Q3.5: 在冬季和夏季,建筑物内哪些地方会出现较大的温度变化?
|
||||
|
||||
WITH temperature AS (
|
||||
SELECT dt,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
FROM (
|
||||
SELECT dt,
|
||||
hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
AVG(event_value) AS temperature_hourly_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
event_value
|
||||
FROM logs3
|
||||
WHERE event_type = 'temperature'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
) AS s
|
||||
GROUP BY dt,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
|
||||
)
|
||||
SELECT DISTINCT device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
'WINTER'
|
||||
FROM temperature
|
||||
WHERE dt >= DATE '2018-12-01'
|
||||
AND dt < DATE '2019-03-01'
|
||||
UNION
|
||||
SELECT DISTINCT device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
'SUMMER'
|
||||
FROM temperature
|
||||
WHERE dt >= DATE '2019-06-01'
|
||||
AND dt < DATE '2019-09-01';
|
||||
```
|
||||
|
||||
|
||||
```sql
|
||||
-- Q3.6:对于每种类别的设备,每月的功耗指标是什么?
|
||||
|
||||
SELECT yr,
|
||||
mo,
|
||||
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
|
||||
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
|
||||
SUM(printer_hourly_avg) AS printer_monthly_sum,
|
||||
AVG(printer_hourly_avg) AS printer_monthly_avg,
|
||||
SUM(projector_hourly_avg) AS projector_monthly_sum,
|
||||
AVG(projector_hourly_avg) AS projector_monthly_avg,
|
||||
SUM(vending_hourly_avg) AS vending_monthly_sum,
|
||||
AVG(vending_hourly_avg) AS vending_monthly_avg
|
||||
FROM (
|
||||
SELECT dt,
|
||||
yr,
|
||||
mo,
|
||||
hr,
|
||||
AVG(coffee) AS coffee_hourly_avg,
|
||||
AVG(printer) AS printer_hourly_avg,
|
||||
AVG(projector) AS projector_hourly_avg,
|
||||
AVG(vending) AS vending_hourly_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(YEAR FROM log_time) AS yr,
|
||||
EXTRACT(MONTH FROM log_time) AS mo,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
|
||||
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
|
||||
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
|
||||
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
|
||||
FROM logs3
|
||||
WHERE device_type = 'meter'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
yr,
|
||||
mo,
|
||||
hr
|
||||
) AS s
|
||||
GROUP BY yr,
|
||||
mo
|
||||
ORDER BY yr,
|
||||
mo;
|
||||
```
|
||||
|
||||
此数据集可在 [Playground](https://play.clickhouse.com/play?user=play) 中进行交互式的请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
|
||||
|
@ -1,10 +1,450 @@
|
||||
---
|
||||
slug: /zh/getting-started/example-datasets/uk-price-paid
|
||||
sidebar_label: UK Property Price Paid
|
||||
sidebar_label: 英国房地产支付价格
|
||||
sidebar_position: 1
|
||||
title: "UK Property Price Paid"
|
||||
title: "英国房地产支付价格"
|
||||
---
|
||||
|
||||
import Content from '@site/docs/en/getting-started/example-datasets/uk-price-paid.md';
|
||||
该数据集包含自 1995 年以来有关英格兰和威尔士房地产价格的数据。未压缩的大小约为 4 GiB,在 ClickHouse 中大约需要 278 MiB。
|
||||
|
||||
<Content />
|
||||
来源:https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
|
||||
字段说明:https://www.gov.uk/guidance/about-the-price-data
|
||||
|
||||
包含 HM Land Registry data © Crown copyright and database right 2021.。此数据集需在 Open Government License v3.0 的许可下使用。
|
||||
|
||||
## 创建表 {#create-table}
|
||||
|
||||
```sql
|
||||
CREATE TABLE uk_price_paid
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
|
||||
is_new UInt8,
|
||||
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
```
|
||||
|
||||
## 预处理和插入数据 {#preprocess-import-data}
|
||||
|
||||
我们将使用 `url` 函数将数据流式传输到 ClickHouse。我们需要首先预处理一些传入的数据,其中包括:
|
||||
|
||||
- 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询
|
||||
- 将`time` 字段转换为日期为它只包含 00:00 时间
|
||||
- 忽略 [UUid](../../sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析
|
||||
- 使用 [transform](../../sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段
|
||||
- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1
|
||||
- 删除最后两列,因为它们都具有相同的值(即 0)
|
||||
|
||||
`url` 函数将来自网络服务器的数据流式传输到 ClickHouse 表中。以下命令将 500 万行插入到 `uk_price_paid` 表中:
|
||||
|
||||
```sql
|
||||
INSERT INTO uk_price_paid
|
||||
WITH
|
||||
splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
toUInt32(price_string) AS price,
|
||||
parseDateTimeBestEffortUS(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county
|
||||
FROM url(
|
||||
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
|
||||
'CSV',
|
||||
'uuid_string String,
|
||||
price_string String,
|
||||
time String,
|
||||
postcode String,
|
||||
a String,
|
||||
b String,
|
||||
c String,
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street String,
|
||||
locality String,
|
||||
town String,
|
||||
district String,
|
||||
county String,
|
||||
d String,
|
||||
e String'
|
||||
) SETTINGS max_http_get_redirects=10;
|
||||
```
|
||||
|
||||
需要等待一两分钟以便数据插入,具体时间取决于网络速度。
|
||||
|
||||
## 验证数据 {#validate-data}
|
||||
|
||||
让我们通过查看插入了多少行来验证它是否有效:
|
||||
|
||||
```sql
|
||||
SELECT count()
|
||||
FROM uk_price_paid
|
||||
```
|
||||
|
||||
在执行此查询时,数据集有 27,450,499 行。让我们看看 ClickHouse 中表的大小是多少:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableSize(total_bytes)
|
||||
FROM system.tables
|
||||
WHERE name = 'uk_price_paid'
|
||||
```
|
||||
|
||||
请注意,表的大小仅为 221.43 MiB!
|
||||
|
||||
## 运行一些查询 {#run-queries}
|
||||
|
||||
让我们运行一些查询来分析数据:
|
||||
|
||||
### 查询 1. 每年平均价格 {#average-price}
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 1000000, 80
|
||||
)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
结果如下所示:
|
||||
|
||||
```response
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67934 │ █████▍ │
|
||||
│ 1996 │ 71508 │ █████▋ │
|
||||
│ 1997 │ 78536 │ ██████▎ │
|
||||
│ 1998 │ 85441 │ ██████▋ │
|
||||
│ 1999 │ 96038 │ ███████▋ │
|
||||
│ 2000 │ 107487 │ ████████▌ │
|
||||
│ 2001 │ 118888 │ █████████▌ │
|
||||
│ 2002 │ 137948 │ ███████████ │
|
||||
│ 2003 │ 155893 │ ████████████▍ │
|
||||
│ 2004 │ 178888 │ ██████████████▎ │
|
||||
│ 2005 │ 189359 │ ███████████████▏ │
|
||||
│ 2006 │ 203532 │ ████████████████▎ │
|
||||
│ 2007 │ 219375 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236110 │ ██████████████████▊ │
|
||||
│ 2011 │ 232805 │ ██████████████████▌ │
|
||||
│ 2012 │ 238381 │ ███████████████████ │
|
||||
│ 2013 │ 256927 │ ████████████████████▌ │
|
||||
│ 2014 │ 280008 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297263 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313518 │ █████████████████████████ │
|
||||
│ 2017 │ 346371 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350556 │ ████████████████████████████ │
|
||||
│ 2019 │ 352184 │ ████████████████████████████▏ │
|
||||
│ 2020 │ 375808 │ ██████████████████████████████ │
|
||||
│ 2021 │ 381105 │ ██████████████████████████████▍ │
|
||||
│ 2022 │ 362572 │ █████████████████████████████ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 查询 2. 伦敦每年的平均价格 {#average-price-london}
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 2000000, 100
|
||||
)
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
结果如下所示:
|
||||
|
||||
```response
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109110 │ █████▍ │
|
||||
│ 1996 │ 118659 │ █████▊ │
|
||||
│ 1997 │ 136526 │ ██████▋ │
|
||||
│ 1998 │ 153002 │ ███████▋ │
|
||||
│ 1999 │ 180633 │ █████████ │
|
||||
│ 2000 │ 215849 │ ██████████▋ │
|
||||
│ 2001 │ 232987 │ ███████████▋ │
|
||||
│ 2002 │ 263668 │ █████████████▏ │
|
||||
│ 2003 │ 278424 │ █████████████▊ │
|
||||
│ 2004 │ 304664 │ ███████████████▏ │
|
||||
│ 2005 │ 322887 │ ████████████████▏ │
|
||||
│ 2006 │ 356195 │ █████████████████▋ │
|
||||
│ 2007 │ 404062 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427754 │ █████████████████████▍ │
|
||||
│ 2010 │ 480322 │ ████████████████████████ │
|
||||
│ 2011 │ 496278 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519482 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616195 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724121 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
|
||||
│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
|
||||
│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
|
||||
│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
2020 年房价出事了!但这并不令人意外……
|
||||
|
||||
### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods}
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
town,
|
||||
district,
|
||||
count() AS c,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 5000000, 100)
|
||||
FROM uk_price_paid
|
||||
WHERE date >= '2020-01-01'
|
||||
GROUP BY
|
||||
town,
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
结果如下所示:
|
||||
|
||||
```response
|
||||
┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
|
||||
│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
|
||||
│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
|
||||
│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
|
||||
│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
|
||||
│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
|
||||
│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
|
||||
│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
|
||||
│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
|
||||
│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
|
||||
│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
|
||||
│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
|
||||
│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
|
||||
│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
|
||||
│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
|
||||
│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
|
||||
│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
|
||||
│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
|
||||
│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
|
||||
│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
|
||||
│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
|
||||
│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
|
||||
│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
|
||||
│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
|
||||
│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
|
||||
│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
|
||||
│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
|
||||
│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
|
||||
│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
|
||||
│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
|
||||
│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
|
||||
│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
|
||||
│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
|
||||
│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
|
||||
│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
|
||||
│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
|
||||
│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
|
||||
│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
|
||||
│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
|
||||
│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
|
||||
│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
|
||||
│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
|
||||
│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
|
||||
│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
|
||||
│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
|
||||
│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
|
||||
│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
|
||||
│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
|
||||
│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
|
||||
└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## 使用 Projection 加速查询 {#speedup-with-projections}
|
||||
|
||||
[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。
|
||||
|
||||
### 构建投影{#build-projection}
|
||||
|
||||
让我们通过维度 `toYear(date)`、`district` 和 `town` 创建一个聚合 Projection:
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
ADD PROJECTION projection_by_year_district_town
|
||||
(
|
||||
SELECT
|
||||
toYear(date),
|
||||
district,
|
||||
town,
|
||||
avg(price),
|
||||
sum(price),
|
||||
count()
|
||||
GROUP BY
|
||||
toYear(date),
|
||||
district,
|
||||
town
|
||||
)
|
||||
```
|
||||
|
||||
填充现有数据的 Projection。 (如果不进行 materialize 操作,则 ClickHouse 只会为新插入的数据创建 Projection):
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
MATERIALIZE PROJECTION projection_by_year_district_town
|
||||
SETTINGS mutations_sync = 1
|
||||
```
|
||||
|
||||
## Test Performance {#test-performance}
|
||||
|
||||
让我们再次运行相同的 3 个查询:
|
||||
|
||||
### 查询 1. 每年平均价格 {#average-price-projections}
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 1000000, 80)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
结果是一样的,但是性能更好!
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
|
||||
```
|
||||
|
||||
|
||||
### 查询 2. 伦敦每年的平均价格 {#average-price-london-projections}
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 2000000, 100)
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
Same result, but notice the improvement in query performance:
|
||||
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
|
||||
```
|
||||
|
||||
### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods-projections}
|
||||
|
||||
注意:需要修改 (date >= '2020-01-01') 以使其与 Projection 定义的维度 (`toYear(date) >= 2020)` 匹配:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
town,
|
||||
district,
|
||||
count() AS c,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 5000000, 100)
|
||||
FROM uk_price_paid
|
||||
WHERE toYear(date) >= 2020
|
||||
GROUP BY
|
||||
town,
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
同样,结果是相同的,但请注意查询性能的改进:
|
||||
|
||||
```response
|
||||
No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
|
||||
With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
|
||||
```
|
||||
|
||||
### 在 Playground 上测试{#playground}
|
||||
|
||||
也可以在 [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==) 上找到此数据集。
|
||||
|
@ -35,6 +35,9 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
|
||||
- NodeJs
|
||||
- [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
|
||||
- [node-clickhouse](https://github.com/apla/node-clickhouse)
|
||||
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
|
||||
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
|
||||
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
|
||||
- Perl
|
||||
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
|
||||
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
|
||||
|
@ -13,7 +13,7 @@ sidebar_label: VIEW
|
||||
语法:
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
|
||||
```
|
||||
|
||||
普通视图不存储任何数据。 他们只是在每次访问时从另一个表执行读取。换句话说,普通视图只不过是一个保存的查询。 从视图中读取时,此保存的查询用作[FROM](../../../sql-reference/statements/select/from.md)子句中的子查询.
|
||||
@ -164,23 +164,6 @@ SELECT * FROM [db.]live_view WHERE ...
|
||||
|
||||
您可以使用`ALTER LIVE VIEW [db.]table_name REFRESH`语法.
|
||||
|
||||
### WITH TIMEOUT条件 {#live-view-with-timeout}
|
||||
|
||||
当使用`WITH TIMEOUT`子句创建实时视图时,[WATCH](../../../sql-reference/statements/watch.md)观察实时视图的查询。
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
如果未指定超时值,则由指定的值[temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout)决定.
|
||||
|
||||
**示例:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
|
||||
```
|
||||
|
||||
### WITH REFRESH条件 {#live-view-with-refresh}
|
||||
|
||||
当使用`WITH REFRESH`子句创建实时视图时,它将在自上次刷新或触发后经过指定的秒数后自动刷新。
|
||||
@ -210,20 +193,6 @@ WATCH lv
|
||||
└─────────────────────┴──────────┘
|
||||
```
|
||||
|
||||
您可以使用`AND`子句组合`WITH TIMEOUT`和`WITH REFRESH`子句。
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
**示例:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
|
||||
```
|
||||
|
||||
15 秒后,如果没有活动的`WATCH`查询,实时视图将自动删除。
|
||||
|
||||
```sql
|
||||
WATCH lv
|
||||
```
|
||||
|
@ -120,7 +120,11 @@ use_cron()
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
return 1
|
||||
fi
|
||||
# 2. disabled by config
|
||||
# 2. checking whether the config is existed
|
||||
if [ ! -f "$CLICKHOUSE_CRONFILE" ]; then
|
||||
return 1
|
||||
fi
|
||||
# 3. disabled by config
|
||||
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
|
||||
return 2
|
||||
fi
|
||||
|
@ -189,7 +189,7 @@ else()
|
||||
message(STATUS "ClickHouse su: OFF")
|
||||
endif()
|
||||
|
||||
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
|
||||
configure_file (config_tools.h.in ${CONFIG_INCLUDE_PATH}/config_tools.h)
|
||||
|
||||
macro(clickhouse_target_link_split_lib target name)
|
||||
if(NOT CLICKHOUSE_ONE_SHARED)
|
||||
|
@ -12,10 +12,11 @@
|
||||
#include <string>
|
||||
#include "Client.h"
|
||||
#include "Core/Protocol.h"
|
||||
#include "Parsers/formatAST.h"
|
||||
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
#include <Common/config_version.h>
|
||||
#include "config_version.h"
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
@ -514,6 +515,66 @@ static bool queryHasWithClause(const IAST & ast)
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<bool> Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query)
|
||||
{
|
||||
processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query);
|
||||
|
||||
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
|
||||
// Sometimes you may get TOO_DEEP_RECURSION from the server,
|
||||
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
|
||||
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
|
||||
{
|
||||
have_error = false;
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message());
|
||||
|
||||
// Try to reconnect after errors, for two reasons:
|
||||
// 1. We might not have realized that the server died, e.g. if
|
||||
// it sent us a <Fatal> trace and closed connection properly.
|
||||
// 2. The connection might have gotten into a wrong state and
|
||||
// the next query will get false positive about
|
||||
// "Unknown packet from server".
|
||||
try
|
||||
{
|
||||
connection->forceConnected(connection_parameters.timeouts);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Just report it, we'll terminate below.
|
||||
fmt::print(stderr,
|
||||
"Error while reconnecting to the server: {}\n",
|
||||
getCurrentExceptionMessage(true));
|
||||
|
||||
// The reconnection might fail, but we'll still be connected
|
||||
// in the sense of `connection->isConnected() = true`,
|
||||
// in case when the requested database doesn't exist.
|
||||
// Disconnect manually now, so that the following code doesn't
|
||||
// have any doubts, and the connection state is predictable.
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
if (!connection->isConnected())
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server.\n");
|
||||
|
||||
// Print the changed settings because they might be needed to
|
||||
// reproduce the error.
|
||||
printChangedSettings();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Returns false when server is not available.
|
||||
bool Client::processWithFuzzing(const String & full_query)
|
||||
@ -558,18 +619,33 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// - SET -- The time to fuzz the settings has not yet come
|
||||
// (see comments in Client/QueryFuzzer.cpp)
|
||||
size_t this_query_runs = query_fuzzer_runs;
|
||||
if (orig_ast->as<ASTInsertQuery>() ||
|
||||
orig_ast->as<ASTCreateQuery>() ||
|
||||
orig_ast->as<ASTDropQuery>() ||
|
||||
orig_ast->as<ASTSetQuery>())
|
||||
ASTs queries_for_fuzzed_tables;
|
||||
|
||||
if (orig_ast->as<ASTSetQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
}
|
||||
else if (const auto * create = orig_ast->as<ASTCreateQuery>())
|
||||
{
|
||||
if (QueryFuzzer::isSuitableForFuzzing(*create))
|
||||
this_query_runs = create_query_fuzzer_runs;
|
||||
else
|
||||
this_query_runs = 1;
|
||||
}
|
||||
else if (const auto * insert = orig_ast->as<ASTInsertQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query);
|
||||
}
|
||||
else if (const auto * drop = orig_ast->as<ASTDropQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop);
|
||||
}
|
||||
|
||||
String query_to_execute;
|
||||
ASTPtr parsed_query;
|
||||
|
||||
ASTPtr fuzz_base = orig_ast;
|
||||
|
||||
for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step)
|
||||
{
|
||||
fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs);
|
||||
@ -630,9 +706,9 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
continue;
|
||||
}
|
||||
|
||||
parsed_query = ast_to_process;
|
||||
query_to_execute = parsed_query->formatForErrorMessage();
|
||||
processParsedSingleQuery(full_query, query_to_execute, parsed_query);
|
||||
query_to_execute = ast_to_process->formatForErrorMessage();
|
||||
if (auto res = processFuzzingStep(query_to_execute, ast_to_process))
|
||||
return *res;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -645,60 +721,6 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
|
||||
// Sometimes you may get TOO_DEEP_RECURSION from the server,
|
||||
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
|
||||
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
|
||||
{
|
||||
have_error = false;
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message());
|
||||
|
||||
// Try to reconnect after errors, for two reasons:
|
||||
// 1. We might not have realized that the server died, e.g. if
|
||||
// it sent us a <Fatal> trace and closed connection properly.
|
||||
// 2. The connection might have gotten into a wrong state and
|
||||
// the next query will get false positive about
|
||||
// "Unknown packet from server".
|
||||
try
|
||||
{
|
||||
connection->forceConnected(connection_parameters.timeouts);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Just report it, we'll terminate below.
|
||||
fmt::print(stderr,
|
||||
"Error while reconnecting to the server: {}\n",
|
||||
getCurrentExceptionMessage(true));
|
||||
|
||||
// The reconnection might fail, but we'll still be connected
|
||||
// in the sense of `connection->isConnected() = true`,
|
||||
// in case when the requested database doesn't exist.
|
||||
// Disconnect manually now, so that the following code doesn't
|
||||
// have any doubts, and the connection state is predictable.
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
if (!connection->isConnected())
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server.\n");
|
||||
|
||||
// Print the changed settings because they might be needed to
|
||||
// reproduce the error.
|
||||
printChangedSettings();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that after the query is formatted, we can parse it back,
|
||||
// format again and get the same result. Unfortunately, we can't
|
||||
// compare the ASTs, which would be more sensitive to errors. This
|
||||
@ -729,13 +751,12 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// query, but second and third.
|
||||
// If you have to add any more workarounds to this check, just remove
|
||||
// it altogether, it's not so useful.
|
||||
if (parsed_query && !have_error && !queryHasWithClause(*parsed_query))
|
||||
if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process))
|
||||
{
|
||||
ASTPtr ast_2;
|
||||
try
|
||||
{
|
||||
const auto * tmp_pos = query_to_execute.c_str();
|
||||
|
||||
ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */);
|
||||
}
|
||||
catch (Exception & e)
|
||||
@ -762,7 +783,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
"Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
|
||||
text_3, text_2);
|
||||
fmt::print(stderr, "In more detail:\n");
|
||||
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree());
|
||||
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree());
|
||||
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute);
|
||||
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree());
|
||||
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2);
|
||||
@ -784,6 +805,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// so that it doesn't influence the exit code.
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
fuzzer.notifyQueryFailed(ast_to_process);
|
||||
have_error = false;
|
||||
}
|
||||
else if (ast_to_process->formatForErrorMessage().size() > 500)
|
||||
@ -800,6 +822,35 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & query : queries_for_fuzzed_tables)
|
||||
{
|
||||
std::cout << std::endl;
|
||||
WriteBufferFromOStream ast_buf(std::cout, 4096);
|
||||
formatAST(*query, ast_buf, false /*highlight*/);
|
||||
ast_buf.next();
|
||||
std::cout << std::endl << std::endl;
|
||||
|
||||
try
|
||||
{
|
||||
query_to_execute = query->formatForErrorMessage();
|
||||
if (auto res = processFuzzingStep(query_to_execute, query))
|
||||
return *res;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
fuzzer.notifyQueryFailed(query);
|
||||
have_error = false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -834,6 +885,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
|
||||
|
||||
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
|
||||
("create-query-fuzzer-runs", po::value<int>()->default_value(0), "")
|
||||
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
|
||||
"file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)")
|
||||
|
||||
@ -994,6 +1046,17 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
ignore_error = true;
|
||||
}
|
||||
|
||||
if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
|
||||
{
|
||||
// Fuzzer implies multiquery.
|
||||
config().setBool("multiquery", true);
|
||||
// Ignore errors in parsing queries.
|
||||
config().setBool("ignore-error", true);
|
||||
|
||||
global_context->setSetting("allow_suspicious_low_cardinality_types", true);
|
||||
ignore_error = true;
|
||||
}
|
||||
|
||||
if (options.count("opentelemetry-traceparent"))
|
||||
{
|
||||
String traceparent = options["opentelemetry-traceparent"].as<std::string>();
|
||||
|
@ -17,6 +17,7 @@ public:
|
||||
|
||||
protected:
|
||||
bool processWithFuzzing(const String & full_query) override;
|
||||
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);
|
||||
|
||||
void connect() override;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
/// This file was autogenerated by CMake
|
||||
|
||||
// .h autogenerated by cmake !
|
||||
#pragma once
|
||||
|
||||
#cmakedefine01 ENABLE_CLICKHOUSE_SERVER
|
||||
#cmakedefine01 ENABLE_CLICKHOUSE_CLIENT
|
||||
|
@ -58,7 +58,7 @@ void DisksApp::addOptions(
|
||||
("disk", po::value<String>(), "Set disk name")
|
||||
("command_name", po::value<String>(), "Name for command to do")
|
||||
("send-logs", "Send logs")
|
||||
("log-level", "Logging level")
|
||||
("log-level", po::value<String>(), "Logging level")
|
||||
;
|
||||
|
||||
positional_options_description.add("command_name", 1);
|
||||
|
@ -927,7 +927,11 @@ namespace
|
||||
executable.string(), config.string(), pid_file.string());
|
||||
|
||||
if (!user.empty())
|
||||
command = fmt::format("clickhouse su '{}' {}", user, command);
|
||||
{
|
||||
/// sudo respects limits in /etc/security/limits.conf e.g. open files,
|
||||
/// that's why we are using it instead of the 'clickhouse su' tool.
|
||||
command = fmt::format("sudo -u '{}' {}", user, command);
|
||||
}
|
||||
|
||||
fmt::print("Will run {}\n", command);
|
||||
executeScript(command, true);
|
||||
|
@ -24,8 +24,8 @@
|
||||
#include <pwd.h>
|
||||
#include <Coordination/FourLetterCommand.h>
|
||||
|
||||
#include "config_core.h"
|
||||
#include "Common/config_version.h"
|
||||
#include "config.h"
|
||||
#include "config_version.h"
|
||||
|
||||
#if USE_SSL
|
||||
# include <Poco/Net/Context.h>
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <base/getFQDNOrHostName.h>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
|
||||
#include <Interpreters/Session.h>
|
||||
#include <Access/AccessControl.h>
|
||||
#include <Common/Exception.h>
|
||||
@ -32,6 +31,7 @@
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Common/ErrorHandlers.h>
|
||||
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
@ -602,8 +602,6 @@ void LocalServer::processConfig()
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions(global_context);
|
||||
|
||||
bool enable_objects_loader = false;
|
||||
|
||||
if (config().has("path"))
|
||||
{
|
||||
String path = global_context->getPath();
|
||||
@ -611,12 +609,6 @@ void LocalServer::processConfig()
|
||||
/// Lock path directory before read
|
||||
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
|
||||
|
||||
LOG_DEBUG(log, "Loading user defined objects from {}", path);
|
||||
Poco::File(path + "user_defined/").createDirectories();
|
||||
UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
|
||||
enable_objects_loader = true;
|
||||
LOG_DEBUG(log, "Loaded user defined objects.");
|
||||
|
||||
LOG_DEBUG(log, "Loading metadata from {}", path);
|
||||
loadMetadataSystem(global_context);
|
||||
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
|
||||
@ -630,6 +622,9 @@ void LocalServer::processConfig()
|
||||
DatabaseCatalog::instance().loadDatabases();
|
||||
}
|
||||
|
||||
/// For ClickHouse local if path is not set the loader will be disabled.
|
||||
global_context->getUserDefinedSQLObjectsLoader().loadObjects();
|
||||
|
||||
LOG_DEBUG(log, "Loaded metadata.");
|
||||
}
|
||||
else if (!config().has("no-system-tables"))
|
||||
@ -639,9 +634,6 @@ void LocalServer::processConfig()
|
||||
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
|
||||
}
|
||||
|
||||
/// Persist SQL user defined objects only if user_defined folder was created
|
||||
UserDefinedSQLObjectsLoader::instance().enable(enable_objects_loader);
|
||||
|
||||
server_display_name = config().getString("display_name", getFQDNOrHostName());
|
||||
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
|
||||
std::map<String, String> prompt_substitutions{{"display_name", server_display_name}};
|
||||
|
@ -219,7 +219,7 @@ auto instructionFailToString(InstructionFail fail)
|
||||
case InstructionFail::AVX512:
|
||||
ret("AVX512");
|
||||
}
|
||||
__builtin_unreachable();
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_ODBC
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Server/HTTP/HTTPRequestHandler.h>
|
||||
#include <Common/config.h>
|
||||
#include "config.h"
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
#if USE_ODBC
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <Common/BridgeProtocolVersion.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Server/HTTP/HTMLForm.h>
|
||||
#include <Common/config.h>
|
||||
#include "config.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user