diff --git a/.clang-tidy b/.clang-tidy index 532b0f37b81..5da1d309f62 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,7 +1,7 @@ # To run clang-tidy from CMake, build ClickHouse with -DENABLE_CLANG_TIDY=1. To show all warnings, it is # recommended to pass "-k0" to Ninja. -# Enable all checks + disale selected checks. Feel free to remove disabled checks from below list if +# Enable all checks + disable selected checks. Feel free to remove disabled checks from below list if # a) the new check is not controversial (this includes many checks in readability-* and google-*) or # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). diff --git a/.gitattributes b/.gitattributes index a23f027122b..56d6fecf4b8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ contrib/* linguist-vendored *.h linguist-language=C++ tests/queries/0_stateless/data_json/* binary +tests/queries/0_stateless/*.reference -crlf diff --git a/.gitmodules b/.gitmodules index f372a309cad..abd29c38846 100644 --- a/.gitmodules +++ b/.gitmodules @@ -30,9 +30,6 @@ [submodule "contrib/re2"] path = contrib/re2 url = https://github.com/google/re2.git -[submodule "contrib/llvm"] - path = contrib/llvm - url = https://github.com/ClickHouse/llvm [submodule "contrib/mariadb-connector-c"] path = contrib/mariadb-connector-c url = https://github.com/ClickHouse/mariadb-connector-c.git @@ -284,3 +281,9 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares +[submodule "contrib/llvm-project"] + path = contrib/llvm-project + url = https://github.com/ClickHouse/llvm-project.git +[submodule "contrib/corrosion"] + path = contrib/corrosion + url = https://github.com/corrosion-rs/corrosion.git diff --git a/CMakeLists.txt b/CMakeLists.txt index c737046a5f6..7a04f347b2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.15) +cmake_minimum_required(VERSION 3.20) project(ClickHouse LANGUAGES C CXX ASM) @@ -495,6 +495,14 @@ endif () enable_testing() # Enable for tests without binary +option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended for any occasions. If it is enabled, it allows building with alternative OpenSSL library. By default, ClickHouse is using BoringSSL, which is better. Do not use this option." OFF) + +if (ENABLE_EXTERNAL_OPENSSL) + message (STATUS "Build and uses OpenSSL library instead of BoringSSL. This is strongly discouraged. Your build of ClickHouse will be unsupported.") + set(ENABLE_SSL 1) + target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") +endif () + # when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc if (CMAKE_INSTALL_PREFIX STREQUAL "/usr") set (CLICKHOUSE_ETC_DIR "/etc") @@ -557,9 +565,9 @@ macro (clickhouse_add_executable target) endif() endmacro() -# With cross-compiling, all targets are built for the target platform which usually different from the host -# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running -# another executable Y previously produced in the build. This is solved by compiling and running Y for/on +# With cross-compiling, all targets are built for the target platform which usually different from the host +# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running +# another executable Y previously produced in the build. 
This is solved by compiling and running Y for/on # the host platform. Add target to the list: # add_native_target( ...) set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS) @@ -567,13 +575,17 @@ function (add_native_target) set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV}) endfunction (add_native_target) -set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") -include_directories(${ConfigIncludePath}) +set(CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") +include_directories(${CONFIG_INCLUDE_PATH}) # Add as many warnings as possible for our own code. include (cmake/warnings.cmake) include (cmake/print_flags.cmake) +if (ENABLE_RUST) + add_subdirectory (rust) +endif() + add_subdirectory (base) add_subdirectory (src) add_subdirectory (programs) @@ -584,7 +596,7 @@ include (cmake/sanitize_target_link_libraries.cmake) # Build native targets if necessary get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS) -if (NATIVE_BUILD_TARGETS +if (NATIVE_BUILD_TARGETS AND NOT( CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR diff --git a/README.md b/README.md index 49aed14f719..9f4a39a2c97 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ ClickHouse® is an open-source column-oriented database management system that a ## Useful Links * [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page. +* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers. * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. @@ -15,5 +16,5 @@ ClickHouse® is an open-source column-oriented database management system that a * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming events -* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. -* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in person meetup hosted by our friends at Barracuda in Bay Area. +* [**v22.10 Release Webinar**](https://clickhouse.com/company/events/v22-10-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. +* [**Introducing ClickHouse Cloud**](https://clickhouse.com/company/events/cloud-beta) Introducing ClickHouse as a service, built by creators and maintainers of the fastest OLAP database on earth. Join Tanya Bragin for a detailed walkthrough of ClickHouse Cloud capabilities, as well as a peek behind the curtain to understand the unique architecture that makes our service tick. 
diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 916d4f9a74d..a014fa4b8f2 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -23,7 +23,7 @@ namespace { /// Trim ending whitespace inplace -void trim(String & s) +void rightTrim(String & s) { s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end()); } @@ -441,7 +441,7 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt) return (errno != EAGAIN) ? ABORT : RESET_LINE; input = cinput; - trim(input); + rightTrim(input); return INPUT_LINE; } @@ -512,6 +512,9 @@ void ReplxxLineReader::openInteractiveHistorySearch() /// NOTE: You can use one of the following to configure the behaviour additionally: /// - SKIM_DEFAULT_OPTIONS /// - FZF_DEFAULT_OPTS + /// + /// Also note that fzf and skim are 95% compatible (at least for the options + /// that are used here) std::string fuzzy_finder_command = fmt::format( "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", fuzzy_finder, history_file.getPath(), output_file.getPath()); @@ -521,7 +524,8 @@ void ReplxxLineReader::openInteractiveHistorySearch() { if (executeCommand(argv) == 0) { - const std::string & new_query = readFile(output_file.getPath()); + std::string new_query = readFile(output_file.getPath()); + rightTrim(new_query); rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); } } diff --git a/base/base/defines.h b/base/base/defines.h index 671253ed9e8..52310362991 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -123,11 +123,15 @@ /// - tries to print failed assertion into server log /// It can be used for all assertions except heavy ones. /// Heavy assertions (that run loops or call complex functions) are allowed in debug builds only. +/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, +/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) #define chassert(x) static_cast<bool>(x) ? void(0) : abortOnFailedAssertion(#x) + #define UNREACHABLE() abort() #else #define chassert(x) ((void)0) + #define UNREACHABLE() __builtin_unreachable() #endif #endif @@ -142,7 +146,9 @@ # define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) -/// Consider adding a comment before using these macros. +/// They use a lambda function to apply a function attribute to a single statement. This enables us to suppress warnings locally instead of +/// suppressing them in the whole function. +/// Consider adding a comment when using these macros. # define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) # define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) @@ -159,9 +165,9 @@ # define TSA_REQUIRES_SHARED(...) # define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_SUPPRESS_WARNING_FOR_READ(x) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) -# define TSA_READ_ONE_THREAD(x) +# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) +# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) +# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) #endif /// A template function for suppressing warnings about unused variables or function results.
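For context, a minimal usage sketch (not part of the patch) of the two defines.h additions above: UNREACHABLE() for marking impossible code paths — which the safeExit.cpp change below starts using in place of a raw __builtin_unreachable() — and the statement-level TSA suppression macros. The enum, toString() and Counter below are hypothetical examples; only the macros, TSA_GUARDED_BY (the existing guarded_by annotation from the same header) and the assumed <base/defines.h> include path come from the ClickHouse code base.

```cpp
#include <base/defines.h>   /// chassert, UNREACHABLE, TSA_* (assuming the usual include path)
#include <mutex>
#include <string>

enum class Stage { Read, Write };   /// hypothetical example type

static std::string toString(Stage stage)
{
    switch (stage)
    {
        case Stage::Read:  return "read";
        case Stage::Write: return "write";
    }
    /// With ABORT_ON_LOGICAL_ERROR this expands to abort() (SIGABRT is easier to debug than SIGTRAP),
    /// otherwise to __builtin_unreachable(), letting the compiler drop the impossible fall-through.
    UNREACHABLE();
}

struct Counter   /// hypothetical example of statement-level TSA suppression
{
    std::mutex mutex;
    int value TSA_GUARDED_BY(mutex) = 0;

    int peekWithoutLock()
    {
        /// Suppresses the thread-safety-analysis warning for this single read only,
        /// instead of annotating the whole function with TSA_NO_THREAD_SAFETY_ANALYSIS.
        return TSA_SUPPRESS_WARNING_FOR_READ(value);
    }
};
```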
diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 4ccfee80643..e4f9e80759e 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,6 +1,7 @@ #if defined(OS_LINUX) # include #endif +#include #include #include #include @@ -11,7 +12,7 @@ /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, /// while connection handling threads are still run. (void)syscall(SYS_exit_group, code); - __builtin_unreachable(); + UNREACHABLE(); #else _exit(code); #endif diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index e3f62b7948a..d10bc6ba723 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -176,6 +176,249 @@ void __explicit_bzero_chk(void * buf, size_t len, size_t unused) } +#include +#include "syscall.h" + +ssize_t copy_file_range(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags) +{ + return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags); +} + + +long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags) +{ + return syscall(SYS_splice, fd_in, off_in, fd_out, off_out, len, flags); +} + + +#define _BSD_SOURCE +#include +#include + +#if !defined(__aarch64__) +struct statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t pad1; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t pad; + } stx_atime, stx_btime, stx_ctime, stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t spare[14]; +}; +#endif + +int statx(int fd, const char *restrict path, int flag, + unsigned int mask, struct statx *restrict statxbuf) +{ + return syscall(SYS_statx, fd, path, flag, mask, statxbuf); +} + + +#include + +ssize_t getrandom(void *buf, size_t buflen, unsigned flags) +{ + /// There was cancellable syscall (syscall_cp), but I don't care too. 
+ return syscall(SYS_getrandom, buf, buflen, flags); +} + + +#include +#include + +#define ALIGN (sizeof(size_t)) +#define ONES ((size_t)-1/UCHAR_MAX) +#define HIGHS (ONES * (UCHAR_MAX/2+1)) +#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS) + +char *__strchrnul(const char *s, int c) +{ + c = (unsigned char)c; + if (!c) return (char *)s + strlen(s); + +#ifdef __GNUC__ + typedef size_t __attribute__((__may_alias__)) word; + const word *w; + for (; (uintptr_t)s % ALIGN; s++) + if (!*s || *(unsigned char *)s == c) return (char *)s; + size_t k = ONES * c; + for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++); + s = (void *)w; +#endif + for (; *s && *(unsigned char *)s != c; s++); + return (char *)s; +} + +int __execvpe(const char *file, char *const argv[], char *const envp[]) +{ + const char *p, *z, *path = getenv("PATH"); + size_t l, k; + int seen_eacces = 0; + + errno = ENOENT; + if (!*file) return -1; + + if (strchr(file, '/')) + return execve(file, argv, envp); + + if (!path) path = "/usr/local/bin:/bin:/usr/bin"; + k = strnlen(file, NAME_MAX+1); + if (k > NAME_MAX) { + errno = ENAMETOOLONG; + return -1; + } + l = strnlen(path, PATH_MAX-1)+1; + + for(p=path; ; p=z) { + char b[l+k+1]; + z = __strchrnul(p, ':'); + if (z-p >= l) { + if (!*z++) break; + continue; + } + memcpy(b, p, z-p); + b[z-p] = '/'; + memcpy(b+(z-p)+(z>p), file, k+1); + execve(b, argv, envp); + switch (errno) { + case EACCES: + seen_eacces = 1; + case ENOENT: + case ENOTDIR: + break; + default: + return -1; + } + if (!*z++) break; + } + if (seen_eacces) errno = EACCES; + return -1; +} + + +#include "spawn.h" + +int posix_spawnp(pid_t *restrict res, const char *restrict file, + const posix_spawn_file_actions_t *fa, + const posix_spawnattr_t *restrict attr, + char *const argv[restrict], char *const envp[restrict]) +{ + posix_spawnattr_t spawnp_attr = { 0 }; + if (attr) spawnp_attr = *attr; + spawnp_attr.__fn = (void *)__execvpe; + return posix_spawn(res, file, fa, &spawnp_attr, argv, envp); +} + +#define FDOP_CLOSE 1 +#define FDOP_DUP2 2 +#define FDOP_OPEN 3 +#define FDOP_CHDIR 4 +#define FDOP_FCHDIR 5 + +#define ENOMEM 12 +#define EBADF 9 + +struct fdop { + struct fdop *next, *prev; + int cmd, fd, srcfd, oflag; + mode_t mode; + char path[]; +}; + +int posix_spawn_file_actions_init(posix_spawn_file_actions_t *fa) { + fa->__actions = 0; + return 0; +} + +int posix_spawn_file_actions_addchdir_np(posix_spawn_file_actions_t *restrict fa, const char *restrict path) { + struct fdop *op = malloc(sizeof *op + strlen(path) + 1); + if (!op) return ENOMEM; + op->cmd = FDOP_CHDIR; + op->fd = -1; + strcpy(op->path, path); + if ((op->next = fa->__actions)) op->next->prev = op; + op->prev = 0; + fa->__actions = op; + return 0; +} + +int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *fa, int fd) { + if (fd < 0) return EBADF; + struct fdop *op = malloc(sizeof *op); + if (!op) return ENOMEM; + op->cmd = FDOP_CLOSE; + op->fd = fd; + if ((op->next = fa->__actions)) op->next->prev = op; + op->prev = 0; + fa->__actions = op; + return 0; +} + +int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int srcfd, int fd) { + if (srcfd < 0 || fd < 0) return EBADF; + struct fdop *op = malloc(sizeof *op); + if (!op) return ENOMEM; + op->cmd = FDOP_DUP2; + op->srcfd = srcfd; + op->fd = fd; + if ((op->next = fa->__actions)) op->next->prev = op; + op->prev = 0; + fa->__actions = op; + return 0; +} + +int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *fa, int fd) { + if (fd < 0) return EBADF; + struct fdop *op = 
malloc(sizeof *op); + if (!op) return ENOMEM; + op->cmd = FDOP_FCHDIR; + op->fd = fd; + if ((op->next = fa->__actions)) op->next->prev = op; + op->prev = 0; + fa->__actions = op; + return 0; +} + +int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *restrict fa, int fd, const char *restrict path, int flags, mode_t mode) { + if (fd < 0) return EBADF; + struct fdop *op = malloc(sizeof *op + strlen(path) + 1); + if (!op) return ENOMEM; + op->cmd = FDOP_OPEN; + op->fd = fd; + op->oflag = flags; + op->mode = mode; + strcpy(op->path, path); + if ((op->next = fa->__actions)) op->next->prev = op; + op->prev = 0; + fa->__actions = op; + return 0; +} + +int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *fa) { + struct fdop *op = fa->__actions, *next; + while (op) { + next = op->next; + free(op); + op = next; + } + return 0; +} + #if defined (__cplusplus) } #endif diff --git a/base/glibc-compatibility/spawn.h b/base/glibc-compatibility/spawn.h new file mode 100644 index 00000000000..ed1f36a614d --- /dev/null +++ b/base/glibc-compatibility/spawn.h @@ -0,0 +1,32 @@ +#ifndef _SPAWN_H +#define _SPAWN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct { + int __flags; + pid_t __pgrp; + sigset_t __def, __mask; + int __prio, __pol; + void *__fn; + char __pad[64-sizeof(void *)]; +} posix_spawnattr_t; + +typedef struct { + int __pad0[2]; + void *__actions; + int __pad[16]; +} posix_spawn_file_actions_t; + +int posix_spawn(pid_t *__restrict, const char *__restrict, const posix_spawn_file_actions_t *, + const posix_spawnattr_t *__restrict, char *const *__restrict, char *const *__restrict); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cmake/target.cmake b/cmake/target.cmake index ae360758701..b7806319980 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -51,6 +51,8 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") + # use of drand48_data + set (ENABLE_AZURE_BLOB_STORAGE OFF CACHE INTERNAL "") endif () # Don't know why but CXX_STANDARD doesn't work for cross-compilation diff --git a/contrib/AMQP-CPP b/contrib/AMQP-CPP index 1a6c51f4ac5..818c2d8ad96 160000 --- a/contrib/AMQP-CPP +++ b/contrib/AMQP-CPP @@ -1 +1 @@ -Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a +Subproject commit 818c2d8ad96a08a5d20fece7d1e1e8855a2b0860 diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 486fca60912..f914c0d2d3f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -74,7 +74,11 @@ add_contrib (re2-cmake re2) add_contrib (xz-cmake xz) add_contrib (brotli-cmake brotli) add_contrib (double-conversion-cmake double-conversion) -add_contrib (boringssl-cmake boringssl) +if (NOT ENABLE_EXTERNAL_OPENSSL) + add_contrib (boringssl-cmake boringssl) +else () + add_contrib (openssl-cmake openssl) +endif () add_contrib (poco-cmake poco) add_contrib (croaring-cmake croaring) add_contrib (zstd-cmake zstd) @@ -92,6 +96,8 @@ add_contrib (openldap-cmake openldap) add_contrib (grpc-cmake grpc) add_contrib (msgpack-c-cmake msgpack-c) +add_contrib (corrosion-cmake corrosion) + if (ENABLE_FUZZING) add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator) endif() @@ -107,7 +113,7 @@ if (ENABLE_TESTS) add_contrib (googletest-cmake googletest) endif() -add_contrib (llvm-cmake llvm) +add_contrib (llvm-project-cmake llvm-project) add_contrib (libxml2-cmake libxml2) add_contrib (aws-s3-cmake aws diff --git 
a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt index 6f6a0188e6f..631f40e6ed3 100644 --- a/contrib/amqpcpp-cmake/CMakeLists.txt +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -4,6 +4,11 @@ if (NOT ENABLE_AMQPCPP) message(STATUS "Not using AMQP-CPP") return() endif() +if (OS_FREEBSD) + message(STATUS "Not using AMQP-CPP because libuv is disabled") + return() +endif() + # can be removed once libuv build on MacOS with GCC is possible if (NOT TARGET ch_contrib::uv) diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 354998da5af..0d202ccbc52 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if(ARCH_AMD64 OR ARCH_AARCH64) +if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE) option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) elseif(ENABLE_BASE64) message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") @@ -26,7 +26,11 @@ if (ARCH_AMD64) target_compile_options(_base64_avx PRIVATE -falign-loops -mavx) target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2) else () - target_compile_options(_base64_ssse3 PRIVATE -falign-loops) + if (ARCH_PPC64LE) + target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops) + else() + target_compile_options(_base64_ssse3 PRIVATE -falign-loops) + endif() endif () if (ARCH_AMD64) diff --git a/contrib/cctz b/contrib/cctz index 49c656c62fb..05ec08ce61e 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 49c656c62fbd36a1bc20d64c476853bdb7cf7bb9 +Subproject commit 05ec08ce61e4b5c44692cc2f1ce4b6d8596679bf diff --git a/contrib/corrosion b/contrib/corrosion new file mode 160000 index 00000000000..d9dfdefaa3d --- /dev/null +++ b/contrib/corrosion @@ -0,0 +1 @@ +Subproject commit d9dfdefaa3d9ec4ba1245c7070727359c65c7869 diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt new file mode 100644 index 00000000000..ef810182a40 --- /dev/null +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -0,0 +1,46 @@ +if (NOT ENABLE_LIBRARIES) + set(DEFAULT_ENABLE_RUST FALSE) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) + message(STATUS "Rust is not available on aarch64-apple-darwin") + set(DEFAULT_ENABLE_RUST FALSE) +else() + list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") + find_package(Rust) + set(DEFAULT_ENABLE_RUST ${Rust_FOUND}) +endif() + +option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST}) + +message(STATUS ${ENABLE_RUST}) + +if(NOT ENABLE_RUST) + message(STATUS "Not using rust") + return() +endif() + +message(STATUS "Checking Rust toolchain for current target") + +if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") + set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") +endif() + +if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") + set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") +endif() + +if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-apple-darwin") +endif() + +if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) + set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") +endif() + +if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") + set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") +endif() + +message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") + +# Define function corrosion_import_crate() +include 
("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake") diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 214d23bc2a9..8478def3cb1 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -578,6 +578,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() +if (ENABLE_EXTERNAL_OPENSSL) + list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c") + list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c") +endif () + + target_sources(_krb5 PRIVATE ${ALL_SRCS} ) diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 91326422b43..9d91397731e 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -59,6 +59,12 @@ set(SRCS add_library(_libpq ${SRCS}) +if (ENABLE_EXTERNAL_OPENSSL) + add_definitions(-DHAVE_BIO_METH_NEW) + add_definitions(-DHAVE_HMAC_CTX_NEW) + add_definitions(-DHAVE_HMAC_CTX_FREE) +endif () + target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR}) target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include") target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs") diff --git a/contrib/llvm b/contrib/llvm deleted file mode 160000 index 0db5bf5bd24..00000000000 --- a/contrib/llvm +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc diff --git a/contrib/llvm-project b/contrib/llvm-project new file mode 160000 index 00000000000..3a39038345a --- /dev/null +++ b/contrib/llvm-project @@ -0,0 +1 @@ +Subproject commit 3a39038345a400e7e767811b142a94355d511215 diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt similarity index 50% rename from contrib/llvm-cmake/CMakeLists.txt rename to contrib/llvm-project-cmake/CMakeLists.txt index 4a4a5cef62e..6a73ae0f0c6 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -6,18 +6,21 @@ endif() option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) +# If USE_STATIC_LIBRARIES=0 was passed to CMake, we'll still build LLVM statically to keep complexity minimal. + if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() endif() -set (LLVM_FOUND 1) -set (LLVM_VERSION "12.0.0bundled") +# TODO: Enable compilation on AArch64 + +set (LLVM_VERSION "15.0.0bundled") set (LLVM_INCLUDE_DIRS - "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include" - "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include" + "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include" + "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include" ) -set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") +set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
set (REQUIRED_LLVM_LIBRARIES @@ -54,45 +57,48 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) -if (ARCH_AMD64) +# if (ARCH_AMD64) list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -elseif (ARCH_AARCH64) - list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -endif () +# elseif (ARCH_AARCH64) +# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +# endif () -#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES) -# llvm_map_components_to_libnames (result all) -# if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result) -# list (REMOVE_ITEM result "LTO" "LLVM") -# else() -# set (result "LLVM") -# endif () -# list (APPEND result ${CMAKE_DL_LIBS} ch_contrib::zlib) -# set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE) -#endfunction() - -message (STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}") -message (STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}") -message (STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}") - -# ld: unknown option: --color-diagnostics -set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "") - -# Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind -set (CMAKE_INSTALL_RPATH "ON") -set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") -set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") +set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind +set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection +set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") -set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "") +set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64" -# Need to use C++17 since the compilation is not possible with C++20 currently, due to ambiguous operator != etc. -# LLVM project will set its default value for the -std=... but our global setting from CMake will override it. 
-set (CMAKE_CXX_STANDARD 17) +# Omit unnecessary stuff (just the options which are ON by default) +set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "") +set(LLVM_ENABLE_CRASH_OVERRIDES 0 CACHE INTERNAL "") +set(LLVM_ENABLE_TERMINFO 0 CACHE INTERNAL "") +set(LLVM_ENABLE_LIBXML2 0 CACHE INTERNAL "") +set(LLVM_ENABLE_LIBEDIT 0 CACHE INTERNAL "") +set(LLVM_ENABLE_LIBPFM 0 CACHE INTERNAL "") +set(LLVM_ENABLE_ZLIB 0 CACHE INTERNAL "") +set(LLVM_ENABLE_ZSTD 0 CACHE INTERNAL "") +set(LLVM_ENABLE_Z3_SOLVER 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_TOOLS 0 CACHE INTERNAL "") +set(LLVM_BUILD_TOOLS 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_UTILS 0 CACHE INTERNAL "") +set(LLVM_BUILD_UTILS 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_RUNTIMES 0 CACHE INTERNAL "") +set(LLVM_BUILD_RUNTIMES 0 CACHE INTERNAL "") +set(LLVM_BUILD_RUNTIME 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_EXAMPLES 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_TESTS 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_GO_TESTS 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_BENCHMARKS 0 CACHE INTERNAL "") +set(LLVM_INCLUDE_DOCS 0 CACHE INTERNAL "") +set(LLVM_ENABLE_OCAMLDOC 0 CACHE INTERNAL "") +set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") -set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm") -set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") +set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") +set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") + set_directory_properties (PROPERTIES # due to llvm crosscompile cmake does not know how to clean it, and on clean # will lead to the following error: diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index db55c950241..2954cd574d0 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -3,6 +3,33 @@ ARG FROM_TAG=latest FROM clickhouse/test-util:$FROM_TAG +# Rust toolchain and libraries +ENV RUSTUP_HOME=/rust/rustup +ENV CARGO_HOME=/rust/cargo +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y +RUN chmod 777 -R /rust +ENV PATH="/rust/cargo/env:${PATH}" +ENV PATH="/rust/cargo/bin:${PATH}" +RUN rustup target add aarch64-unknown-linux-gnu && \ + rustup target add x86_64-apple-darwin && \ + rustup target add x86_64-unknown-freebsd && \ + rustup target add aarch64-apple-darwin && \ + rustup target add powerpc64le-unknown-linux-gnu +RUN apt-get install \ + gcc-aarch64-linux-gnu \ + build-essential \ + libc6 \ + libc6-dev \ + libc6-dev-arm64-cross \ + --yes + +# Install CMake 3.20+ for Rust compilation +# Used https://askubuntu.com/a/1157132 as reference +RUN apt purge cmake --yes +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null +RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' +RUN apt update && apt install cmake --yes + ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index d40e6dd5379..5b597f927a2 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.9.2.7" +ARG VERSION="22.9.3.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid 
on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1d70c46d9ba..c6254b898ed 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.9.2.7" +ARG VERSION="22.9.3.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 7f7a8008d4e..c9404ddcba3 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,6 +19,12 @@ RUN apt-get update \ pv \ --yes --no-install-recommends +# Install CMake 3.20+ for Rust compilation +RUN apt purge cmake --yes +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null +RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' +RUN apt update && apt install cmake --yes + RUN pip3 install numpy scipy pandas Jinja2 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 377b816b2b6..9d6cf22c817 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -157,7 +157,6 @@ function run_cmake "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" "-DENABLE_JEMALLOC=1" - "-DENABLE_REPLXX=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" diff --git a/docker/test/fuzzer/allow-nullable-key.xml b/docker/test/fuzzer/allow-nullable-key.xml new file mode 100644 index 00000000000..331012a2254 --- /dev/null +++ b/docker/test/fuzzer/allow-nullable-key.xml @@ -0,0 +1,6 @@ + + + + 1 + + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index bab87865b42..7248728864e 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -94,6 +94,7 @@ function configure # TODO figure out which ones are needed cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d + cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d cat > db/config.d/core.xml < @@ -240,6 +241,7 @@ quit --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ + --create-query-fuzzer-runs=50 \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index fb47ed0cefa..4c337d138b8 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -35,6 +35,8 @@ RUN apt-get update \ tzdata \ vim \ wget \ + rustc \ + cargo \ && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 9a31c5bbb4c..a497d3443b0 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ awscli \ brotli \ + lz4 \ expect \ 
golang \ lsof \ @@ -35,12 +36,13 @@ RUN apt-get update -y \ tree \ unixodbc \ wget \ + rustc \ + cargo \ zstd \ file \ pv \ && apt-get clean - RUN pip3 install numpy scipy pandas Jinja2 RUN mkdir -p /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index bf76fb20928..6b9954c2431 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -13,25 +13,28 @@ sysctl kernel.core_pattern='core.%e.%p-%P' # Thread Fuzzer allows to check more permutations of possible thread scheduling # and find more potential issues. +# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 +is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") +if [ "$is_tsan_build" -eq "0" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 -export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 -export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 -export THREAD_FUZZER_SLEEP_TIME_US=100000 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 -export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 -export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 -export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 -export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 -export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 -export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 -export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 -export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 -export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - -export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 -export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 -export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +fi function install_packages() diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 0fb5373a3ae..824062ca43c 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -15,7 +15,7 @@ then # If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 # compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo. # Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). 
- ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/') + ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/') if [ "${ARMV82}" ] then DIR="aarch64" diff --git a/docs/changelogs/v22.3.13.80-lts.md b/docs/changelogs/v22.3.13.80-lts.md new file mode 100644 index 00000000000..7b05b3b446f --- /dev/null +++ b/docs/changelogs/v22.3.13.80-lts.md @@ -0,0 +1,56 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.3.13.80-lts (e2708b01fba) FIXME as compared to v22.3.12.19-lts (4a08f8a073b) + +#### New Feature +* Backported in [#41264](https://github.com/ClickHouse/ClickHouse/issues/41264): Implemented automatic conversion of database engine from `Ordinary` to `Atomic`. Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#40886](https://github.com/ClickHouse/ClickHouse/issues/40886): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix +* Backported in [#41273](https://github.com/ClickHouse/ClickHouse/issues/41273): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#41557](https://github.com/ClickHouse/ClickHouse/issues/41557): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#40745](https://github.com/ClickHouse/ClickHouse/issues/40745): * Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)). +* Backported in [#41812](https://github.com/ClickHouse/ClickHouse/issues/41812): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41320](https://github.com/ClickHouse/ClickHouse/issues/41320): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#40904](https://github.com/ClickHouse/ClickHouse/issues/40904): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#41916](https://github.com/ClickHouse/ClickHouse/issues/41916): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)). +* Backported in [#40903](https://github.com/ClickHouse/ClickHouse/issues/40903): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#40663](https://github.com/ClickHouse/ClickHouse/issues/40663): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)). +* Backported in [#40901](https://github.com/ClickHouse/ClickHouse/issues/40901): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#41637](https://github.com/ClickHouse/ClickHouse/issues/41637): Fix possible segfaults, use-heap-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41664](https://github.com/ClickHouse/ClickHouse/issues/41664): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41356](https://github.com/ClickHouse/ClickHouse/issues/41356): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41807](https://github.com/ClickHouse/ClickHouse/issues/41807): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41804](https://github.com/ClickHouse/ClickHouse/issues/41804): The aggregate function `categorialInformationValue` was having incorrectly defined properties, which might cause a null pointer dereferencing at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41503](https://github.com/ClickHouse/ClickHouse/issues/41503): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41639](https://github.com/ClickHouse/ClickHouse/issues/41639): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). 
[#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41899](https://github.com/ClickHouse/ClickHouse/issues/41899): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#41321](https://github.com/ClickHouse/ClickHouse/issues/41321): Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector to DB::ColumnVector'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)). +* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)). +* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.6.9.11-stable.md b/docs/changelogs/v22.6.9.11-stable.md new file mode 100644 index 00000000000..ab2ff27f9eb --- /dev/null +++ b/docs/changelogs/v22.6.9.11-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.6.9.11-stable (9ec61dcac49) FIXME as compared to v22.6.8.35-stable (b91dc59a565) + +#### Improvement +* Backported in [#42089](https://github.com/ClickHouse/ClickHouse/issues/42089): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#41558](https://github.com/ClickHouse/ClickHouse/issues/41558): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#41504](https://github.com/ClickHouse/ClickHouse/issues/41504): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.7.6.74-stable.md b/docs/changelogs/v22.7.6.74-stable.md new file mode 100644 index 00000000000..e1c76afe0c7 --- /dev/null +++ b/docs/changelogs/v22.7.6.74-stable.md @@ -0,0 +1,52 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.7.6.74-stable (c00ffb3c11a) FIXME as compared to v22.7.5.13-stable (6f48d2d1f59) + +#### New Feature +* Backported in [#40869](https://github.com/ClickHouse/ClickHouse/issues/40869): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix +* Backported in [#41228](https://github.com/ClickHouse/ClickHouse/issues/41228): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#41559](https://github.com/ClickHouse/ClickHouse/issues/41559): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#41283](https://github.com/ClickHouse/ClickHouse/issues/41283): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#40865](https://github.com/ClickHouse/ClickHouse/issues/40865): - Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#40804](https://github.com/ClickHouse/ClickHouse/issues/40804): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)). 
+* Backported in [#40810](https://github.com/ClickHouse/ClickHouse/issues/40810): In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#41134](https://github.com/ClickHouse/ClickHouse/issues/41134): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE ()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#41617](https://github.com/ClickHouse/ClickHouse/issues/41617): Fix possible segfaults, use-heap-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41666](https://github.com/ClickHouse/ClickHouse/issues/41666): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41361](https://github.com/ClickHouse/ClickHouse/issues/41361): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#41358](https://github.com/ClickHouse/ClickHouse/issues/41358): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41808](https://github.com/ClickHouse/ClickHouse/issues/41808): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41805](https://github.com/ClickHouse/ClickHouse/issues/41805): The aggregate function `categorialInformationValue` was having incorrectly defined properties, which might cause a null pointer dereferencing at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41505](https://github.com/ClickHouse/ClickHouse/issues/41505): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41644](https://github.com/ClickHouse/ClickHouse/issues/41644): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41641](https://github.com/ClickHouse/ClickHouse/issues/41641): Fix possible `pipeline stuck` exception for queries with `OFFSET`. 
The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41900](https://github.com/ClickHouse/ClickHouse/issues/41900): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CaresPTRResolver small safety improvement [#40890](https://github.com/ClickHouse/ClickHouse/pull/40890) ([Arthur Passos](https://github.com/arthurpassos)). +* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)). +* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)). +* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v22.8.6.71-lts.md b/docs/changelogs/v22.8.6.71-lts.md new file mode 100644 index 00000000000..f5a06fb709e --- /dev/null +++ b/docs/changelogs/v22.8.6.71-lts.md @@ -0,0 +1,56 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.6.71-lts (7bf38a43e30) FIXME as compared to v22.8.5.29-lts (74ffb843807) + +#### Improvement +* Backported in [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507): Fix incompatibility of cache after switching setting `do_no_evict_index_and_mark_files` from 1 to 0, 0 to 1. 
[#41330](https://github.com/ClickHouse/ClickHouse/pull/41330) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#41229](https://github.com/ClickHouse/ClickHouse/issues/41229): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#41560](https://github.com/ClickHouse/ClickHouse/issues/41560): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#41284](https://github.com/ClickHouse/ClickHouse/issues/41284): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#41837](https://github.com/ClickHouse/ClickHouse/issues/41837): Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#41618](https://github.com/ClickHouse/ClickHouse/issues/41618): Fix possible segfaults, use-heap-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41667](https://github.com/ClickHouse/ClickHouse/issues/41667): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41685](https://github.com/ClickHouse/ClickHouse/issues/41685): Fix possible wrong query result with `query_plan_optimize_primary_key` enabled. Fixes [#40599](https://github.com/ClickHouse/ClickHouse/issues/40599). [#41281](https://github.com/ClickHouse/ClickHouse/pull/41281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41362](https://github.com/ClickHouse/ClickHouse/issues/41362): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#41359](https://github.com/ClickHouse/ClickHouse/issues/41359): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#41596](https://github.com/ClickHouse/ClickHouse/issues/41596): Fix possible deadlock with async_socket_for_remote/use_hedged_requests and parallel KILL. [#41343](https://github.com/ClickHouse/ClickHouse/pull/41343) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#41521](https://github.com/ClickHouse/ClickHouse/issues/41521): Since 22.8 `ON CLUSTER` clause is ignored if database is `Replicated` and cluster name and database name are the same. Because of this `DROP PARTITION ON CLUSTER` worked unexpected way with `Replicated`. It's fixed, now `ON CLUSTER` clause is ignored only for queries that are replicated on database level. Fixes [#41299](https://github.com/ClickHouse/ClickHouse/issues/41299). [#41390](https://github.com/ClickHouse/ClickHouse/pull/41390) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41480](https://github.com/ClickHouse/ClickHouse/issues/41480): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41806](https://github.com/ClickHouse/ClickHouse/issues/41806): The aggregate function `categorialInformationValue` was having incorrectly defined properties, which might cause a null pointer dereferencing at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41506](https://github.com/ClickHouse/ClickHouse/issues/41506): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#41581](https://github.com/ClickHouse/ClickHouse/issues/41581): Fix possible hung/deadlock on query cancellation (`KILL QUERY` or server shutdown). [#41467](https://github.com/ClickHouse/ClickHouse/pull/41467) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#41645](https://github.com/ClickHouse/ClickHouse/issues/41645): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41642](https://github.com/ClickHouse/ClickHouse/issues/41642): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41901](https://github.com/ClickHouse/ClickHouse/issues/41901): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41836](https://github.com/ClickHouse/ClickHouse/issues/41836): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). 
+* Backported in [#41890](https://github.com/ClickHouse/ClickHouse/issues/41890): Old versions of Replicated database doesn't have a special marker in [Zoo]Keeper. We need to check only whether the node contains come obscure data instead of special mark. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix stress test after [#40420](https://github.com/ClickHouse/ClickHouse/issues/40420) [#40608](https://github.com/ClickHouse/ClickHouse/pull/40608) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change default in one cache setting [#41139](https://github.com/ClickHouse/ClickHouse/pull/41139) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix download_binary, use proper version and commit [#41260](https://github.com/ClickHouse/ClickHouse/pull/41260) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)). +* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)). +* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix debug build after [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507) [#41597](https://github.com/ClickHouse/ClickHouse/pull/41597) ([Dmitry Novik](https://github.com/novikd)). +* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ diff --git a/docs/changelogs/v22.9.3.18-stable.md b/docs/changelogs/v22.9.3.18-stable.md new file mode 100644 index 00000000000..f8f82077f43 --- /dev/null +++ b/docs/changelogs/v22.9.3.18-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.3.18-stable (0cb4b15d2fa) FIXME as compared to v22.9.2.7-stable (362e2cefcef) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#41902](https://github.com/ClickHouse/ClickHouse/issues/41902): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#41863](https://github.com/ClickHouse/ClickHouse/issues/41863): 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41892](https://github.com/ClickHouse/ClickHouse/issues/41892): Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)). +* Backported in [#41891](https://github.com/ClickHouse/ClickHouse/issues/41891): Old versions of Replicated database doesn't have a special marker in [Zoo]Keeper. We need to check only whether the node contains come obscure data instead of special mark. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update 02354_annoy.sql [#41767](https://github.com/ClickHouse/ClickHouse/pull/41767) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 8712aa3e2bc..f397dc0d037 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -38,13 +38,13 @@ For other Linux distribution - check the availability of the [prebuild packages] #### Use the latest clang for Builds ``` bash -export CC=clang-14 -export CXX=clang++-14 +export CC=clang-15 +export CXX=clang++-15 ``` -In this example we use version 14 that is the latest as of Feb 2022. +In this example we use version 15 that is the latest as of Sept 2022. -Gcc can also be used though it is discouraged. +Gcc cannot be used. 
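+For illustration, a configure-and-build step with clang-15 might look like the sketch below. This is only a sketch: it assumes clang-15 is already installed, that the sources have been checked out and a fresh `build` directory created as described in the following sections, and that Ninja is used as the CMake generator.
+
+``` bash
+# Make sure CMake picks up clang-15 instead of the system default compiler.
+export CC=clang-15
+export CXX=clang++-15
+
+# CC/CXX are only read when the build directory is configured for the first time,
+# so run this from a fresh build directory inside the ClickHouse checkout.
+cmake -G Ninja ..
+ninja
+```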
### Checkout ClickHouse Sources {#checkout-clickhouse-sources} diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 82cb4018625..69afb31e214 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -122,7 +122,7 @@ If you use Arch or Gentoo, you probably know it yourself how to install CMake. ## C++ Compiler {#c-compiler} -Compilers Clang starting from version 12 is supported for building ClickHouse. +Clang starting from version 15 is supported for building ClickHouse. Clang should be used instead of gcc. Though, our continuous integration (CI) platform runs checks for about a dozen of build combinations. @@ -146,7 +146,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-14 CXX=clang++-14`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. @@ -178,7 +178,7 @@ If you get the message: `ninja: error: loading 'build.ninja': No such file or di Upon the successful start of the building process, you’ll see the build progress - the number of processed tasks and the total number of tasks. -While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored. +While building, messages about the LLVM library may show up. They affect nothing and are safe to ignore. Upon successful build you get an executable file `ClickHouse//programs/clickhouse`: @@ -272,15 +272,10 @@ Most probably some of the builds will fail at first times. This is due to the fa You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use the GitHub integrated code browser [here](https://github.dev/ClickHouse/ClickHouse). + Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. -## Faster builds for development: Split build configuration {#split-build} - -ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. As an alternative, you can instead build dynamically linked shared libraries, allowing for faster incremental builds.
To use it, add the following flags to your `cmake` invocation: -``` --DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -``` - If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options ``` -DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0 diff --git a/docs/en/development/integrating_rust_libraries.md b/docs/en/development/integrating_rust_libraries.md index ef0472bf4ac..ac77500bb35 100644 --- a/docs/en/development/integrating_rust_libraries.md +++ b/docs/en/development/integrating_rust_libraries.md @@ -5,11 +5,49 @@ slug: /en/development/integrating_rust_libraries Rust library integration will be described based on BLAKE3 hash-function integration. -The first step is forking a library and making necessary changes for Rust and C/C++ compatibility. +The first step of integration is to add the library to the /rust folder. To do this, you need to create an empty Rust project and include the required library in Cargo.toml. It is also necessary to configure the new library to be compiled as a static library by adding `crate-type = ["staticlib"]` to Cargo.toml. A minimal `Cargo.toml` for such a crate is sketched further down this page. -After forking library repository you need to change target settings in Cargo.toml file. Firstly, you need to switch build to static library. Secondly, you need to add cbindgen crate to the crate list. We will use it later to generate C-header automatically. +Next, you need to link the library to CMake using the Corrosion library. The first step is to add the library folder to the CMakeLists.txt inside the /rust folder. After that, you should add a CMakeLists.txt file to the library directory. In it, you need to call the Corrosion import function. These lines were used to import BLAKE3: -The next step is creating or editing the build.rs script for your library - and enable cbindgen to generate the header during library build. These lines were added to BLAKE3 build script for the same purpose: +``` +corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD) + +target_include_directories(_ch_rust_blake3 INTERFACE include) +add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) +``` + +Thus, we will create a correct CMake target using Corrosion, and then alias it with a more convenient name. Note that the name `_ch_rust_blake3` comes from Cargo.toml, where it is used as the project name (`name = "_ch_rust_blake3"`). + +Since Rust data types are not compatible with C/C++ data types, we will use our empty library project to create shim methods that convert the data received from C/C++, call the library methods, and convert the output data back. For example, this method was written for BLAKE3: + +``` +#[no_mangle] +pub unsafe extern "C" fn blake3_apply_shim( + begin: *const c_char, + _size: u32, + out_char_data: *mut u8, +) -> *mut c_char { + if begin.is_null() { + let err_str = CString::new("input was a null pointer").unwrap(); + return err_str.into_raw(); + } + let mut hasher = blake3::Hasher::new(); + let input_bytes = CStr::from_ptr(begin); + let input_res = input_bytes.to_bytes(); + hasher.update(input_res); + let mut reader = hasher.finalize_xof(); + reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); + std::ptr::null_mut() +} +``` + +This method takes a C-compatible string, its size, and an output string pointer as input. Then, it converts the C-compatible inputs into the types that are used by the actual library methods and calls them. After that, it should convert the library methods' outputs back into a C-compatible type.
In that particular case library supported direct writing into pointer by method fill(), so the conversion was not needed. The main advice here is to create less methods, so you will need to do less conversions on each method call and won't create much overhead. + +It is worth noting that the `#[no_mangle]` attribute and `extern "C"` are mandatory for all such methods. Without them, it will not be possible to perform a correct C/C++-compatible compilation. Moreover, they are necessary for the next step of the integration. + +After writing the code for the shim methods, we need to prepare the header file for the library. This can be done manually, or you can use the cbindgen library for auto-generation. In case of using cbindgen, you will need to write a build.rs build script and include cbindgen as a build-dependency. + +An example of a build script that can auto-generate a header file: ``` let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); @@ -27,39 +65,9 @@ The next step is creating or editing the build.rs script for your library - and } ``` -As you can see, script sets the output directory and launches header generation. - -The next step is to add CMake files into library directory, so it can build with other submodules. As you can see, BLAKE3 main directory contains two CMake files - CMakeLists.txt and build_rust_lib.cmake. The second one is a function, which calls cargo build and sets all needed paths for library build. You should copy it to your library and then you can adjust cargo flags and other settings for you library needs. - -When finished with CMake configuration, you should move on to create a C/C++ compatible API for your library. Let us see BLAKE3's method blake3_apply_shim: - -``` -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - _size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let mut hasher = Hasher::new(); - let input_bytes = CStr::from_ptr(begin); - let input_res = input_bytes.to_bytes(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN)); - std::ptr::null_mut() -} -``` - -This method gets C-compatible string, its size and output string pointer as input. Then, it converts C-compatible inputs into types that are used by actual library methods and calls them. After that, it should convert library methods' outputs back into C-compatible type. In that particular case library supported direct writing into pointer by method fill(), so the conversion was not needed. The main advice here is to create less methods, so you will need to do less conversions on each method call and won't create much overhead. - Also, you should use attribute #[no_mangle] and `extern "C"` for every C-compatible attribute. Without it library can compile incorrectly and cbindgen won't launch header autogeneration. -After all these steps you can test your library in a small project to find all problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with cbindgen.toml file (you can find an example of it in BLAKE3 directory or a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). If everything works correctly, you can finally integrate its methods into ClickHouse. 
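+For reference, a minimal `Cargo.toml` for such a shim crate could look like the sketch below. The crate name matches the `_ch_rust_blake3` naming used above; the `edition` and the dependency versions are only illustrative and should be adapted to the library being integrated.
+
+```
+[package]
+name = "_ch_rust_blake3"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+# The wrapped library; the version here is an example.
+blake3 = "1"
+
+[build-dependencies]
+# Needed only if you auto-generate the C header with a build.rs script as shown above.
+cbindgen = "0.24"
+
+[lib]
+crate-type = ["staticlib"]
+```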
+After all these steps you can test your library in a small project to find all problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with cbindgen.toml file (you can find a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). -In addition, some problems with integration are worth noting here: -1) Some architectures may require special cargo flags or build.rs configurations, so you may want to test cross-compilation for different platforms first. -2) MemorySanitizer can cause false-positive reports as it's unable to see if some variables in Rust are initialized or not. It was solved with writing a method with more explicit definition for some variables, although this implementation of method is slower and is used only to fix MemorySanitizer builds. +It is worth noting the problem that occurred when integrating BLAKE3: +MemorySanitizer can cause false-positive reports as it's unable to see if some variables in Rust are initialized or not. It was solved with writing a method with more explicit definition for some variables, although this implementation of method is slower and is used only to fix MemorySanitizer builds. \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/_category_.yml b/docs/en/getting-started/example-datasets/_category_.yml index a9fab0a9d3c..2ee34c63e93 100644 --- a/docs/en/getting-started/example-datasets/_category_.yml +++ b/docs/en/getting-started/example-datasets/_category_.yml @@ -1,8 +1,7 @@ -position: 10 +position: 1 label: 'Example Datasets' collapsible: true collapsed: true link: - type: generated-index - title: Example Datasets - slug: /en/getting-started/example-datasets + type: doc + id: en/getting-started/example-datasets/ diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index eaa9cdfde88..3d993c3e224 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -1,9 +1,16 @@ --- slug: /en/getting-started/example-datasets/cell-towers sidebar_label: Cell Towers +sidebar_position: 3 title: "Cell Towers" --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md'; +import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md'; + This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc). @@ -13,6 +20,26 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4 ## Get the Dataset {#get-the-dataset} + + + +ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud). + + +Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data**: + +![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png) + +Examine the schema of the cell_towers table: +```sql +DESCRIBE TABLE cell_towers +``` + + + + + + 1. 
Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB). 2. Validate the integrity (optional step): @@ -56,7 +83,10 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created); clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv ``` -## Examples {#examples} + + + +## Example queries {#examples} 1. A number of cell towers by type: @@ -101,18 +131,31 @@ So, the top countries are: the USA, Germany, and Russia. You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. - -## Use case {#use-case} +## Use case: Incorporate geo data {#use-case} Using `pointInPolygon` function. 1. Create a table where we will store polygons: + + + +```sql +CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64))) +ORDER BY polygon; +``` + + + + ```sql CREATE TEMPORARY TABLE moscow (polygon Array(Tuple(Float64, Float64))); ``` + + + 2. This is a rough shape of Moscow (without "new Moscow"): ```sql diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index ab99333390e..e0bab94428f 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -65,7 +65,9 @@ CREATE TABLE criteo icat24 UInt32, icat25 UInt32, icat26 UInt32 -) ENGINE = MergeTree(date, intHash32(icat1), (date, intHash32(icat1)), 8192) +) ENGINE = MergeTree() +PARTITION BY toYYYYMM(date) +ORDER BY (date, icat1) ``` Transform data from the raw log and put it in the second table: diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 11621cfa5f5..e24fb4b01a7 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -5,285 +5,195 @@ sidebar_position: 2 description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 --- -# New York Taxi Data +# New York Taxi Data -This dataset can be obtained in two ways: +The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways: -- import from raw data -- download of prepared partitions +- insert the data directly into ClickHouse Cloud from S3 +- download prepared partitions -## How to Import the Raw Data {#how-to-import-the-raw-data} +## Retrieve the Data from S3 + +Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in AWS S3, which is easily streamed into +ClickHouse Cloud using the `s3` table function. 
Start by creating a table for the taxi rides: + +```sql +CREATE TABLE trips ( + trip_id UInt32, + pickup_datetime DateTime, + dropoff_datetime DateTime, + pickup_longitude Nullable(Float64), + pickup_latitude Nullable(Float64), + dropoff_longitude Nullable(Float64), + dropoff_latitude Nullable(Float64), + passenger_count UInt8, + trip_distance Float32, + fare_amount Float32, + extra Float32, + tip_amount Float32, + tolls_amount Float32, + total_amount Float32, + payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), + pickup_ntaname LowCardinality(String), + dropoff_ntaname LowCardinality(String) +) +ENGINE = MergeTree +PRIMARY KEY (pickup_datetime, dropoff_datetime) +``` + +The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): + +```sql +INSERT INTO trips +SELECT + trip_id, + pickup_datetime, + dropoff_datetime, + pickup_longitude, + pickup_latitude, + dropoff_longitude, + dropoff_latitude, + passenger_count, + trip_distance, + fare_amount, + extra, + tip_amount, + tolls_amount, + total_amount, + payment_type, + pickup_ntaname, + dropoff_ntaname +FROM url( + 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz', + 'TabSeparatedWithNames' +) +``` + +Let's see how many rows were inserted: + +```sql +SELECT count() +FROM trips +``` + +Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's look at a few rows: + +```sql +SELECT * +FROM trips +LIMIT 10 +``` + +Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more: + +```response +┌────trip_id─┬─────pickup_datetime─┬────dropoff_datetime─┬───pickup_longitude─┬────pickup_latitude─┬──dropoff_longitude─┬───dropoff_latitude─┬─passenger_count─┬─trip_distance─┬─fare_amount─┬─extra─┬─tip_amount─┬─tolls_amount─┬─total_amount─┬─payment_type─┬─pickup_ntaname─────────────────────────────┬─dropoff_ntaname────────────────────────────┐ +│ 1200864931 │ 2015-07-01 00:00:13 │ 2015-07-01 00:14:41 │ -73.99046325683594 │ 40.746116638183594 │ -73.97918701171875 │ 40.78467559814453 │ 5 │ 3.54 │ 13.5 │ 0.5 │ 1 │ 0 │ 15.8 │ CSH │ Midtown-Midtown South │ Upper West Side │ +│ 1200018648 │ 2015-07-01 00:00:16 │ 2015-07-01 00:02:57 │ -73.78358459472656 │ 40.648677825927734 │ -73.80242919921875 │ 40.64767837524414 │ 1 │ 1.45 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ Airport │ Airport │ +│ 1201452450 │ 2015-07-01 00:00:20 │ 2015-07-01 00:11:07 │ -73.98579406738281 │ 40.72777557373047 │ -74.00482177734375 │ 40.73748779296875 │ 5 │ 1.56 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ East Village │ West Village │ +│ 1202368372 │ 2015-07-01 00:00:40 │ 2015-07-01 00:05:46 │ -74.00206756591797 │ 40.73833084106445 │ -74.00658416748047 │ 40.74875259399414 │ 2 │ 1 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ West Village │ Hudson Yards-Chelsea-Flatiron-Union Square │ +│ 1200831168 │ 2015-07-01 00:01:06 │ 2015-07-01 00:09:23 │ -73.98748016357422 │ 40.74344253540039 │ -74.00575256347656 │ 40.716793060302734 │ 1 │ 2.3 │ 9 │ 0.5 │ 2 │ 0 │ 12.3 │ CSH │ Hudson Yards-Chelsea-Flatiron-Union Square │ SoHo-TriBeCa-Civic Center-Little Italy │ +│ 1201362116 │ 2015-07-01 00:01:07 │ 2015-07-01 00:03:31 │ -73.9926986694336 │ 40.75826644897461 │ -73.98628997802734 │ 40.76075744628906 │ 1 │ 0.6 │ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Clinton │ Midtown-Midtown South │ +│ 1200639419 │ 2015-07-01 00:01:13 │ 2015-07-01 00:03:56 │ -74.00382995605469 │ 40.741981506347656 │ -73.99711608886719 │ 40.742271423339844 │ 1 │ 0.49 
│ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Hudson Yards-Chelsea-Flatiron-Union Square │ Hudson Yards-Chelsea-Flatiron-Union Square │ +│ 1201181622 │ 2015-07-01 00:01:17 │ 2015-07-01 00:05:12 │ -73.9512710571289 │ 40.78261947631836 │ -73.95230865478516 │ 40.77476119995117 │ 4 │ 0.97 │ 5 │ 0.5 │ 1 │ 0 │ 7.3 │ CSH │ Upper East Side-Carnegie Hill │ Yorkville │ +│ 1200978273 │ 2015-07-01 00:01:28 │ 2015-07-01 00:09:46 │ -74.00822448730469 │ 40.72113037109375 │ -74.00422668457031 │ 40.70782470703125 │ 1 │ 1.71 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ SoHo-TriBeCa-Civic Center-Little Italy │ Battery Park City-Lower Manhattan │ +│ 1203283366 │ 2015-07-01 00:01:47 │ 2015-07-01 00:24:26 │ -73.98199462890625 │ 40.77289962768555 │ -73.91968536376953 │ 40.766082763671875 │ 3 │ 5.26 │ 19.5 │ 0.5 │ 5.2 │ 0 │ 26 │ CSH │ Lincoln Square │ Astoria │ +└────────────┴─────────────────────┴─────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴─────────────────┴───────────────┴─────────────┴───────┴────────────┴──────────────┴──────────────┴──────────────┴────────────────────────────────────────────┴────────────────────────────────────────────┘ +``` + +Let's run a few queries. This query shows us the top 10 neighborhoods that have the most frequent pickups: + +``` sql +SELECT + pickup_ntaname, + count(*) AS count +FROM trips +GROUP BY pickup_ntaname +ORDER BY count DESC +LIMIT 10 +``` + +The result is: + +```response +┌─pickup_ntaname─────────────────────────────┬──count─┐ +│ Midtown-Midtown South │ 526864 │ +│ Hudson Yards-Chelsea-Flatiron-Union Square │ 288797 │ +│ West Village │ 210436 │ +│ Turtle Bay-East Midtown │ 197111 │ +│ Upper East Side-Carnegie Hill │ 184327 │ +│ Airport │ 151343 │ +│ SoHo-TriBeCa-Civic Center-Little Italy │ 144967 │ +│ Murray Hill-Kips Bay │ 138599 │ +│ Upper West Side │ 135469 │ +│ Clinton │ 130002 │ +└────────────────────────────────────────────┴────────┘ +``` + +This query shows the average fare based on the number of passengers: + +``` sql +SELECT + passenger_count, + avg(total_amount) +FROM trips +GROUP BY passenger_count +``` + +```response +┌─passenger_count─┬──avg(total_amount)─┐ +│ 0 │ 25.226335263065018 │ +│ 1 │ 15.961279340656672 │ +│ 2 │ 17.146174183960667 │ +│ 3 │ 17.65380033178517 │ +│ 4 │ 17.248804201047456 │ +│ 5 │ 16.353501285179135 │ +│ 6 │ 15.995094439202836 │ +│ 7 │ 62.077143805367605 │ +│ 8 │ 26.120000791549682 │ +│ 9 │ 10.300000190734863 │ +└─────────────────┴────────────────────┘ +``` + +Here's a correlation between the number of passengers and the distance of the trip: + +``` sql +SELECT + passenger_count, + toYear(pickup_datetime) AS year, + round(trip_distance) AS distance, + count(*) +FROM trips +GROUP BY passenger_count, year, distance +ORDER BY year, count(*) DESC +``` + +The first part of the result is: + +```response +┌─passenger_count─┬─year─┬─distance─┬─count()─┐ +│ 1 │ 2015 │ 1 │ 748644 │ +│ 1 │ 2015 │ 2 │ 521602 │ +│ 1 │ 2015 │ 3 │ 225077 │ +│ 2 │ 2015 │ 1 │ 144990 │ +│ 1 │ 2015 │ 4 │ 134782 │ +│ 1 │ 2015 │ 0 │ 127284 │ +│ 2 │ 2015 │ 2 │ 106411 │ +│ 1 │ 2015 │ 5 │ 72725 │ +│ 5 │ 2015 │ 1 │ 59343 │ +│ 1 │ 2015 │ 6 │ 53447 │ +│ 2 │ 2015 │ 3 │ 48019 │ +│ 3 │ 2015 │ 1 │ 44865 │ +│ 6 │ 2015 │ 1 │ 39409 │ +``` + +## Download of Prepared Partitions {#download-of-prepared-partitions} See https://github.com/toddwschneider/nyc-taxi-data and http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html for the description of a dataset and instructions for downloading. 
Downloading will result in about 227 GB of uncompressed data in CSV files. The download takes about an hour over a 1 Gbit connection (parallel downloading from s3.amazonaws.com recovers at least half of a 1 Gbit channel). Some of the files might not download fully. Check the file sizes and re-download any that seem doubtful. -Some of the files might contain invalid rows. You can fix them as follows: - -``` bash -sed -E '/(.*,){18,}/d' data/yellow_tripdata_2010-02.csv > data/yellow_tripdata_2010-02.csv_ -sed -E '/(.*,){18,}/d' data/yellow_tripdata_2010-03.csv > data/yellow_tripdata_2010-03.csv_ -mv data/yellow_tripdata_2010-02.csv_ data/yellow_tripdata_2010-02.csv -mv data/yellow_tripdata_2010-03.csv_ data/yellow_tripdata_2010-03.csv -``` - -Then the data must be pre-processed in PostgreSQL. This will create selections of points in the polygons (to match points on the map with the boroughs of New York City) and combine all the data into a single denormalized flat table by using a JOIN. To do this, you will need to install PostgreSQL with PostGIS support. - -Be careful when running `initialize_database.sh` and manually re-check that all the tables were created correctly. - -It takes about 20-30 minutes to process each month’s worth of data in PostgreSQL, for a total of about 48 hours. - -You can check the number of downloaded rows as follows: - -``` bash -$ time psql nyc-taxi-data -c "SELECT count(*) FROM trips;" -## Count - 1298979494 -(1 row) - -real 7m9.164s -``` - -(This is slightly more than 1.1 billion rows reported by Mark Litwintschik in a series of blog posts.) - -The data in PostgreSQL uses 370 GB of space. - -Exporting the data from PostgreSQL: - -``` sql -COPY -( - SELECT trips.id, - trips.vendor_id, - trips.pickup_datetime, - trips.dropoff_datetime, - trips.store_and_fwd_flag, - trips.rate_code_id, - trips.pickup_longitude, - trips.pickup_latitude, - trips.dropoff_longitude, - trips.dropoff_latitude, - trips.passenger_count, - trips.trip_distance, - trips.fare_amount, - trips.extra, - trips.mta_tax, - trips.tip_amount, - trips.tolls_amount, - trips.ehail_fee, - trips.improvement_surcharge, - trips.total_amount, - trips.payment_type, - trips.trip_type, - trips.pickup, - trips.dropoff, - - cab_types.type cab_type, - - weather.precipitation_tenths_of_mm rain, - weather.snow_depth_mm, - weather.snowfall_mm, - weather.max_temperature_tenths_degrees_celsius max_temp, - weather.min_temperature_tenths_degrees_celsius min_temp, - weather.average_wind_speed_tenths_of_meters_per_second wind, - - pick_up.gid pickup_nyct2010_gid, - pick_up.ctlabel pickup_ctlabel, - pick_up.borocode pickup_borocode, - pick_up.boroname pickup_boroname, - pick_up.ct2010 pickup_ct2010, - pick_up.boroct2010 pickup_boroct2010, - pick_up.cdeligibil pickup_cdeligibil, - pick_up.ntacode pickup_ntacode, - pick_up.ntaname pickup_ntaname, - pick_up.puma pickup_puma, - - drop_off.gid dropoff_nyct2010_gid, - drop_off.ctlabel dropoff_ctlabel, - drop_off.borocode dropoff_borocode, - drop_off.boroname dropoff_boroname, - drop_off.ct2010 dropoff_ct2010, - drop_off.boroct2010 dropoff_boroct2010, - drop_off.cdeligibil dropoff_cdeligibil, - drop_off.ntacode dropoff_ntacode, - drop_off.ntaname dropoff_ntaname, - drop_off.puma dropoff_puma - FROM trips - LEFT JOIN cab_types - ON trips.cab_type_id = cab_types.id - LEFT JOIN central_park_weather_observations_raw weather - ON weather.date = trips.pickup_datetime::date - LEFT JOIN nyct2010 pick_up - ON pick_up.gid = trips.pickup_nyct2010_gid - LEFT JOIN nyct2010 drop_off - ON 
drop_off.gid = trips.dropoff_nyct2010_gid -) TO '/opt/milovidov/nyc-taxi-data/trips.tsv'; -``` - -The data snapshot is created at a speed of about 50 MB per second. While creating the snapshot, PostgreSQL reads from the disk at a speed of about 28 MB per second. -This takes about 5 hours. The resulting TSV file is 590612904969 bytes. - -Create a temporary table in ClickHouse: - -``` sql -CREATE TABLE trips -( -trip_id UInt32, -vendor_id String, -pickup_datetime DateTime, -dropoff_datetime Nullable(DateTime), -store_and_fwd_flag Nullable(FixedString(1)), -rate_code_id Nullable(UInt8), -pickup_longitude Nullable(Float64), -pickup_latitude Nullable(Float64), -dropoff_longitude Nullable(Float64), -dropoff_latitude Nullable(Float64), -passenger_count Nullable(UInt8), -trip_distance Nullable(Float64), -fare_amount Nullable(Float32), -extra Nullable(Float32), -mta_tax Nullable(Float32), -tip_amount Nullable(Float32), -tolls_amount Nullable(Float32), -ehail_fee Nullable(Float32), -improvement_surcharge Nullable(Float32), -total_amount Nullable(Float32), -payment_type Nullable(String), -trip_type Nullable(UInt8), -pickup Nullable(String), -dropoff Nullable(String), -cab_type Nullable(String), -precipitation Nullable(UInt8), -snow_depth Nullable(UInt8), -snowfall Nullable(UInt8), -max_temperature Nullable(UInt8), -min_temperature Nullable(UInt8), -average_wind_speed Nullable(UInt8), -pickup_nyct2010_gid Nullable(UInt8), -pickup_ctlabel Nullable(String), -pickup_borocode Nullable(UInt8), -pickup_boroname Nullable(String), -pickup_ct2010 Nullable(String), -pickup_boroct2010 Nullable(String), -pickup_cdeligibil Nullable(FixedString(1)), -pickup_ntacode Nullable(String), -pickup_ntaname Nullable(String), -pickup_puma Nullable(String), -dropoff_nyct2010_gid Nullable(UInt8), -dropoff_ctlabel Nullable(String), -dropoff_borocode Nullable(UInt8), -dropoff_boroname Nullable(String), -dropoff_ct2010 Nullable(String), -dropoff_boroct2010 Nullable(String), -dropoff_cdeligibil Nullable(String), -dropoff_ntacode Nullable(String), -dropoff_ntaname Nullable(String), -dropoff_puma Nullable(String) -) ENGINE = Log; -``` - -It is needed for converting fields to more correct data types and, if possible, to eliminate NULLs. - -``` bash -$ time clickhouse-client --query="INSERT INTO trips FORMAT TabSeparated" < trips.tsv - -real 75m56.214s -``` - -Data is read at a speed of 112-140 Mb/second. -Loading data into a Log type table in one stream took 76 minutes. -The data in this table uses 142 GB. - -(Importing data directly from Postgres is also possible using `COPY ... TO PROGRAM`.) - -Unfortunately, all the fields associated with the weather (precipitation…average_wind_speed) were filled with NULL. Because of this, we will remove them from the final data set. - -To start, we’ll create a table on a single server. Later we will make the table distributed. 
- -Create and populate a summary table: - -``` sql -CREATE TABLE trips_mergetree -ENGINE = MergeTree(pickup_date, pickup_datetime, 8192) -AS SELECT - -trip_id, -CAST(vendor_id AS Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14)) AS vendor_id, -toDate(pickup_datetime) AS pickup_date, -ifNull(pickup_datetime, toDateTime(0)) AS pickup_datetime, -toDate(dropoff_datetime) AS dropoff_date, -ifNull(dropoff_datetime, toDateTime(0)) AS dropoff_datetime, -assumeNotNull(store_and_fwd_flag) IN ('Y', '1', '2') AS store_and_fwd_flag, -assumeNotNull(rate_code_id) AS rate_code_id, -assumeNotNull(pickup_longitude) AS pickup_longitude, -assumeNotNull(pickup_latitude) AS pickup_latitude, -assumeNotNull(dropoff_longitude) AS dropoff_longitude, -assumeNotNull(dropoff_latitude) AS dropoff_latitude, -assumeNotNull(passenger_count) AS passenger_count, -assumeNotNull(trip_distance) AS trip_distance, -assumeNotNull(fare_amount) AS fare_amount, -assumeNotNull(extra) AS extra, -assumeNotNull(mta_tax) AS mta_tax, -assumeNotNull(tip_amount) AS tip_amount, -assumeNotNull(tolls_amount) AS tolls_amount, -assumeNotNull(ehail_fee) AS ehail_fee, -assumeNotNull(improvement_surcharge) AS improvement_surcharge, -assumeNotNull(total_amount) AS total_amount, -CAST((assumeNotNull(payment_type) AS pt) IN ('CSH', 'CASH', 'Cash', 'CAS', 'Cas', '1') ? 'CSH' : (pt IN ('CRD', 'Credit', 'Cre', 'CRE', 'CREDIT', '2') ? 'CRE' : (pt IN ('NOC', 'No Charge', 'No', '3') ? 'NOC' : (pt IN ('DIS', 'Dispute', 'Dis', '4') ? 'DIS' : 'UNK'))) AS Enum8('CSH' = 1, 'CRE' = 2, 'UNK' = 0, 'NOC' = 3, 'DIS' = 4)) AS payment_type_, -assumeNotNull(trip_type) AS trip_type, -ifNull(toFixedString(unhex(pickup), 25), toFixedString('', 25)) AS pickup, -ifNull(toFixedString(unhex(dropoff), 25), toFixedString('', 25)) AS dropoff, -CAST(assumeNotNull(cab_type) AS Enum8('yellow' = 1, 'green' = 2, 'uber' = 3)) AS cab_type, - -assumeNotNull(pickup_nyct2010_gid) AS pickup_nyct2010_gid, -toFloat32(ifNull(pickup_ctlabel, '0')) AS pickup_ctlabel, -assumeNotNull(pickup_borocode) AS pickup_borocode, -CAST(assumeNotNull(pickup_boroname) AS Enum8('Manhattan' = 1, 'Queens' = 4, 'Brooklyn' = 3, '' = 0, 'Bronx' = 2, 'Staten Island' = 5)) AS pickup_boroname, -toFixedString(ifNull(pickup_ct2010, '000000'), 6) AS pickup_ct2010, -toFixedString(ifNull(pickup_boroct2010, '0000000'), 7) AS pickup_boroct2010, -CAST(assumeNotNull(ifNull(pickup_cdeligibil, ' ')) AS Enum8(' ' = 0, 'E' = 1, 'I' = 2)) AS pickup_cdeligibil, -toFixedString(ifNull(pickup_ntacode, '0000'), 4) AS pickup_ntacode, - -CAST(assumeNotNull(pickup_ntaname) AS Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 
'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 
157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195)) AS pickup_ntaname, - -toUInt16(ifNull(pickup_puma, '0')) AS pickup_puma, - -assumeNotNull(dropoff_nyct2010_gid) AS dropoff_nyct2010_gid, -toFloat32(ifNull(dropoff_ctlabel, '0')) AS dropoff_ctlabel, -assumeNotNull(dropoff_borocode) AS dropoff_borocode, -CAST(assumeNotNull(dropoff_boroname) AS Enum8('Manhattan' = 1, 'Queens' = 4, 'Brooklyn' = 3, '' = 0, 'Bronx' = 2, 'Staten Island' = 5)) AS dropoff_boroname, -toFixedString(ifNull(dropoff_ct2010, '000000'), 6) AS dropoff_ct2010, -toFixedString(ifNull(dropoff_boroct2010, '0000000'), 7) AS dropoff_boroct2010, -CAST(assumeNotNull(ifNull(dropoff_cdeligibil, ' ')) AS Enum8(' ' = 0, 'E' = 1, 'I' = 2)) AS dropoff_cdeligibil, -toFixedString(ifNull(dropoff_ntacode, '0000'), 4) AS dropoff_ntacode, - -CAST(assumeNotNull(dropoff_ntaname) AS Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 
'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. 
Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195)) AS dropoff_ntaname, - -toUInt16(ifNull(dropoff_puma, '0')) AS dropoff_puma - -FROM trips -``` - -This takes 3030 seconds at a speed of about 428,000 rows per second. -To load it faster, you can create the table with the `Log` engine instead of `MergeTree`. In this case, the download works faster than 200 seconds. - -The table uses 126 GB of disk space. - -``` sql -SELECT formatReadableSize(sum(bytes)) FROM system.parts WHERE table = 'trips_mergetree' AND active -``` - -``` text -┌─formatReadableSize(sum(bytes))─┐ -│ 126.18 GiB │ -└────────────────────────────────┘ -``` - -Among other things, you can run the OPTIMIZE query on MergeTree. But it’s not required since everything will be fine without it. - -## Download of Prepared Partitions {#download-of-prepared-partitions} - ``` bash $ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar $ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory @@ -292,7 +202,7 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "select count(*) from datasets.trips_mergetree" ``` -:::info +:::info If you will run the queries described below, you have to use the full table name, `datasets.trips_mergetree`. ::: @@ -390,5 +300,3 @@ We ran queries using a client located in a different datacenter than where the c | 1, AWS c5n.9xlarge | 0.130 | 0.584 | 0.777 | 1.811 | | 3, AWS c5n.9xlarge | 0.057 | 0.231 | 0.285 | 0.641 | | 140, E5-2650v2 | 0.028 | 0.043 | 0.051 | 0.072 | - -[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets/nyc_taxi/) diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index bb9046397a0..ef20c03883f 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -13,16 +13,6 @@ Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0. -## Download the Dataset {#download-dataset} - -Run the command: - -```bash -wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv -``` - -Download will take about 2 minutes with good internet connection. 
- ## Create the Table {#create-table} ```sql @@ -41,31 +31,49 @@ CREATE TABLE uk_price_paid locality LowCardinality(String), town LowCardinality(String), district LowCardinality(String), - county LowCardinality(String), - category UInt8 -) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2); + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2); ``` -## Preprocess and Import Data {#preprocess-import-data} +## Preprocess and Insert the Data {#preprocess-import-data} -We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it. +We will use the `url` function to stream the data into ClickHouse. We need to preprocess some of the incoming data first, which includes: +- splitting the `postcode` into two different columns - `postcode1` and `postcode2`, which is better for storage and queries +- converting the `time` field to date as it only contains 00:00 time +- ignoring the [UUID](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis +- transforming `type` and `duration` to more readable `Enum` fields using the [transform](../../sql-reference/functions/other-functions.md#transform) function +- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 or 1 +- dropping the last two columns since they both have the same value (which is 0) -In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`. +The `url` function streams the data from the web server into your ClickHouse table. The following command inserts about 27.5 million rows into the `uk_price_paid` table: -The preprocessing is: -- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries; -- coverting the `time` field to date as it only contains 00:00 time; -- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis; -- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform); -- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1. - -Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion. 
- -```bash -clickhouse-local --input-format CSV --structure ' - uuid String, - price UInt32, - time DateTime, +```sql +INSERT INTO uk_price_paid +WITH + splitByChar(' ', postcode) AS p +SELECT + toUInt32(price_string) AS price, + parseDateTimeBestEffortUS(time) AS date, + p[1] AS postcode1, + p[2] AS postcode2, + transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type, + b = 'Y' AS is_new, + transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration, + addr1, + addr2, + street, + locality, + town, + district, + county +FROM url( + 'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv', + 'CSV', + 'uuid_string String, + price_string String, + time String, postcode String, a String, b String, @@ -78,154 +86,136 @@ clickhouse-local --input-format CSV --structure ' district String, county String, d String, - e String -' --query " - WITH splitByChar(' ', postcode) AS p - SELECT - price, - toDate(time) AS date, - p[1] AS postcode1, - p[2] AS postcode2, - transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type, - b = 'Y' AS is_new, - transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration, - addr1, - addr2, - street, - locality, - town, - district, - county, - d = 'B' AS category - FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV" + e String' +) SETTINGS max_http_get_redirects=10; ``` -It will take about 40 seconds. +Wait for the data to insert - it will take a minute or two depending on the network speed. ## Validate the Data {#validate-data} -Query: +Let's verify it worked by seeing how many rows were inserted: ```sql -SELECT count() FROM uk_price_paid; +SELECT count() +FROM uk_price_paid ``` -Result: - -```text -┌──count()─┐ -│ 26321785 │ -└──────────┘ -``` - -The size of dataset in ClickHouse is just 278 MiB, check it. - -Query: +At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: ```sql -SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'; +SELECT formatReadableSize(total_bytes) +FROM system.tables +WHERE name = 'uk_price_paid' ``` -Result: - -```text -┌─formatReadableSize(total_bytes)─┐ -│ 278.80 MiB │ -└─────────────────────────────────┘ -``` +Notice the size of the table is just 221.43 MiB! ## Run Some Queries {#run-queries} +Let's run some queries to analyze the data: + ### Query 1. 
Average Price Per Year {#average-price} -Query: - ```sql -SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year; +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80 +) +FROM uk_price_paid +GROUP BY year +ORDER BY year ``` -Result: +The result looks like: -```text +```response ┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ -│ 1995 │ 67932 │ █████▍ │ -│ 1996 │ 71505 │ █████▋ │ -│ 1997 │ 78532 │ ██████▎ │ -│ 1998 │ 85436 │ ██████▋ │ -│ 1999 │ 96037 │ ███████▋ │ -│ 2000 │ 107479 │ ████████▌ │ -│ 2001 │ 118885 │ █████████▌ │ -│ 2002 │ 137941 │ ███████████ │ -│ 2003 │ 155889 │ ████████████▍ │ -│ 2004 │ 178885 │ ██████████████▎ │ -│ 2005 │ 189351 │ ███████████████▏ │ -│ 2006 │ 203528 │ ████████████████▎ │ -│ 2007 │ 219378 │ █████████████████▌ │ +│ 1995 │ 67934 │ █████▍ │ +│ 1996 │ 71508 │ █████▋ │ +│ 1997 │ 78536 │ ██████▎ │ +│ 1998 │ 85441 │ ██████▋ │ +│ 1999 │ 96038 │ ███████▋ │ +│ 2000 │ 107487 │ ████████▌ │ +│ 2001 │ 118888 │ █████████▌ │ +│ 2002 │ 137948 │ ███████████ │ +│ 2003 │ 155893 │ ████████████▍ │ +│ 2004 │ 178888 │ ██████████████▎ │ +│ 2005 │ 189359 │ ███████████████▏ │ +│ 2006 │ 203532 │ ████████████████▎ │ +│ 2007 │ 219375 │ █████████████████▌ │ │ 2008 │ 217056 │ █████████████████▎ │ │ 2009 │ 213419 │ █████████████████ │ -│ 2010 │ 236109 │ ██████████████████▊ │ +│ 2010 │ 236110 │ ██████████████████▊ │ │ 2011 │ 232805 │ ██████████████████▌ │ -│ 2012 │ 238367 │ ███████████████████ │ -│ 2013 │ 256931 │ ████████████████████▌ │ -│ 2014 │ 279915 │ ██████████████████████▍ │ -│ 2015 │ 297266 │ ███████████████████████▋ │ -│ 2016 │ 313201 │ █████████████████████████ │ -│ 2017 │ 346097 │ ███████████████████████████▋ │ -│ 2018 │ 350116 │ ████████████████████████████ │ -│ 2019 │ 351013 │ ████████████████████████████ │ -│ 2020 │ 369420 │ █████████████████████████████▌ │ -│ 2021 │ 386903 │ ██████████████████████████████▊ │ +│ 2012 │ 238381 │ ███████████████████ │ +│ 2013 │ 256927 │ ████████████████████▌ │ +│ 2014 │ 280008 │ ██████████████████████▍ │ +│ 2015 │ 297263 │ ███████████████████████▋ │ +│ 2016 │ 313518 │ █████████████████████████ │ +│ 2017 │ 346371 │ ███████████████████████████▋ │ +│ 2018 │ 350556 │ ████████████████████████████ │ +│ 2019 │ 352184 │ ████████████████████████████▏ │ +│ 2020 │ 375808 │ ██████████████████████████████ │ +│ 2021 │ 381105 │ ██████████████████████████████▍ │ +│ 2022 │ 362572 │ █████████████████████████████ │ └──────┴────────┴────────────────────────────────────────┘ ``` ### Query 2. 
Average Price per Year in London {#average-price-london} -Query: - ```sql -SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year; +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100 +) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year ``` -Result: +The result looks like: -```text +```response ┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ -│ 1995 │ 109116 │ █████▍ │ -│ 1996 │ 118667 │ █████▊ │ -│ 1997 │ 136518 │ ██████▋ │ -│ 1998 │ 152983 │ ███████▋ │ -│ 1999 │ 180637 │ █████████ │ -│ 2000 │ 215838 │ ██████████▋ │ -│ 2001 │ 232994 │ ███████████▋ │ -│ 2002 │ 263670 │ █████████████▏ │ -│ 2003 │ 278394 │ █████████████▊ │ -│ 2004 │ 304666 │ ███████████████▏ │ -│ 2005 │ 322875 │ ████████████████▏ │ -│ 2006 │ 356191 │ █████████████████▋ │ -│ 2007 │ 404054 │ ████████████████████▏ │ +│ 1995 │ 109110 │ █████▍ │ +│ 1996 │ 118659 │ █████▊ │ +│ 1997 │ 136526 │ ██████▋ │ +│ 1998 │ 153002 │ ███████▋ │ +│ 1999 │ 180633 │ █████████ │ +│ 2000 │ 215849 │ ██████████▋ │ +│ 2001 │ 232987 │ ███████████▋ │ +│ 2002 │ 263668 │ █████████████▏ │ +│ 2003 │ 278424 │ █████████████▊ │ +│ 2004 │ 304664 │ ███████████████▏ │ +│ 2005 │ 322887 │ ████████████████▏ │ +│ 2006 │ 356195 │ █████████████████▋ │ +│ 2007 │ 404062 │ ████████████████████▏ │ │ 2008 │ 420741 │ █████████████████████ │ -│ 2009 │ 427753 │ █████████████████████▍ │ -│ 2010 │ 480306 │ ████████████████████████ │ -│ 2011 │ 496274 │ ████████████████████████▋ │ -│ 2012 │ 519442 │ █████████████████████████▊ │ -│ 2013 │ 616212 │ ██████████████████████████████▋ │ -│ 2014 │ 724154 │ ████████████████████████████████████▏ │ -│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ -│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ -│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ -│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ -│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ -│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ -│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ +│ 2009 │ 427754 │ █████████████████████▍ │ +│ 2010 │ 480322 │ ████████████████████████ │ +│ 2011 │ 496278 │ ████████████████████████▋ │ +│ 2012 │ 519482 │ █████████████████████████▊ │ +│ 2013 │ 616195 │ ██████████████████████████████▋ │ +│ 2014 │ 724121 │ ████████████████████████████████████▏ │ +│ 2015 │ 792101 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │ +│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │ +│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │ +│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │ └──────┴─────────┴───────────────────────────────────────────────────────┘ ``` -Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020? +Something happened to home prices in 2020! But that is probably not a surprise... ### Query 3. 
The Most Expensive Neighborhoods {#most-expensive-neighborhoods} -Query: - ```sql SELECT town, @@ -240,124 +230,123 @@ GROUP BY district HAVING c >= 100 ORDER BY price DESC -LIMIT 100; +LIMIT 100 ``` -Result: +The result looks like: -```text - -┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ -│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ -│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ -│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ -│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ -│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ -│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ -│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ -│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ -│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ -│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ -│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ -│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ -│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ -│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ -│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ -│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ -│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ -│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ -│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ -│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ -│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ -│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ -│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ -│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ -│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ -│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ -│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ -│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ -│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ -│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ -│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ -│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ -│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ -│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ -│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ -│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ -│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ -│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ -│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ -│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ -│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ -│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │ -│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │ -│ MAIDENHEAD │ 
BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ -│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ -│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ -│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ -│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │ -│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ -│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ -│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ -│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ -│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ -│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ -│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ -│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ -│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ -│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ -│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ -│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ -│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ -│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ -│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ -│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ -│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ -│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ -│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ -│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ -│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ -│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ -│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ -│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ -│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ -│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ -│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ -│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ -│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ -│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ -│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ -│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ -│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ -│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ -│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ -│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ -│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ -│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ -│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ -│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ -│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ -│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ -│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ -│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ -│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ -│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ -│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ -│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ -│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ -│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ -│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │ -│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ 
█████████████▌ │ -└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ +```response +┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐ +│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │ +│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │ +│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │ +│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │ +│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │ +│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │ +│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │ +│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │ +│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │ +│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │ +│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │ +│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │ +│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │ +│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │ +│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │ +│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │ +│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │ +│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │ +│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │ +│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │ +│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │ +│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │ +│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │ +│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │ +│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │ +│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │ +│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │ +│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │ +│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │ +│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │ +│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ 
████████████████▋ │ +│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │ +│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │ +│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │ +│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │ +│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │ +│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │ +│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │ +│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │ +│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │ +│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │ +│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │ +│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │ +│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │ +│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │ +│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │ +│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │ +│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │ +│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │ +│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │ +│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │ +│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │ +│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │ +│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │ +│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │ +│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │ +│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │ +│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │ +│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │ +│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │ +│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │ +│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │ +│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │ +│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │ +│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │ +│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │ +│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │ +│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │ +│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │ +│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │ +│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │ +│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │ 
+└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘ ``` ## Let's Speed Up Queries Using Projections {#speedup-with-projections} -[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data. +[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). ### Build a Projection {#build-projection} -Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`: +Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`: ```sql ALTER TABLE uk_price_paid @@ -374,25 +363,23 @@ ALTER TABLE uk_price_paid toYear(date), district, town - ); + ) ``` -Populate the projection for existing data (without it projection will be created for only newly inserted data): +Populate the projection for existing data. (Without materializing it, the projection will be created for only newly inserted data): ```sql ALTER TABLE uk_price_paid MATERIALIZE PROJECTION projection_by_year_district_town -SETTINGS mutations_sync = 1; +SETTINGS mutations_sync = 1 ``` ## Test Performance {#test-performance} -Let's run the same 3 queries. +Let's run the same 3 queries again: ### Query 1. Average Price Per Year {#average-price-projections} -Query: - ```sql SELECT toYear(date) AS year, @@ -400,47 +387,18 @@ SELECT bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year -ORDER BY year ASC; +ORDER BY year ASC ``` -Result: - -```text -┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ -│ 1995 │ 67932 │ █████▍ │ -│ 1996 │ 71505 │ █████▋ │ -│ 1997 │ 78532 │ ██████▎ │ -│ 1998 │ 85436 │ ██████▋ │ -│ 1999 │ 96037 │ ███████▋ │ -│ 2000 │ 107479 │ ████████▌ │ -│ 2001 │ 118885 │ █████████▌ │ -│ 2002 │ 137941 │ ███████████ │ -│ 2003 │ 155889 │ ████████████▍ │ -│ 2004 │ 178885 │ ██████████████▎ │ -│ 2005 │ 189351 │ ███████████████▏ │ -│ 2006 │ 203528 │ ████████████████▎ │ -│ 2007 │ 219378 │ █████████████████▌ │ -│ 2008 │ 217056 │ █████████████████▎ │ -│ 2009 │ 213419 │ █████████████████ │ -│ 2010 │ 236109 │ ██████████████████▊ │ -│ 2011 │ 232805 │ ██████████████████▌ │ -│ 2012 │ 238367 │ ███████████████████ │ -│ 2013 │ 256931 │ ████████████████████▌ │ -│ 2014 │ 279915 │ ██████████████████████▍ │ -│ 2015 │ 297266 │ ███████████████████████▋ │ -│ 2016 │ 313201 │ █████████████████████████ │ -│ 2017 │ 346097 │ ███████████████████████████▋ │ -│ 2018 │ 350116 │ ████████████████████████████ │ -│ 2019 │ 351013 │ ████████████████████████████ │ -│ 2020 │ 369420 │ █████████████████████████████▌ │ -│ 2021 │ 386903 │ ██████████████████████████████▊ │ -└──────┴────────┴────────────────────────────────────────┘ +The result is the same, but the performance is better! +```response +No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.) +With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.) ``` + ### Query 2. 
Average Price Per Year in London {#average-price-london-projections} -Query: - ```sql SELECT toYear(date) AS year, @@ -449,48 +407,19 @@ SELECT FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year -ORDER BY year ASC; +ORDER BY year ASC ``` -Result: +Same result, but notice the improvement in query performance: -```text -┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ -│ 1995 │ 109116 │ █████▍ │ -│ 1996 │ 118667 │ █████▊ │ -│ 1997 │ 136518 │ ██████▋ │ -│ 1998 │ 152983 │ ███████▋ │ -│ 1999 │ 180637 │ █████████ │ -│ 2000 │ 215838 │ ██████████▋ │ -│ 2001 │ 232994 │ ███████████▋ │ -│ 2002 │ 263670 │ █████████████▏ │ -│ 2003 │ 278394 │ █████████████▊ │ -│ 2004 │ 304666 │ ███████████████▏ │ -│ 2005 │ 322875 │ ████████████████▏ │ -│ 2006 │ 356191 │ █████████████████▋ │ -│ 2007 │ 404054 │ ████████████████████▏ │ -│ 2008 │ 420741 │ █████████████████████ │ -│ 2009 │ 427753 │ █████████████████████▍ │ -│ 2010 │ 480306 │ ████████████████████████ │ -│ 2011 │ 496274 │ ████████████████████████▋ │ -│ 2012 │ 519442 │ █████████████████████████▊ │ -│ 2013 │ 616212 │ ██████████████████████████████▋ │ -│ 2014 │ 724154 │ ████████████████████████████████████▏ │ -│ 2015 │ 792129 │ ███████████████████████████████████████▌ │ -│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │ -│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │ -│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │ -│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │ -│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │ -│ 2021 │ 960343 │ ████████████████████████████████████████████████ │ -└──────┴─────────┴───────────────────────────────────────────────────────┘ +```response +No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.) +With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.) ``` ### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections} -The condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020). 
- -Query: +The condition (date >= '2020-01-01') needs to be modified so that it matches the projection dimension (`toYear(date) >= 2020)`: ```sql SELECT @@ -506,138 +435,16 @@ GROUP BY district HAVING c >= 100 ORDER BY price DESC -LIMIT 100; +LIMIT 100 ``` -Result: +Again, the result is the same but notice the improvement in query performance: -```text -┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐ -│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │ -│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │ -│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │ -│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │ -│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │ -│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │ -│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │ -│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │ -│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │ -│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │ -│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │ -│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │ -│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │ -│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │ -│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │ -│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │ -│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │ -│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │ -│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │ -│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │ -│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │ -│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │ -│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │ -│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │ -│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │ -│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │ -│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │ -│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │ -│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │ -│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │ -│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │ -│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │ -│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │ -│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │ -│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │ -│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │ -│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │ -│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │ -│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │ -│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │ -│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │ -│ LONDON │ HACKNEY │ 3307 │ 
837090 │ ████████████████▋ │ -│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │ -│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │ -│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │ -│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │ -│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │ -│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │ -│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │ -│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │ -│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │ -│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │ -│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │ -│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │ -│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │ -│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │ -│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │ -│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │ -│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │ -│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │ -│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │ -│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │ -│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │ -│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │ -│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │ -│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │ -│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │ -│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │ -│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │ -│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │ -│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │ -│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │ -│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │ -│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │ -│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │ -│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │ -│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │ -│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │ -│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │ -│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │ -│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │ -│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │ -│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │ -│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │ -│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │ -│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │ -│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │ -│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │ -│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │ -│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │ -│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │ -│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │ -│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │ -│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │ -│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │ -│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │ -│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │ -│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │ -│ 
MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │ -│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │ -└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘ +```response +No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.) +With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.) ``` -### Summary {#summary} - -All 3 queries work much faster and read fewer rows. - -```text -Query 1 - -no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.) - projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.) - - -Query 2 - -no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.) - projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.) - -Query 3 - -no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.) - projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.) -``` - -### Test It in Playground {#playground} +### Test it in the Playground {#playground} The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md new file mode 100644 index 00000000000..0bb3ae1ca71 --- /dev/null +++ b/docs/en/getting-started/index.md @@ -0,0 +1,26 @@ +--- +slug: /en/getting-started/example-datasets/ +sidebar_position: 0 +sidebar_label: Overview +keywords: [clickhouse, install, tutorial, sample, datasets] +pagination_next: 'en/tutorial' +--- + +# Tutorials and Example Datasets + +We have a lot of resources for helping you get started and learn how ClickHouse works: + +- If you need to get ClickHouse up and running, check out our [Quick Start](../quick-start.mdx) +- The [ClickHouse Tutorial](../tutorial.md) analyzes a dataset of New York City taxi rides + +In addition, the sample datasets provide a great experience on working with ClickHouse, +learning important techniques and tricks, and seeing how to take advantage of the many powerful +functions in ClickHouse. The sample datasets include: + +- The [UK Property Price Paid dataset](../getting-started/example-datasets/uk-price-paid.md) is a good starting point with some interesting SQL queries +- The [New York Taxi Data](../getting-started/example-datasets/nyc-taxi.md) has an example of how to insert data from S3 into ClickHouse +- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse +- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables +- The ["What's on the Menu?" 
dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data + +View the **Tutorials and Datasets** menu for a complete list of sample datasets. \ No newline at end of file diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 83561b07ade..61303eddab9 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -1,13 +1,34 @@ --- -sidebar_label: Installation -sidebar_position: 1 -keywords: [clickhouse, install, installation, docs] -description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. -slug: /en/getting-started/install -title: Installation +sidebar_label: Install +keywords: [clickhouse, install, getting started, quick start] +slug: /en/install --- -## System Requirements {#system-requirements} +# Installing ClickHouse + +You have two options for getting up and running with ClickHouse: + +- **[ClickHouse Cloud](https://clickhouse.cloud/):** the official ClickHouse as a service, built, maintained, and supported by the creators of ClickHouse +- **Self-managed ClickHouse:** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture + +## ClickHouse Cloud + +The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/): + +
+ +![Create a ClickHouse Cloud service](@site/docs/en/_snippets/images/createservice1.png) +
+ +Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md). + +:::note +The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data. +::: + +## Self-Managed Requirements + +### CPU Architecture ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. @@ -19,6 +40,55 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments. +ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. + +It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload. + +### RAM {#ram} + +We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. + +The required volume of RAM depends on: + +- The complexity of queries. +- The amount of data that is processed in queries. + +To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. + +ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. + +### Swap File {#swap-file} + +Disable the swap file for production environments. + +### Storage Subsystem {#storage-subsystem} + +You need to have 2GB of free disk space to install ClickHouse. + +The volume of storage required for your data should be calculated separately. Assessment should include: + +- Estimation of the data volume. + + You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. + +- The data compression coefficient. + + To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. + +To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. + +### Network {#network} + +If possible, use networks of 10G or higher class. + +The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. 
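+
+One way to estimate the compression coefficient described in the Storage Subsystem section above is to load a sample of your data and compare the compressed and uncompressed sizes that ClickHouse reports in `system.parts`. The query below is only a sketch - it assumes an existing MergeTree table named `uk_price_paid` (substitute any table you have already loaded):
+
+```sql
+-- Compare on-disk (compressed) size with the uncompressed size for one table
+SELECT
+    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
+    formatReadableSize(sum(data_compressed_bytes)) AS compressed,
+    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio
+FROM system.parts
+WHERE table = 'uk_price_paid' AND active
+```
+
+A ratio in the 6-10x range is typical for clickstream-style data, as noted above.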
+ +### Software {#software} + +ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. + +## Self-Managed Install + ## Available Installation Options {#available-installation-options} ### From DEB Packages {#install-from-deb-packages} @@ -58,9 +128,9 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. -You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs. +You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs. -You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable). +You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/). #### Packages {#packages} @@ -105,7 +175,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. -You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs. +You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs. Then run these commands to install packages: @@ -226,7 +296,7 @@ Use the `clickhouse client` to connect to the server, or `clickhouse local` to p ### From Sources {#from-sources} -To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md). +To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md). You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs. @@ -281,7 +351,7 @@ If the configuration file is in the current directory, you do not need to specif ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`). By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`. -For more information, see the section [“Configuration Files”](../operations/configuration-files.md). +For more information, see the section [“Configuration Files”](/docs/en/operations/configuration-files.md). After launching server, you can use the command-line client to connect to it: @@ -292,7 +362,7 @@ $ clickhouse-client By default, it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument. The terminal must use UTF-8 encoding. -For more information, see the section [“Command-line client”](../interfaces/cli.md). +For more information, see the section [“Command-line client”](/docs/en/interfaces/cli.md). Example: @@ -317,6 +387,5 @@ SELECT 1 **Congratulations, the system works!** -To continue experimenting, you can download one of the test data sets or go through [tutorial](./../tutorial.md). +To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md). 
-[Original article](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 1f45d1fa411..4f07f99fb26 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -3,6 +3,7 @@ slug: /en/interfaces/cli sidebar_position: 17 sidebar_label: Command-Line Client --- +import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md'; # Command-line Client @@ -24,26 +25,76 @@ Connected to ClickHouse server version 20.13.1 revision 54442. Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a client of the older version, then the server, `clickhouse-client` displays the message: ```response -ClickHouse client version is older than ClickHouse server. It may lack support for new features. +ClickHouse client version is older than ClickHouse server. +It may lack support for new features. ``` ## Usage {#cli_usage} -The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries. +The client can be used in interactive and non-interactive (batch) mode. -Example of using the client to insert data: +### Gather your connection details + + +### Interactive + +To connect to your ClickHouse Cloud service, or any ClickHouse server using TLS and passwords, interactively use `--secure`, port 9440, and provide your username and password: + +```bash +clickhouse-client --host \ + --secure \ + --port 9440 \ + --user \ + --password +``` + +To connect to a self-managed ClickHouse server you will need the details for that server. Whether or not TLS is used, port numbers, and passwords are all configurable. Use the above example for ClickHouse Cloud as a starting point. + + +### Batch + +To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries. + +Examples of using the client to insert data: + +#### Inserting a CSV file into a remote ClickHouse service + +This example is appropriate for ClickHouse Cloud, or any ClickHouse server using TLS and a password. In this example a sample dataset CSV file, `cell_towers.csv` is inserted into an existing table `cell_towers` in the `default` database: + +```bash +clickhouse-client --host HOSTNAME.clickhouse.cloud \ + --secure \ + --port 9440 \ + --user default \ + --password PASSWORD \ + --query "INSERT INTO cell_towers FORMAT CSVWithNames" \ + < cell_towers.csv +``` + +:::note +To concentrate on the query syntax, the rest of the examples leave off the connection details (`--host`, `--port`, etc.). Add them in when you try the commands. 
+::: + +#### Three different ways of inserting data ``` bash -$ echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; +echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | \ + clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; +``` -$ cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; +```bash +cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; 3, 'some text', '2016-08-14 00:00:00' 4, 'some more text', '2016-08-14 00:00:01' _EOF - -$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; ``` +```bash +cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; +``` + +### Notes + In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query. By default, you can only process a single query in batch mode. To make multiple queries from a “script,” use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run ‘clickhouse-client’ for each query. Note that it may take tens of milliseconds to launch the ‘clickhouse-client’ program. diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index 5ac2f5d5a60..807663be646 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -84,8 +84,8 @@ In the following example a table is created and loaded with data from a CSV file ``` bash ./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" -echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt -cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" +echo -e "0,Input data for\n1,gRPC protocol example" > a.csv +cat a.csv | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" ./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" ``` diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 9eb34a2bf17..2085285ff66 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -6,16 +6,32 @@ sidebar_label: MySQL Interface # MySQL Interface -ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file: +ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. 
For example, you could define the port in a new XML file in your `config.d` folder: ``` xml -9004 + + 9004 + ``` -Example of connecting using command-line tool `mysql`: +Startup your ClickHouse server and look for a log message similar to the following that mentions Listening for MySQL compatibility protocol: + +``` +{} Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004 +``` + +## Connect mysql to ClickHouse + +The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse: + +```bash +mysql --protocol tcp -h [hostname] -u [username] -P [port_number] [database_name] +``` + +For example: ``` bash -$ mysql --protocol tcp -u default -P 9004 +$ mysql --protocol tcp -h 127.0.0.1 -u default -P 9004 default ``` Output if a connection succeeded: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 8bf64bca28f..82fa5c114ea 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -5,6 +5,9 @@ sidebar_label: ClickHouse Keeper --- # ClickHouse Keeper +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md'; + + ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper. diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index 8a95f13e6f8..1d9ce829e79 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -3,7 +3,11 @@ slug: /en/operations/external-authenticators/ sidebar_position: 48 sidebar_label: External User Authenticators and Directories title: "External User Authenticators and Directories" +pagination_next: 'en/operations/external-authenticators/kerberos' --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + ClickHouse supports authenticating and managing users using external services. diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index 689c3f66e04..c1360e880ad 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -2,6 +2,9 @@ slug: /en/operations/external-authenticators/kerberos --- # Kerberos +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol. diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index 5a250a8f60f..0493f5a539f 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -2,6 +2,9 @@ slug: /en/operations/external-authenticators/ldap title: "LDAP" --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + LDAP server can be used to authenticate ClickHouse users. 
There are two different approaches for doing this: diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md index a6287bef45b..109913c2b18 100644 --- a/docs/en/operations/external-authenticators/ssl-x509.md +++ b/docs/en/operations/external-authenticators/ssl-x509.md @@ -2,6 +2,9 @@ slug: /en/operations/external-authenticators/ssl-x509 title: "SSL X.509 certificate authentication" --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + [SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 8c08080e331..0b47450db61 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -5,6 +5,9 @@ sidebar_label: Monitoring --- # Monitoring +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md'; + + You can monitor: diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 0178d5bcfa9..7c63d4a9174 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -3,9 +3,12 @@ slug: /en/operations/optimizing-performance/sampling-query-profiler sidebar_position: 54 sidebar_label: Query Profiling --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; # Sampling Query Profiler + + ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time. To use profiler: diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index 6e185c121de..ec0cf007ff4 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -5,6 +5,10 @@ sidebar_label: Testing Hardware title: "How to Test Your Hardware with ClickHouse" --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + + You can run a basic ClickHouse performance test on any server without installation of ClickHouse packages. diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md deleted file mode 100644 index dc05a7b4896..00000000000 --- a/docs/en/operations/requirements.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -slug: /en/operations/requirements -sidebar_position: 44 -sidebar_label: Requirements ---- - -# Requirements - -## CPU - -For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources. 
- -ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. - -It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload. - -## RAM {#ram} - -We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. - -The required volume of RAM depends on: - -- The complexity of queries. -- The amount of data that is processed in queries. - -To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) and other operations you use. - -ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. - -## Swap File {#swap-file} - -Disable the swap file for production environments. - -## Storage Subsystem {#storage-subsystem} - -You need to have 2GB of free disk space to install ClickHouse. - -The volume of storage required for your data should be calculated separately. Assessment should include: - -- Estimation of the data volume. - - You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. - -- The data compression coefficient. - - To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. - -To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. - -## Network {#network} - -If possible, use networks of 10G or higher class. - -The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. - -## Software {#software} - -ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. - -ClickHouse can also work in other operating system families. See details in the [install guide](../getting-started/install.md) section of the documentation. 
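The requirements page removed above described estimating the data compression coefficient by loading a sample and comparing the raw size with the stored size. As a rough, hedged sketch (not part of the original page), on a server that already holds data the same ratio can be read from `system.parts`:

```sql
-- Hypothetical helper query: per-table compression ratio of active data parts.
SELECT
    database,
    table,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
    formatReadableSize(sum(data_compressed_bytes))   AS compressed,
    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY ratio DESC;
```

Dividing the estimated raw volume by this ratio (and multiplying by the number of replicas) gives the same storage estimate the removed text walks through.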
diff --git a/docs/en/operations/server-configuration-parameters/index.md b/docs/en/operations/server-configuration-parameters/index.md index 0a6b1953a62..27ade81ec55 100644 --- a/docs/en/operations/server-configuration-parameters/index.md +++ b/docs/en/operations/server-configuration-parameters/index.md @@ -2,6 +2,7 @@ slug: /en/operations/server-configuration-parameters/ sidebar_position: 54 sidebar_label: Server Configuration Parameters +pagination_next: en/operations/server-configuration-parameters/settings --- # Server Configuration Parameters diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index b7fe7d49b7b..dcda7536935 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -666,6 +666,7 @@ Keys: - `http_proxy` - Configure HTTP proxy for sending crash reports. - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. +- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse. **Recommended way to use** @@ -1498,9 +1499,24 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored. - `move_factor` is ignored. - `keep_free_space_bytes` is ignored. - `max_data_part_size_bytes` is ignored. -- Уou must have exactly one volume in that policy. +- Policy should have exactly one volume with local disks. ::: +## max_temporary_data_on_disk_size {#max_temporary_data_on_disk_size} + +Limit the amount of disk space consumed by temporary files in `tmp_path` for the server. +Queries that exceed this limit will fail with an exception. + +Default value: `0`. + +**See also** + +- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) +- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) +- [tmp_path](#tmp-path) +- [tmp_policy](#tmp-policy) +- [max_server_memory_usage](#max_server_memory_usage) + ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 35fadd295cd..8603257ea55 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -2,6 +2,7 @@ sidebar_label: Settings sidebar_position: 51 slug: /en/operations/settings/ +pagination_next: en/operations/settings/settings --- # Settings Overview diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 2295fe8b97e..338ecf9ffd3 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -15,7 +15,7 @@ Possible values: - Any positive integer. -Default value: 10. +Default value: 100. Override example in `config.xml`: @@ -231,7 +231,7 @@ Possible values: - Any positive integer. -Default value: 1800 +Default value: 10800 ## try_fetch_recompressed_part_timeout @@ -261,7 +261,7 @@ Possible values: - Any positive integer. 
-Default value: 10 +Default value: 100 ## max_suspicious_broken_parts_bytes diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 597d524dd3f..ce374f0f1c8 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -313,4 +313,19 @@ When inserting data, ClickHouse calculates the number of partitions in the inser > “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” +## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} + +The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. +Zero means unlimited. + +Default value: 0. + + +## max_temporary_data_on_disk_size_for_query {#settings_max_temporary_data_on_disk_size_for_query} + +The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. +Zero means unlimited. + +Default value: 0. + [Original article](https://clickhouse.com/docs/en/operations/settings/query_complexity/) diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 753eef1fb42..c482d72ffca 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -35,7 +35,7 @@ Structure of the `users` section: expression - + diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 31609fe24be..efdce2d4a88 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -668,7 +668,7 @@ log_query_views=1 ## log_formatted_queries {#settings-log-formatted-queries} -Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table. +Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)). Possible values: @@ -1599,7 +1599,7 @@ Right now it requires `optimize_skip_unused_shards` (the reason behind this is t ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} -Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didn’t perform a merge. +Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didn’t perform a merge. By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message. @@ -2629,12 +2629,6 @@ Sets the maximum number of inserted blocks after which mergeable blocks are drop Default value: `64`. 
-## temporary_live_view_timeout {#temporary-live-view-timeout} - -Sets the interval in seconds after which [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted. - -Default value: `5`. - ## periodic_live_view_refresh {#periodic-live-view-refresh} Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh. @@ -3147,12 +3141,14 @@ Result: ## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions} -Enables or disables returning results of type `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth). +Enables or disables returning results of type: +- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth). +- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot). Possible values: -- 0 — Functions return `Date` for all types of arguments. -- 1 — Functions return `Date32` for `Date32` or `DateTime64` arguments and `Date` otherwise. +- 0 — Functions return `Date` or `DateTime` for all types of arguments. +- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. Default value: `0`. 
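As a hedged illustration of the setting above (not from the original page; the exact type name and clamping behavior may vary with server version and timezone), the difference is easiest to see by comparing result types for a `DateTime64` argument outside the normal `DateTime` range:

```sql
-- Sketch: the same call with the setting off and on.
SELECT toTypeName(toStartOfHour(toDateTime64('2250-06-15 10:20:30', 3)))
SETTINGS enable_extended_results_for_datetime_functions = 0;
-- expected: DateTime (argument clamped to the normal range)

SELECT toTypeName(toStartOfHour(toDateTime64('2250-06-15 10:20:30', 3)))
SETTINGS enable_extended_results_for_datetime_functions = 1;
-- expected: DateTime64 (extended range preserved)
```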
diff --git a/docs/en/operations/ssl-zookeeper.md b/docs/en/operations/ssl-zookeeper.md index a38e9f81b41..c0ac8d2903d 100644 --- a/docs/en/operations/ssl-zookeeper.md +++ b/docs/en/operations/ssl-zookeeper.md @@ -5,6 +5,9 @@ sidebar_label: Secured Communication with Zookeeper --- # Optional secured communication between ClickHouse and Zookeeper +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md'; + + You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with ClickHouse client over SSL. These options are available from Zookeeper version 3.5.2. diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index a2b26c3684c..8b633fbe2f0 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -5,7 +5,7 @@ slug: /en/operations/system-tables/columns Contains information about columns in all the tables. -You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once. +You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) query, but for multiple tables at once. Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field. diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index 1106562da53..f4c71eb1cd2 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -11,6 +11,7 @@ Columns: - `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. - `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. **Example** diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index facf78c85bf..827a7e33ea3 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -4,6 +4,9 @@ sidebar_position: 58 sidebar_label: Usage Recommendations title: "Usage Recommendations" --- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md'; + + ## CPU Scaling Governor diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 4288b66bb2c..88a56463de1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -5,7 +5,7 @@ sidebar_position: 103 # anyHeavy -Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. 
If there is a value that occurs more than in half the cases in each of the query’s execution threads, this value is returned. Normally, the result is nondeterministic. +Selects a frequently occurring value using the [heavy hitters](https://doi.org/10.1145/762471.762473) algorithm. If there is a value that occurs more than in half the cases in each of the query’s execution threads, this value is returned. Normally, the result is nondeterministic. ``` sql anyHeavy(column) diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 658cddf1e6e..b025f6f6d54 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -7,7 +7,7 @@ sidebar_position: 108 Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). -Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). +Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://doi.org/10.1016/j.ins.2015.09.003). ``` sql topK(N)(column) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 001c7822433..76f66db924f 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -268,15 +268,17 @@ Result: ``` :::note -The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default. +The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default. Behavior for -* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range: +* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. 
In case argument is out of normal range: * If the argument is smaller than 1970, the result will be calculated from the argument `1970-01-01 (00:00:00)` instead. * If the return type is `DateTime` and the argument is larger than `2106-02-07 08:28:15`, the result will be calculated from the argument `2106-02-07 08:28:15` instead. * If the return type is `Date` and the argument is larger than `2149-06-06`, the result will be calculated from the argument `2149-06-06` instead. * If `toLastDayOfMonth` is called with an argument greater then `2149-05-31`, the result will be calculated from the argument `2149-05-31` instead. -* `enable_extended_results_for_datetime_functions = 1`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`. +* `enable_extended_results_for_datetime_functions = 1`: + * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`. + * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`. ::: ## toStartOfYear diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 642c8643c16..1c123aa3db2 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -294,6 +294,53 @@ Result: Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. +## tryDecrypt + +Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key. + +**Examples** + +Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, `iv` is an initial vector for decrypt/encrypt. 
Assume that users know their id and the key to decrypt the encrypted field: + +```sql +CREATE TABLE decrypt_null ( + dt DateTime, + user_id UInt32, + encrypted String, + iv String +) ENGINE = Memory; +``` + +Insert some data: + +```sql +INSERT INTO decrypt_null VALUES + ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), + ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), + ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); +``` + +Query: + +```sql +SELECT + dt, + user_id, + tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value +FROM decrypt_null +ORDER BY user_id ASC +``` + +Result: + +``` +┌──────────────────dt─┬─user_id─┬─value──┐ +│ 2022-08-02 00:00:00 │ 1 │ ᴺᵁᴸᴸ │ +│ 2022-09-02 00:00:00 │ 2 │ value2 │ +│ 2022-09-02 00:00:01 │ 3 │ ᴺᵁᴸᴸ │ +└─────────────────────┴─────────┴────────┘ +``` + ## aes_decrypt_mysql Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index ad66047a92e..cc66f62f714 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -296,7 +296,14 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 ## javaHash -Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. +Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), +[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), +[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), +[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), +[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060). +This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. + +Note that Java only support calculating signed integers hash, so if you want to calculate unsigned integers hash you must cast it to proper signed ClickHouse types. **Syntax** @@ -312,6 +319,20 @@ A `Int32` data type hash value. 
Query: +```sql +SELECT javaHash(toInt32(123)); +``` + +Result: + +```response +┌─javaHash(toInt32(123))─┐ +│ 123 │ +└────────────────────────┘ +``` + +Query: + ```sql SELECT javaHash('Hello, world!'); ``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 45187abf61b..a8ba4843279 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -565,6 +565,10 @@ Result: └────────────────────────────┘ ``` +## tryBase58Decode(s) + +Similar to base58Decode, but returns an empty string in case of error. + ## base64Encode(s) Encodes ‘s’ string into base64 @@ -579,7 +583,7 @@ Alias: `FROM_BASE64`. ## tryBase64Decode(s) -Similar to base64Decode, but in case of error an empty string would be returned. +Similar to base64Decode, but returns an empty string in case of error. ## endsWith(s, suffix) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 12584909688..067a350dca7 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -127,7 +127,7 @@ Adds a comment to the column. If the `IF EXISTS` clause is specified, the query Each column can have one comment. If a comment already exists for the column, a new comment overwrites the previous comment. -Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/misc.md#misc-describe-table) query. +Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/describe-table.md) query. Example: @@ -253,7 +253,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/misc.md#misc_operations-rename) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/rename.md#rename-table) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. 
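As a rough sketch of the rebuild-and-swap workaround described above (the table, columns, and type change are hypothetical, for illustration only):

```sql
-- Create a new table with the desired schema.
CREATE TABLE hits_new
(
    `id` UInt64,
    `event_time` DateTime,
    `user_id` UInt64   -- e.g. a type that could not be changed in place
)
ENGINE = MergeTree
ORDER BY id;

-- Copy the data, letting ClickHouse convert compatible values on insert.
INSERT INTO hits_new SELECT id, event_time, user_id FROM hits;

-- Swap the tables and drop the old one.
RENAME TABLE hits TO hits_old, hits_new TO hits;
DROP TABLE hits_old;
```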
diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index eeee5e03c8b..4027429cf0d 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -44,7 +44,7 @@ For `*MergeTree` tables mutations execute by **rewriting whole data parts**. The Mutations are totally ordered by their creation order and are applied to each part in that order. Mutations are also partially ordered with `INSERT INTO` queries: data that was inserted into the table before the mutation was submitted will be mutated and data that was inserted after that will not be mutated. Note that mutations do not block inserts in any way. -A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/misc.md#kill-mutation) query. +A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/kill.md#kill-mutation) query. Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index a2d142c2a6d..a216de85cfc 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -319,7 +319,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. -All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example: +All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. 
For example: ``` sql OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL; diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 1a2ec69e4f9..f9b428b74a1 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/statements/check-table sidebar_position: 41 -sidebar_label: CHECK +sidebar_label: CHECK TABLE title: "CHECK TABLE Statement" --- diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 6c80204688b..68fdd51e957 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -10,7 +10,7 @@ Creates new [roles](../../../operations/access-rights.md#role-management). Role Syntax: ``` sql -CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...] +CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 14c06ee0336..5833c43f55d 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -13,7 +13,7 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia Syntax: ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ... +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ... ``` Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. @@ -166,23 +166,6 @@ SELECT * FROM [db.]live_view WHERE ... You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. -### WITH TIMEOUT Clause - -When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used. - -**Example:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### WITH REFRESH Clause When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. @@ -212,20 +195,6 @@ WATCH lv └─────────────────────┴──────────┘ ``` -You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... 
-```
-
-**Example:**
-
-```sql
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
-```
-
-After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
-
```sql
WATCH lv
```
diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md
index ff3361caadb..4864743abbc 100644
--- a/docs/en/sql-reference/statements/describe-table.md
+++ b/docs/en/sql-reference/statements/describe-table.md
@@ -1,7 +1,7 @@
---
slug: /en/sql-reference/statements/describe-table
sidebar_position: 42
-sidebar_label: DESCRIBE
+sidebar_label: DESCRIBE TABLE
title: "DESCRIBE TABLE"
---
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 56bb4cd4b65..546a8b0958d 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -221,7 +221,7 @@ By default, a user account or a role has no privileges.
If a user or a role has no privileges, it is displayed as [NONE](#grant-none) privilege.
-Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
+Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/rename.md#rename-table) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
### SELECT
@@ -304,11 +304,11 @@ Examples of how this hierarchy is treated:
- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters.
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
-- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
+- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
### CREATE
-Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/misc.md#attach) DDL-queries according to the following hierarchy of privileges:
+Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges:
- `CREATE`. Level: `GROUP`
- `CREATE DATABASE`. Level: `DATABASE`
@@ -323,7 +323,7 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
### DROP
-Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
+Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges:
- `DROP`. Level: `GROUP`
- `DROP DATABASE`.
Level: `DATABASE` @@ -333,13 +333,13 @@ Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH ### TRUNCATE -Allows executing [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) queries. +Allows executing [TRUNCATE](../../sql-reference/statements/truncate.md) queries. Privilege level: `TABLE`. ### OPTIMIZE -Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) queries. +Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/optimize.md) queries. Privilege level: `TABLE`. @@ -359,7 +359,7 @@ A user has the `SHOW` privilege if it has any other privilege concerning the spe ### KILL QUERY -Allows executing [KILL](../../sql-reference/statements/misc.md#kill-query-statement) queries according to the following hierarchy of privileges: +Allows executing [KILL](../../sql-reference/statements/kill.md#kill-query) queries according to the following hierarchy of privileges: Privilege level: `GLOBAL`. diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 4a23c6d66bc..4da5f4cc420 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -430,9 +430,9 @@ FROM ### Cumulative sum. ```sql -CREATE TABLE events +CREATE TABLE warehouse ( - `metric` String, + `item` String, `ts` DateTime, `value` Float ) diff --git a/docs/ru/development/integrating_rust_libraries.md b/docs/ru/development/integrating_rust_libraries.md index aad83f61c14..df0ec8eee91 100644 --- a/docs/ru/development/integrating_rust_libraries.md +++ b/docs/ru/development/integrating_rust_libraries.md @@ -6,11 +6,50 @@ slug: /ru/development/integrating_rust_libraries Интеграция библиотек будет описываться на основе работы проведенной для библиотеки BLAKE3. -Первым шагом интеграции является создание форка библиотеки для внесения дальнейших изменений по совместимости методов на Rust с C/C++. +Первым шагом интеграции является добавление библиотеки в папку /rust. Для этого необходимо создать в папке пустой Rust-проект, подключив в Cargo.toml нужную библиотеку. Также необходимо компилировать новую библиотеку как статическую, для этого необходимо добавить `crate-type = ["staticlib"]` в Cargo.toml. -В форке необходимо будет изменить конфигурацию Cargo.toml, сменив таргет на статическую библиотеку. Кроме того, необходимо добавить crate cbindgen для его дальнейшего использования при сборке. +Далее необходимо подключить библиотеку к CMake. Для этого в ClickHouse была подключена библиотека Corrosion. Первым шагом является подключение папки с новой библиотекой в корневом CMakeLists.txt папки /rust. После этого следует добавить в директорию с библиотекой файл CMakeLists.txt, в котором будет вызвана функция из Corrosion. Как пример, приведем файл из BLAKE3: -Необходимо создать либо отредактировать сборочный скрипт build.rs, добавив в него запуск cbindgen - автогенератора заголовочных файлов .h. Пример такого запуска можно увидеть в build.rs для BLAKE3: +``` +corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD) + +target_include_directories(_ch_rust_blake3 INTERFACE include) +add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) +``` + +Таким образом, мы создадим при помощи Corrosion корректный CMake-таргет, а затем переобозначим его более понятным именем. Стоит отметить, что имя `_ch_rust_blake3` происходит из Cargo.toml, где оно выступает в качестве имени проекта (`name = "_ch_rust_blake3"`). 
+ + +Поскольку типы данных Rust не совместимы с типами данных C/C++, то в проекте мы опишем интерфейс для методов-прослоек, которые будут преобразовывать данные, получаемые из C/C++, вызывать методы библиотеки, а затем делать преобразование возвращаемых обратно данных. В частности, рассмотрим такой метод, написанный для BLAKE3: + +``` +#[no_mangle] +pub unsafe extern "C" fn blake3_apply_shim( + begin: *const c_char, + _size: u32, + out_char_data: *mut u8, +) -> *mut c_char { + if begin.is_null() { + let err_str = CString::new("input was a null pointer").unwrap(); + return err_str.into_raw(); + } + let mut hasher = blake3::Hasher::new(); + let input_bytes = CStr::from_ptr(begin); + let input_res = input_bytes.to_bytes(); + hasher.update(input_res); + let mut reader = hasher.finalize_xof(); + reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); + std::ptr::null_mut() +} +``` + +На вход метод принимает строку в C-совместимом формате, её размер и указатель, в который будет положен результат. Кроме того, для того, чтобы иметь возможность вывести ошибку, метод возвращает строку с ней как результат работы (и нулевой указатель в случае отсутствия ошибок). C-совместимые не используются в методах BLAKE3, поэтому они конвертируются посредством соотвествующих структур и методов в привычные форматы для языка Rust. Далее запускаются оригинальные методы библиотеки. Их результат следует преобразовать обратно в C-совместимые структуры, однако в данном случае удается избежать обратной конвертации, поскольку библиотека поддерживает запись напрямую по указателю *mut u8. + +Кроме того, стоит отметить обязательность аттрибута #[no_mangle] и указания extern "C" для всех таких методов. Без них не удастся провести корректную совместимую с C/C++ компиляцию. Кроме того, они необходимы для следующего этапа подключения библиотеки. + +После написания кода методов-прослоек нам необходимо подготовить заголовочный файл для библиотеки. Это можно сделать вручную, либо воспользоваться библиотекой cbindgen для автогенерации. В случае с использованием cbindgen, нам понадобится написать сборочный скрипт build.rs и подключить cbindgen в качестве build-dependency. + +Пример сборочного скрипта, которым можно автосгенерировать заголовочный файл: ``` let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); @@ -28,39 +67,7 @@ slug: /ru/development/integrating_rust_libraries } ``` -Скрипт назначает директорию для создания залоговочного файла и в конце запускает метод генерации cbindgen. +Если возникают пробемы с генерацией заголовков, может потребоваться поработать с конфигурацией cbindgen через файл cbindgen.toml, взяв оригинальный темплейт разработчика cbindgen: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml). -Далее необходимо подключить библиотеку к CMake. В BLAKE3 для этого были созданы два файла - CMakeLists.txt и файл, содержащий функцию для запуска cargo build как таргета, - build_rust_lib.cmake. Последний стоит скопировать в подключаемую библиотеку и отредактировать в соотвествии с требуемыми параметрами для сборки - добавить флаги или какие-либо настройки для разных архитектур. - -Завершив настройку CMake, можно приступить к созданию методов-прослоек, которые обеспечат совместимость библиотеки и остального кода ClickHouse. 
В частности, рассмотрим такой метод, написанный для BLAKE3: - -``` -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - _size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let mut hasher = Hasher::new(); - let input_bytes = CStr::from_ptr(begin); - let input_res = input_bytes.to_bytes(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN)); - std::ptr::null_mut() -} -``` - -На вход метод принимает строку в C-совместимом формате, её размер и указатель, в который будет положен результат. Кроме того, для того, чтобы иметь возможность вывести ошибку, метод возвращает строку с ней как результат работы (и нулевой указатель в случае отсутствия ошибок). C-совместимые не используются в методах BLAKE3, поэтому они конвертируются посредством соотвествующих структур и методов в привычные форматы для языка Rust. Далее запускаются оригинальные методы библиотеки. Их результат следует преобразовать обратно в C-совместимые структуры, однако в данном случае удается избежать обратной конвертации, поскольку библиотека поддерживает запись напрямую по указателю *mut u8. - -Кроме того, стоит отметить обязательность аттрибута #[no_mangle] и указания extern "C" для всех таких методов. Без них не удастся провести корректную совместимую с C/C++ компиляцию и автогенерацию заголовков. - -После этих действий можно протестировать компиляцию и работу методов на небольшом проекте для выявляения несовместимостей и ошибок. Если возникают пробемы с генерацией заголовков, может потребоваться поработать с конфигурацией cbindgen через файл cbindgen.toml, найти который можно либо в BLAKE3, либо взяв оригинальный темплейт разработчика cbindgen: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml). - -В заключение, стоит отметить пару пробелм, возникших при интеграции BLAKE3: -1) Некоторые архитектуры могут потребовать настройки компиляции в build.rs и в build_rust_lib.cmake в связи со своими особенностями. -2) MemorySanitizer плохо понимает инициализацию памяти в Rust, поэтому для избежания ложноположительных срабатываний для BLAKE3 был создан альтернативный метод, который более явно, но при этом медленнее, инициализировал память. Он компилируется только для сборки с MemorySanitizer и в релиз не попадает. Вероятно, возможны и более красивые способы решения этой проблемы, но при интеграции BLAKE3 они не были обнаружены. +В заключение, стоит отметить проблему, с которой пришлось столкнуться при интеграции BLAKE3: +C++ MemorySanitizer плохо понимает инициализацию памяти в Rust, поэтому для избежания ложноположительных срабатываний для BLAKE3 был создан альтернативный метод, который более явно, но при этом медленнее, инициализировал память. Он компилируется только для сборки с MemorySanitizer и в релиз не попадает. Вероятно, возможны и более красивые способы решения этой проблемы, но при интеграции BLAKE3 они не были обнаружены. diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index b1abc787c5d..be6d138669a 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -488,7 +488,7 @@ FORMAT TSV max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." 
``` -Optionally you can [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: +Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: ``` bash clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index 987f7fecc55..7f4e06205be 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -64,7 +64,7 @@ ClickHouse поддерживает управление доступом на - [CREATE USER](../sql-reference/statements/create/user.md#create-user-statement) - [ALTER USER](../sql-reference/statements/alter/user.md) -- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) +- [DROP USER](../sql-reference/statements/drop.md#drop-user) - [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) ### Применение настроек {#access-control-settings-applying} @@ -91,9 +91,9 @@ ClickHouse поддерживает управление доступом на - [CREATE ROLE](../sql-reference/statements/create/index.md#create-role-statement) - [ALTER ROLE](../sql-reference/statements/alter/role.md) -- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) -- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) -- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement) +- [DROP ROLE](../sql-reference/statements/drop.md#drop-role) +- [SET ROLE](../sql-reference/statements/set-role.md) +- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role) - [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) Привилегии можно присвоить роли с помощью запроса [GRANT](../sql-reference/statements/grant.md). Для отзыва привилегий у роли ClickHouse предоставляет запрос [REVOKE](../sql-reference/statements/revoke.md). 
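Ниже приведён условный пример работы с ролями (его нет в оригинальной документации; имена `accountant`, `mira` и база `accounting` выбраны только для иллюстрации):

```sql
-- Условный пример: создаём роль, выдаём ей привилегии и назначаем пользователю.
CREATE ROLE accountant;
GRANT SELECT ON accounting.* TO accountant;
GRANT accountant TO mira;   -- пользователь mira должен существовать заранее
-- Пользователь активирует роль в своей сессии:
SET ROLE accountant;
```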
@@ -106,7 +106,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE ROW POLICY](../sql-reference/statements/create/index.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md)
-- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
+- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
@@ -118,7 +118,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create/index.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md)
-- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
+- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
@@ -132,7 +132,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE QUOTA](../sql-reference/statements/create/index.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter/quota.md)
-- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
+- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 832b19f9fe6..bffa3c39a60 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -624,6 +624,7 @@ ClickHouse поддерживает динамическое изменение
- `http_proxy` - Настройка HTTP proxy для отсылки отчетов о сбоях.
- `debug` - Настроить клиентскую библиотеку Sentry в debug режим.
- `tmp_path` - Путь в файловой системе для временного хранения состояния отчетов о сбоях перед отправкой на сервер Sentry.
+- `environment` - Произвольное название среды, в которой запущен сервер ClickHouse, которое будет упомянуто в каждом отчете о сбое. По умолчанию имеет значение `test` или `prod` в зависимости от версии ClickHouse.
**Рекомендованные настройки**
@@ -1342,12 +1343,13 @@ TCP порт для защищённого обмена данными с кли
Если политика не задана, используется [tmp_path](#tmp-path). В противном случае `tmp_path` игнорируется.
- :::note "Примечание"
- - `move_factor` игнорируется.
- - `keep_free_space_bytes` игнорируется.
- - `max_data_part_size_bytes` игнорируется.
- - В данной политике у вас должен быть ровно один том.
- :::
+:::note "Примечание"
+- `move_factor` игнорируется.
+- `keep_free_space_bytes` игнорируется.
+- `max_data_part_size_bytes` игнорируется.
+- В данной политике должен быть ровно один том, содержащий только локальные диски.
+:::
+
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Размер кеша (в байтах) для несжатых данных, используемых движками таблиц семейства [MergeTree](../../operations/server-configuration-parameters/settings.md).
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 0d4f0c63210..a070dbd5e10 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1986,7 +1986,7 @@ SELECT * FROM test_table ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} -Включает или отключает генерирование исключения в случаях, когда запрос [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) не выполняет мёрж. +Включает или отключает генерирование исключения в случаях, когда запрос [OPTIMIZE](../../sql-reference/statements/optimize.md) не выполняет мёрж. По умолчанию, `OPTIMIZE` завершается успешно и в тех случаях, когда он ничего не сделал. Настройка позволяет отделить подобные случаи и включает генерирование исключения с поясняющим сообщением. @@ -3258,12 +3258,6 @@ SELECT * FROM test2; Значение по умолчанию: `64`. -## temporary_live_view_timeout {#temporary-live-view-timeout} - -Задает время в секундах, после которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) удаляется. - -Значение по умолчанию: `5`. - ## periodic_live_view_refresh {#periodic-live-view-refresh} Задает время в секундах, по истечении которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) с установленным автообновлением обновляется. @@ -3799,17 +3793,6 @@ Exception: Total regexp lengths too large. Значение по умолчанию: `1`. -## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions} - -Включает или отключает возвращение результатов типа `Date32` с расширенным диапазоном (по сравнению с типом `Date`) для функций [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) и [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth). - -Возможные значения: - -- 0 — Функции возвращают результаты типа `Date` для всех типов аргументов. -- 1 — Функции возвращают результаты типа `Date32` для аргументов типа `Date32` или `DateTime64` и возвращают `Date` в других случаях. - -Значение по умолчанию: `0`. - **Пример** Запрос: @@ -3832,6 +3815,19 @@ SELECT * FROM positional_arguments ORDER BY 2,3; └─────┴─────┴───────┘ ``` +## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions} + +Включает или отключает возвращение результатов типа: +- `Date32` с расширенным диапазоном (по сравнению с типом `Date`) для функций [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) и [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth). 
+- `DateTime64` с расширенным диапазоном (по сравнению с типом `DateTime`) для функций [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) и [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot). + +Возможные значения: + +- 0 — Функции возвращают результаты типа `Date` или `DateTime` для всех типов аргументов. +- 1 — Функции возвращают результаты типа `Date32` или `DateTime64` для аргументов типа `Date32` или `DateTime64` и возвращают `Date` или `DateTime` в других случаях. + +Значение по умолчанию: `0`. + ## optimize_move_to_prewhere {#optimize_move_to_prewhere} Включает или отключает автоматическую оптимизацию [PREWHERE](../../sql-reference/statements/select/prewhere.md) в запросах [SELECT](../../sql-reference/statements/select/index.md). diff --git a/docs/ru/operations/system-tables/columns.md b/docs/ru/operations/system-tables/columns.md index 818da3d6ac6..cade6f0a557 100644 --- a/docs/ru/operations/system-tables/columns.md +++ b/docs/ru/operations/system-tables/columns.md @@ -5,7 +5,7 @@ slug: /ru/operations/system-tables/columns Содержит информацию о столбцах всех таблиц. -С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу. +С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md), но для многих таблиц сразу. Колонки [временных таблиц](../../sql-reference/statements/create/table.md#temporary-tables) содержатся в `system.columns` только в тех сессиях, в которых эти таблицы были созданы. Поле `database` у таких колонок пустое. diff --git a/docs/ru/operations/system-tables/disks.md b/docs/ru/operations/system-tables/disks.md index fc4c370cc1a..1d540b277d1 100644 --- a/docs/ru/operations/system-tables/disks.md +++ b/docs/ru/operations/system-tables/disks.md @@ -11,5 +11,6 @@ Cодержит информацию о дисках, заданных в [ко - `path` ([String](../../sql-reference/data-types/string.md)) — путь к точке монтирования в файловой системе. - `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — свободное место на диске в байтах. - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — объём диска в байтах. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — не зарезервированное cвободное место в байтах (`free_space` минус размер места, зарезервированного на выполняемые в данный момент фоновые слияния, вставки и другие операции записи на диск). - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — место, которое должно остаться свободным на диске в байтах. Задаётся значением параметра `keep_free_space_bytes` конфигурации дисков. 
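To see the `system.disks` columns documented above in practice, including the newly documented `unreserved_space`, the table can be queried directly; a minimal sketch (the selected columns follow the description above, the readable formatting is only a suggestion):

```sql
-- Show capacity information for every disk configured on the server.
SELECT
    name,
    path,
    formatReadableSize(free_space) AS free,
    formatReadableSize(unreserved_space) AS unreserved,
    formatReadableSize(total_space) AS total
FROM system.disks;
```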
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/ru/sql-reference/aggregate-functions/reference/anyheavy.md index 2845e5f56ed..3b1dcd2a088 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/anyheavy.md @@ -5,7 +5,7 @@ sidebar_position: 103 # anyHeavy {#anyheavyx} -Выбирает часто встречающееся значение с помощью алгоритма «[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf)». Если существует значение, которое встречается чаще, чем в половине случаев, в каждом потоке выполнения запроса, то возвращается данное значение. В общем случае, результат недетерминирован. +Выбирает часто встречающееся значение с помощью алгоритма «[heavy hitters](https://doi.org/10.1145/762471.762473)». Если существует значение, которое встречается чаще, чем в половине случаев, в каждом потоке выполнения запроса, то возвращается данное значение. В общем случае, результат недетерминирован. ``` sql anyHeavy(column) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/topk.md b/docs/ru/sql-reference/aggregate-functions/reference/topk.md index 270db6a3ebb..1cf97ce543b 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/topk.md @@ -7,7 +7,7 @@ sidebar_position: 108 Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям). -Реализует [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) алгоритм для анализа TopK, на основе reduce-and-combine алгоритма из методики [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). +Реализует [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024) алгоритм для анализа TopK, на основе reduce-and-combine алгоритма из методики [Parallel Space Saving](https://doi.org/10.1016/j.ins.2015.09.003). ``` sql topK(N)(column) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 27689426cbe..a7d2ce49fae 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -268,16 +268,19 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp; ``` :::note -Тип возвращаемого значения описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday` определяется конфигурационным параметром [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) имеющим по умолчанию значение `0`. +Тип возвращаемого значения описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` определяется конфигурационным параметром [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) имеющим по умолчанию значение `0`. Поведение для -* `enable_extended_results_for_datetime_functions = 0`: Функции `toStartOf*`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime`. Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат. 
+* `enable_extended_results_for_datetime_functions = 0`: Функции `toStartOf*`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime`. Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime`. Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат. В случае если значение аргумента вне нормального диапазона: * `1970-01-01 (00:00:00)` будет возвращён для моментов времени до 1970 года, * `2106-02-07 08:28:15` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `DateTime`, * `2149-06-06` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `Date`, * `2149-05-31` будет результатом функции `toLastDayOfMonth` при обработке аргумента больше `2149-05-31`. -* `enable_extended_results_for_datetime_functions = 1`: Функции `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `Date32` или `DateTime64` если их аргумент `Date32` или `DateTime64`. +* `enable_extended_results_for_datetime_functions = 1`: + * Функции `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `Date32` или `DateTime64` если их аргумент `Date32` или `DateTime64`. + * Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `DateTime64` если их аргумент `Date32` или `DateTime64`. + ::: ## toStartOfYear {#tostartofyear} @@ -1050,6 +1053,7 @@ formatDateTime(Time, Format[, Timezone]) | %w | номер дня недели, начиная с воскресенья (0-6) | 2 | | %y | год, последние 2 цифры (00-99) | 18 | | %Y | год, 4 цифры | 2018 | +| %z | Смещение времени от UTC +HHMM или -HHMM | -0500 | | %% | символ % | % | **Пример** diff --git a/docs/ru/sql-reference/index.md b/docs/ru/sql-reference/index.md index f55c5e859f1..95e2d6a3918 100644 --- a/docs/ru/sql-reference/index.md +++ b/docs/ru/sql-reference/index.md @@ -10,5 +10,4 @@ sidebar_position: 28 - [INSERT INTO](statements/insert-into.md) - [CREATE](statements/create/index.md) - [ALTER](statements/alter/index.md#query_language_queries_alter) -- [Прочие виды запросов](statements/misc.md) diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index c337b64f1d6..11ec72596c4 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -128,7 +128,7 @@ COMMENT COLUMN [IF EXISTS] name 'Text comment' Каждый столбец может содержать только один комментарий. При выполнении запроса существующий комментарий заменяется на новый. -Посмотреть комментарии можно в столбце `comment_expression` из запроса [DESCRIBE TABLE](../misc.md#misc-describe-table). +Посмотреть комментарии можно в столбце `comment_expression` из запроса [DESCRIBE TABLE](../describe-table.md). 
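A brief, hedged sketch of the comment workflow referenced above (the table `visits` and column `browser` are placeholder names): the comment is attached with `ALTER TABLE ... COMMENT COLUMN` and can then be read back with `DESCRIBE TABLE`.

```sql
-- Placeholder table and column names; any existing table behaves the same way.
ALTER TABLE visits COMMENT COLUMN browser 'Browser used for accessing the site.';
DESCRIBE TABLE visits;
```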
Пример: @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`). -Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../misc.md#misc_operations-rename), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. diff --git a/docs/ru/sql-reference/statements/check-table.md b/docs/ru/sql-reference/statements/check-table.md index 77d246b631e..633c3899006 100644 --- a/docs/ru/sql-reference/statements/check-table.md +++ b/docs/ru/sql-reference/statements/check-table.md @@ -1,7 +1,7 @@ --- slug: /ru/sql-reference/statements/check-table sidebar_position: 41 -sidebar_label: CHECK +sidebar_label: CHECK TABLE --- # CHECK TABLE Statement {#check-table} diff --git a/docs/ru/sql-reference/statements/create/role.md b/docs/ru/sql-reference/statements/create/role.md index 4a93de8a74c..1aa222d4de1 100644 --- a/docs/ru/sql-reference/statements/create/role.md +++ b/docs/ru/sql-reference/statements/create/role.md @@ -11,19 +11,19 @@ sidebar_label: "Роль" Синтаксис: ```sql -CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...] +CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] ``` ## Управление ролями {#managing-roles} -Одному пользователю можно назначить несколько ролей. Пользователи могут применять назначенные роли в произвольных комбинациях с помощью выражения [SET ROLE](../misc.md#set-role-statement). Конечный объем привилегий — это комбинация всех привилегий всех примененных ролей. Если у пользователя имеются привилегии, присвоенные его аккаунту напрямую, они также прибавляются к привилегиям, присвоенным через роли. +Одному пользователю можно назначить несколько ролей. Пользователи могут применять назначенные роли в произвольных комбинациях с помощью выражения [SET ROLE](../set-role.md). Конечный объем привилегий — это комбинация всех привилегий всех примененных ролей. 
Если у пользователя имеются привилегии, присвоенные его аккаунту напрямую, они также прибавляются к привилегиям, присвоенным через роли. -Роли по умолчанию применяются при входе пользователя в систему. Установить роли по умолчанию можно с помощью выражений [SET DEFAULT ROLE](../misc.md#set-default-role-statement) или [ALTER USER](../alter/index.md#alter-user-statement). +Роли по умолчанию применяются при входе пользователя в систему. Установить роли по умолчанию можно с помощью выражений [SET DEFAULT ROLE](../set-role.md#set-default-role) или [ALTER USER](../alter/index.md#alter-user-statement). Для отзыва роли используется выражение [REVOKE](../../../sql-reference/statements/revoke.md). -Для удаления роли используется выражение [DROP ROLE](../misc.md#drop-role-statement). Удаленная роль автоматически отзывается у всех пользователей, которым была назначена. +Для удаления роли используется выражение [DROP ROLE](../drop.md#drop-role). Удаленная роль автоматически отзывается у всех пользователей, которым была назначена. ## Примеры {#create-role-examples} @@ -47,4 +47,4 @@ SET ROLE accountant; SELECT * FROM db.*; ``` - \ No newline at end of file + diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index a317bfdb596..6cbd4c6a30c 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -11,7 +11,7 @@ sidebar_label: "Представление" ## Обычные представления {#normal} ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ... +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ... ``` Обычные представления не хранят никаких данных, они выполняют чтение данных из другой таблицы при каждом доступе. Другими словами, обычное представление — это не что иное, как сохраненный запрос. При чтении данных из представления этот сохраненный запрос используется как подзапрос в секции [FROM](../../../sql-reference/statements/select/from.md). @@ -156,23 +156,6 @@ SELECT * FROM [db.]live_view WHERE ... Чтобы принудительно обновить LIVE-представление, используйте запрос `ALTER LIVE VIEW [db.]table_name REFRESH`. -### Секция WITH TIMEOUT {#live-view-with-timeout} - -LIVE-представление, созданное с параметром `WITH TIMEOUT`, будет автоматически удалено через определенное количество секунд с момента предыдущего запроса [WATCH](../../../sql-reference/statements/watch.md), примененного к данному LIVE-представлению. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -Если временной промежуток не указан, используется значение настройки [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout). - -**Пример:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### Секция WITH REFRESH {#live-view-with-refresh} LIVE-представление, созданное с параметром `WITH REFRESH`, будет автоматически обновляться через указанные промежутки времени, начиная с момента последнего обновления. @@ -202,20 +185,6 @@ WATCH lv; └─────────────────────┴──────────┘ ``` -Параметры `WITH TIMEOUT` и `WITH REFRESH` можно сочетать с помощью `AND`. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... 
-``` - -**Пример:** - -```sql -CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); -``` - -По истечении 15 секунд представление будет автоматически удалено, если нет активного запроса `WATCH`. - ```sql WATCH lv; ``` diff --git a/docs/ru/sql-reference/statements/describe-table.md b/docs/ru/sql-reference/statements/describe-table.md index 73b4278352a..14f97af1dd5 100644 --- a/docs/ru/sql-reference/statements/describe-table.md +++ b/docs/ru/sql-reference/statements/describe-table.md @@ -1,7 +1,7 @@ --- slug: /ru/sql-reference/statements/describe-table sidebar_position: 42 -sidebar_label: DESCRIBE +sidebar_label: DESCRIBE TABLE --- # DESCRIBE TABLE {#misc-describe-table} diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 79e3006d4ad..7c281634c98 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -221,7 +221,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION Отсутствие привилегий у пользователя или роли отображается как привилегия [NONE](#grant-none). -Выполнение некоторых запросов требует определенного набора привилегий. Например, чтобы выполнить запрос [RENAME](misc.md#misc_operations-rename), нужны следующие привилегии: `SELECT`, `CREATE TABLE`, `INSERT` и `DROP TABLE`. +Выполнение некоторых запросов требует определенного набора привилегий. Например, чтобы выполнить запрос [RENAME](rename.md#rename-table), нужны следующие привилегии: `SELECT`, `CREATE TABLE`, `INSERT` и `DROP TABLE`. ### SELECT {#grant-select} @@ -309,7 +309,7 @@ GRANT INSERT(x,y) ON db.table TO john ### CREATE {#grant-create} -Разрешает выполнять DDL-запросы [CREATE](../../sql-reference/statements/create/index.md) и [ATTACH](misc.md#attach) в соответствии со следующей иерархией привилегий: +Разрешает выполнять DDL-запросы [CREATE](../../sql-reference/statements/create/index.md) и [ATTACH](attach.md) в соответствии со следующей иерархией привилегий: - `CREATE`. Уровень: `GROUP` - `CREATE DATABASE`. Уровень: `DATABASE` @@ -324,7 +324,7 @@ GRANT INSERT(x,y) ON db.table TO john ### DROP {#grant-drop} -Разрешает выполнять запросы [DROP](misc.md#drop) и [DETACH](misc.md#detach-statement) в соответствии со следующей иерархией привилегий: +Разрешает выполнять запросы [DROP](drop.md) и [DETACH](detach.md) в соответствии со следующей иерархией привилегий: - `DROP`. Уровень: `GROUP` - `DROP DATABASE`. Уровень: `DATABASE` @@ -340,7 +340,7 @@ GRANT INSERT(x,y) ON db.table TO john ### OPTIMIZE {#grant-optimize} -Разрешает выполнять запросы [OPTIMIZE TABLE](misc.md#misc_operations-optimize). +Разрешает выполнять запросы [OPTIMIZE TABLE](optimize.md). Уровень: `TABLE`. diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index a7294d772d0..a6f589ba3eb 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -32,7 +32,7 @@ sidebar_label: FROM Запросы, которые используют `FINAL` выполняются немного медленее, чем аналогичные запросы без него, потому что: -- Данные мёржатся во время выполнения запроса. +- Данные мёржатся во время выполнения запроса в памяти, и это не приводит к физическому мёржу кусков на дисках. - Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе. 
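For context, a minimal sketch of a query that uses the `FINAL` modifier discussed in the list above (the table and column names are hypothetical):

```sql
-- FINAL merges data parts at query time, in memory, without rewriting them on disk.
SELECT * FROM visits FINAL WHERE CounterID = 101;
```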
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##} diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index 1583af60843..3a14a3ce55d 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -1,10 +1,450 @@ --- slug: /zh/getting-started/example-datasets/uk-price-paid -sidebar_label: UK Property Price Paid +sidebar_label: 英国房地产支付价格 sidebar_position: 1 -title: "UK Property Price Paid" +title: "英国房地产支付价格" --- -import Content from '@site/docs/en/getting-started/example-datasets/uk-price-paid.md'; +该数据集包含自 1995 年以来有关英格兰和威尔士房地产价格的数据。未压缩的大小约为 4 GiB,在 ClickHouse 中大约需要 278 MiB。 - +来源:https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads +字段说明:https://www.gov.uk/guidance/about-the-price-data + +包含 HM Land Registry data © Crown copyright and database right 2021.。此数据集需在 Open Government License v3.0 的许可下使用。 + +## 创建表 {#create-table} + +```sql +CREATE TABLE uk_price_paid +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + is_new UInt8, + duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2); +``` + +## 预处理和插入数据 {#preprocess-import-data} + +我们将使用 `url` 函数将数据流式传输到 ClickHouse。我们需要首先预处理一些传入的数据,其中包括: + +- 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询 +- 将`time` 字段转换为日期为它只包含 00:00 时间 +- 忽略 [UUid](../../sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析 +- 使用 [transform](../../sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段 +- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1 +- 删除最后两列,因为它们都具有相同的值(即 0) + +`url` 函数将来自网络服务器的数据流式传输到 ClickHouse 表中。以下命令将 500 万行插入到 `uk_price_paid` 表中: + +```sql +INSERT INTO uk_price_paid +WITH + splitByChar(' ', postcode) AS p +SELECT + toUInt32(price_string) AS price, + parseDateTimeBestEffortUS(time) AS date, + p[1] AS postcode1, + p[2] AS postcode2, + transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type, + b = 'Y' AS is_new, + transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration, + addr1, + addr2, + street, + locality, + town, + district, + county +FROM url( + 'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv', + 'CSV', + 'uuid_string String, + price_string String, + time String, + postcode String, + a String, + b String, + c String, + addr1 String, + addr2 String, + street String, + locality String, + town String, + district String, + county String, + d String, + e String' +) SETTINGS max_http_get_redirects=10; +``` + +需要等待一两分钟以便数据插入,具体时间取决于网络速度。 + +## 验证数据 {#validate-data} + +让我们通过查看插入了多少行来验证它是否有效: + +```sql +SELECT count() +FROM uk_price_paid 
+``` + +在执行此查询时,数据集有 27,450,499 行。让我们看看 ClickHouse 中表的大小是多少: + +```sql +SELECT formatReadableSize(total_bytes) +FROM system.tables +WHERE name = 'uk_price_paid' +``` + +请注意,表的大小仅为 221.43 MiB! + +## 运行一些查询 {#run-queries} + +让我们运行一些查询来分析数据: + +### 查询 1. 每年平均价格 {#average-price} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80 +) +FROM uk_price_paid +GROUP BY year +ORDER BY year +``` + +结果如下所示: + +```response +┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐ +│ 1995 │ 67934 │ █████▍ │ +│ 1996 │ 71508 │ █████▋ │ +│ 1997 │ 78536 │ ██████▎ │ +│ 1998 │ 85441 │ ██████▋ │ +│ 1999 │ 96038 │ ███████▋ │ +│ 2000 │ 107487 │ ████████▌ │ +│ 2001 │ 118888 │ █████████▌ │ +│ 2002 │ 137948 │ ███████████ │ +│ 2003 │ 155893 │ ████████████▍ │ +│ 2004 │ 178888 │ ██████████████▎ │ +│ 2005 │ 189359 │ ███████████████▏ │ +│ 2006 │ 203532 │ ████████████████▎ │ +│ 2007 │ 219375 │ █████████████████▌ │ +│ 2008 │ 217056 │ █████████████████▎ │ +│ 2009 │ 213419 │ █████████████████ │ +│ 2010 │ 236110 │ ██████████████████▊ │ +│ 2011 │ 232805 │ ██████████████████▌ │ +│ 2012 │ 238381 │ ███████████████████ │ +│ 2013 │ 256927 │ ████████████████████▌ │ +│ 2014 │ 280008 │ ██████████████████████▍ │ +│ 2015 │ 297263 │ ███████████████████████▋ │ +│ 2016 │ 313518 │ █████████████████████████ │ +│ 2017 │ 346371 │ ███████████████████████████▋ │ +│ 2018 │ 350556 │ ████████████████████████████ │ +│ 2019 │ 352184 │ ████████████████████████████▏ │ +│ 2020 │ 375808 │ ██████████████████████████████ │ +│ 2021 │ 381105 │ ██████████████████████████████▍ │ +│ 2022 │ 362572 │ █████████████████████████████ │ +└──────┴────────┴────────────────────────────────────────┘ +``` + +### 查询 2. 伦敦每年的平均价格 {#average-price-london} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100 +) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year +``` + +结果如下所示: + +```response +┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐ +│ 1995 │ 109110 │ █████▍ │ +│ 1996 │ 118659 │ █████▊ │ +│ 1997 │ 136526 │ ██████▋ │ +│ 1998 │ 153002 │ ███████▋ │ +│ 1999 │ 180633 │ █████████ │ +│ 2000 │ 215849 │ ██████████▋ │ +│ 2001 │ 232987 │ ███████████▋ │ +│ 2002 │ 263668 │ █████████████▏ │ +│ 2003 │ 278424 │ █████████████▊ │ +│ 2004 │ 304664 │ ███████████████▏ │ +│ 2005 │ 322887 │ ████████████████▏ │ +│ 2006 │ 356195 │ █████████████████▋ │ +│ 2007 │ 404062 │ ████████████████████▏ │ +│ 2008 │ 420741 │ █████████████████████ │ +│ 2009 │ 427754 │ █████████████████████▍ │ +│ 2010 │ 480322 │ ████████████████████████ │ +│ 2011 │ 496278 │ ████████████████████████▋ │ +│ 2012 │ 519482 │ █████████████████████████▊ │ +│ 2013 │ 616195 │ ██████████████████████████████▋ │ +│ 2014 │ 724121 │ ████████████████████████████████████▏ │ +│ 2015 │ 792101 │ ███████████████████████████████████████▌ │ +│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │ +│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │ +│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │ +│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │ +│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │ +│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │ +│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │ +└──────┴─────────┴───────────────────────────────────────────────────────┘ +``` + +2020 年房价出事了!但这并不令人意外…… + +### 查询 3. 
最昂贵的社区 {#most-expensive-neighborhoods} + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE date >= '2020-01-01' +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100 +``` + +结果如下所示: + +```response +┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐ +│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │ +│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │ +│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │ +│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │ +│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │ +│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │ +│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │ +│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │ +│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │ +│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │ +│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │ +│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │ +│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │ +│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │ +│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │ +│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │ +│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │ +│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │ +│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │ +│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │ +│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │ +│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │ +│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │ +│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │ +│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │ +│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │ +│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │ +│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │ +│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │ +│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │ +│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │ +│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │ +│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │ +│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │ +│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │ +│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │ +│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │ +│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │ +│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │ +│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │ +│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │ +│ LONDON │ WANDSWORTH │ 13210 │ 857318 
│ █████████████████▏ │ +│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │ +│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │ +│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │ +│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │ +│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │ +│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │ +│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │ +│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │ +│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │ +│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │ +│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │ +│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │ +│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │ +│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │ +│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │ +│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │ +│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │ +│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │ +│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │ +│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │ +│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │ +│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │ +│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │ +│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │ +│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │ +│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │ +│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │ +│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │ +│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │ +│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │ +│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │ +│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │ +│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │ +│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │ +│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │ +│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │ +│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │ +│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │ +│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │ +│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │ +│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │ +│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │ +│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │ +│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │ +│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │ +│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │ +│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │ +│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │ +│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │ +│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │ +│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │ +│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │ +│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │ +│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │ +│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │ +│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │ +│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ 
█████████████▊ │ +│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │ +└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘ +``` + +## 使用 Projection 加速查询 {#speedup-with-projections} + +[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。 + +### 构建投影{#build-projection} + +让我们通过维度 `toYear(date)`、`district` 和 `town` 创建一个聚合 Projection: + +```sql +ALTER TABLE uk_price_paid + ADD PROJECTION projection_by_year_district_town + ( + SELECT + toYear(date), + district, + town, + avg(price), + sum(price), + count() + GROUP BY + toYear(date), + district, + town + ) +``` + +填充现有数据的 Projection。 (如果不进行 materialize 操作,则 ClickHouse 只会为新插入的数据创建 Projection): + +```sql +ALTER TABLE uk_price_paid + MATERIALIZE PROJECTION projection_by_year_district_town +SETTINGS mutations_sync = 1 +``` + +## Test Performance {#test-performance} + +让我们再次运行相同的 3 个查询: + +### 查询 1. 每年平均价格 {#average-price-projections} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80) +FROM uk_price_paid +GROUP BY year +ORDER BY year ASC +``` + +结果是一样的,但是性能更好! +```response +No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.) +With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.) +``` + + +### 查询 2. 伦敦每年的平均价格 {#average-price-london-projections} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year ASC +``` + +Same result, but notice the improvement in query performance: + +```response +No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.) +With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.) +``` + +### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods-projections} + +注意:需要修改 (date >= '2020-01-01') 以使其与 Projection 定义的维度 (`toYear(date) >= 2020)` 匹配: + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE toYear(date) >= 2020 +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100 +``` + +同样,结果是相同的,但请注意查询性能的改进: + +```response +No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.) +With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.) 
+``` + +### 在 Playground 上测试{#playground} + +也可以在 [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==) 上找到此数据集。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/zh/sql-reference/aggregate-functions/reference/anyheavy.md index 0d6390f54a1..875f744665b 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/anyheavy.md @@ -5,7 +5,7 @@ sidebar_position: 103 # anyHeavy {#anyheavyx} -选择一个频繁出现的值,使用[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) 算法。 如果某个值在查询的每个执行线程中出现的情况超过一半,则返回此值。 通常情况下,结果是不确定的。 +选择一个频繁出现的值,使用[heavy hitters](https://doi.org/10.1145/762471.762473) 算法。 如果某个值在查询的每个执行线程中出现的情况超过一半,则返回此值。 通常情况下,结果是不确定的。 ``` sql anyHeavy(column) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/topk.md b/docs/zh/sql-reference/aggregate-functions/reference/topk.md index 9dc4a11a939..8841f2d86a7 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/topk.md @@ -7,7 +7,7 @@ sidebar_position: 108 返回指定列中近似最常见值的数组。 生成的数组按值的近似频率降序排序(而不是值本身)。 -实现了[过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf)算法, 使用基于reduce-and-combine的算法,借鉴[并行节省空间](https://arxiv.org/pdf/1401.0702.pdf)。 +实现了[过滤节省空间](https://doi.org/10.1016/j.ins.2010.08.024)算法, 使用基于reduce-and-combine的算法,借鉴[并行节省空间](https://doi.org/10.1016/j.ins.2015.09.003)。 **语法** diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 5daa4092732..be2f8d6ded3 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -13,7 +13,7 @@ sidebar_label: VIEW 语法: ``` sql -CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ... +CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ... ``` 普通视图不存储任何数据。 他们只是在每次访问时从另一个表执行读取。换句话说,普通视图只不过是一个保存的查询。 从视图中读取时,此保存的查询用作[FROM](../../../sql-reference/statements/select/from.md)子句中的子查询. @@ -164,23 +164,6 @@ SELECT * FROM [db.]live_view WHERE ... 您可以使用`ALTER LIVE VIEW [db.]table_name REFRESH`语法. -### WITH TIMEOUT条件 {#live-view-with-timeout} - -当使用`WITH TIMEOUT`子句创建实时视图时,[WATCH](../../../sql-reference/statements/watch.md)观察实时视图的查询。 - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -如果未指定超时值,则由指定的值[temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout)决定. - -**示例:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### WITH REFRESH条件 {#live-view-with-refresh} 当使用`WITH REFRESH`子句创建实时视图时,它将在自上次刷新或触发后经过指定的秒数后自动刷新。 @@ -210,20 +193,6 @@ WATCH lv └─────────────────────┴──────────┘ ``` -您可以使用`AND`子句组合`WITH TIMEOUT`和`WITH REFRESH`子句。 - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... 
-``` - -**示例:** - -```sql -CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); -``` - -15 秒后,如果没有活动的`WATCH`查询,实时视图将自动删除。 - ```sql WATCH lv ``` diff --git a/packages/clickhouse-server.init b/packages/clickhouse-server.init index 13aeffe13a7..f215e52b6f3 100755 --- a/packages/clickhouse-server.init +++ b/packages/clickhouse-server.init @@ -120,7 +120,11 @@ use_cron() if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then return 1 fi - # 2. disabled by config + # 2. checking whether the config is existed + if [ ! -f "$CLICKHOUSE_CRONFILE" ]; then + return 1 + fi + # 3. disabled by config if [ -z "$CLICKHOUSE_CRONFILE" ]; then return 2 fi diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 9cf7cb2b624..5b6c5b26633 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -7,12 +7,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake) # The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), # each of them may be built and linked as a separate library. # If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. -if (USE_MUSL) - # Only clickhouse-keeper can be built with musl currently - option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" OFF) -else () - option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" ON) -endif () +option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" ON) option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)" @@ -194,7 +189,7 @@ else() message(STATUS "ClickHouse su: OFF") endif() -configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h) +configure_file (config_tools.h.in ${CONFIG_INCLUDE_PATH}/config_tools.h) macro(clickhouse_target_link_split_lib target name) if(NOT CLICKHOUSE_ONE_SHARED) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 5bd9d28d8e3..e616cb8cf72 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -12,10 +12,11 @@ #include #include "Client.h" #include "Core/Protocol.h" +#include "Parsers/formatAST.h" #include -#include +#include "config_version.h" #include #include #include @@ -514,6 +515,66 @@ static bool queryHasWithClause(const IAST & ast) return false; } +std::optional Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query) +{ + processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query); + + const auto * exception = server_exception ? server_exception.get() : client_exception.get(); + // Sometimes you may get TOO_DEEP_RECURSION from the server, + // and TOO_DEEP_RECURSION should not fail the fuzzer check. + if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION) + { + have_error = false; + server_exception.reset(); + client_exception.reset(); + return true; + } + + if (have_error) + { + fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message()); + + // Try to reconnect after errors, for two reasons: + // 1. We might not have realized that the server died, e.g. if + // it sent us a trace and closed connection properly. + // 2. The connection might have gotten into a wrong state and + // the next query will get false positive about + // "Unknown packet from server". 
+ try + { + connection->forceConnected(connection_parameters.timeouts); + } + catch (...) + { + // Just report it, we'll terminate below. + fmt::print(stderr, + "Error while reconnecting to the server: {}\n", + getCurrentExceptionMessage(true)); + + // The reconnection might fail, but we'll still be connected + // in the sense of `connection->isConnected() = true`, + // in case when the requested database doesn't exist. + // Disconnect manually now, so that the following code doesn't + // have any doubts, and the connection state is predictable. + connection->disconnect(); + } + } + + if (!connection->isConnected()) + { + // Probably the server is dead because we found an assertion + // failure. Fail fast. + fmt::print(stderr, "Lost connection to the server.\n"); + + // Print the changed settings because they might be needed to + // reproduce the error. + printChangedSettings(); + + return false; + } + + return std::nullopt; +} /// Returns false when server is not available. bool Client::processWithFuzzing(const String & full_query) @@ -558,18 +619,33 @@ bool Client::processWithFuzzing(const String & full_query) // - SET -- The time to fuzz the settings has not yet come // (see comments in Client/QueryFuzzer.cpp) size_t this_query_runs = query_fuzzer_runs; - if (orig_ast->as() || - orig_ast->as() || - orig_ast->as() || - orig_ast->as()) + ASTs queries_for_fuzzed_tables; + + if (orig_ast->as()) { this_query_runs = 1; } + else if (const auto * create = orig_ast->as()) + { + if (QueryFuzzer::isSuitableForFuzzing(*create)) + this_query_runs = create_query_fuzzer_runs; + else + this_query_runs = 1; + } + else if (const auto * insert = orig_ast->as()) + { + this_query_runs = 1; + queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query); + } + else if (const auto * drop = orig_ast->as()) + { + this_query_runs = 1; + queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop); + } String query_to_execute; - ASTPtr parsed_query; - ASTPtr fuzz_base = orig_ast; + for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step) { fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs); @@ -630,9 +706,9 @@ bool Client::processWithFuzzing(const String & full_query) continue; } - parsed_query = ast_to_process; - query_to_execute = parsed_query->formatForErrorMessage(); - processParsedSingleQuery(full_query, query_to_execute, parsed_query); + query_to_execute = ast_to_process->formatForErrorMessage(); + if (auto res = processFuzzingStep(query_to_execute, ast_to_process)) + return *res; } catch (...) { @@ -645,60 +721,6 @@ bool Client::processWithFuzzing(const String & full_query) have_error = true; } - const auto * exception = server_exception ? server_exception.get() : client_exception.get(); - // Sometimes you may get TOO_DEEP_RECURSION from the server, - // and TOO_DEEP_RECURSION should not fail the fuzzer check. - if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION) - { - have_error = false; - server_exception.reset(); - client_exception.reset(); - return true; - } - - if (have_error) - { - fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message()); - - // Try to reconnect after errors, for two reasons: - // 1. We might not have realized that the server died, e.g. if - // it sent us a trace and closed connection properly. - // 2. 
The connection might have gotten into a wrong state and - // the next query will get false positive about - // "Unknown packet from server". - try - { - connection->forceConnected(connection_parameters.timeouts); - } - catch (...) - { - // Just report it, we'll terminate below. - fmt::print(stderr, - "Error while reconnecting to the server: {}\n", - getCurrentExceptionMessage(true)); - - // The reconnection might fail, but we'll still be connected - // in the sense of `connection->isConnected() = true`, - // in case when the requested database doesn't exist. - // Disconnect manually now, so that the following code doesn't - // have any doubts, and the connection state is predictable. - connection->disconnect(); - } - } - - if (!connection->isConnected()) - { - // Probably the server is dead because we found an assertion - // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server.\n"); - - // Print the changed settings because they might be needed to - // reproduce the error. - printChangedSettings(); - - return false; - } - // Check that after the query is formatted, we can parse it back, // format again and get the same result. Unfortunately, we can't // compare the ASTs, which would be more sensitive to errors. This @@ -729,13 +751,12 @@ bool Client::processWithFuzzing(const String & full_query) // query, but second and third. // If you have to add any more workarounds to this check, just remove // it altogether, it's not so useful. - if (parsed_query && !have_error && !queryHasWithClause(*parsed_query)) + if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process)) { ASTPtr ast_2; try { const auto * tmp_pos = query_to_execute.c_str(); - ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */); } catch (Exception & e) @@ -762,7 +783,7 @@ bool Client::processWithFuzzing(const String & full_query) "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", text_3, text_2); fmt::print(stderr, "In more detail:\n"); - fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); + fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); @@ -784,6 +805,7 @@ bool Client::processWithFuzzing(const String & full_query) // so that it doesn't influence the exit code. server_exception.reset(); client_exception.reset(); + fuzzer.notifyQueryFailed(ast_to_process); have_error = false; } else if (ast_to_process->formatForErrorMessage().size() > 500) @@ -800,6 +822,35 @@ bool Client::processWithFuzzing(const String & full_query) } } + for (const auto & query : queries_for_fuzzed_tables) + { + std::cout << std::endl; + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*query, ast_buf, false /*highlight*/); + ast_buf.next(); + std::cout << std::endl << std::endl; + + try + { + query_to_execute = query->formatForErrorMessage(); + if (auto res = processFuzzingStep(query_to_execute, query)) + return *res; + } + catch (...) 
+ { + client_exception = std::make_unique(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode()); + have_error = true; + } + + if (have_error) + { + server_exception.reset(); + client_exception.reset(); + fuzzer.notifyQueryFailed(query); + have_error = false; + } + } + return true; } @@ -834,6 +885,7 @@ void Client::addOptions(OptionsDescription & options_description) ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") ("query-fuzzer-runs", po::value()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.") + ("create-query-fuzzer-runs", po::value()->default_value(0), "") ("interleave-queries-file", po::value>()->multitoken(), "file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)") @@ -842,6 +894,7 @@ void Client::addOptions(OptionsDescription & options_description) ("no-warnings", "disable warnings when client connects to server") ("fake-drop", "Ignore all DROP queries, should be used only for testing") + ("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and openSSL.client.verificationMode=none") ; /// Commandline options related to external tables. @@ -976,6 +1029,13 @@ void Client::processOptions(const OptionsDescription & options_description, config().setBool("no-warnings", true); if (options.count("fake-drop")) fake_drop = true; + if (options.count("accept-invalid-certificate")) + { + config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); + config().setString("openSSL.client.verificationMode", "none"); + } + else + config().setString("openSSL.client.invalidCertificateHandler.name", "RejectCertificateHandler"); if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { @@ -986,6 +1046,17 @@ void Client::processOptions(const OptionsDescription & options_description, ignore_error = true; } + if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as())) + { + // Fuzzer implies multiquery. + config().setBool("multiquery", true); + // Ignore errors in parsing queries. + config().setBool("ignore-error", true); + + global_context->setSetting("allow_suspicious_low_cardinality_types", true); + ignore_error = true; + } + if (options.count("opentelemetry-traceparent")) { String traceparent = options["opentelemetry-traceparent"].as(); diff --git a/programs/client/Client.h b/programs/client/Client.h index 1fec282be51..63f28ca96a2 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -17,6 +17,7 @@ public: protected: bool processWithFuzzing(const String & full_query) override; + std::optional processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query); void connect() override; diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index f1787801dc4..30444e8c84e 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake ! 
+#pragma once #cmakedefine01 ENABLE_CLICKHOUSE_SERVER #cmakedefine01 ENABLE_CLICKHOUSE_CLIENT diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 9b1bae947d2..00c86571265 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -927,7 +927,11 @@ namespace executable.string(), config.string(), pid_file.string()); if (!user.empty()) - command = fmt::format("clickhouse su '{}' {}", user, command); + { + /// sudo respects limits in /etc/security/limits.conf e.g. open files, + /// that's why we are using it instead of the 'clickhouse su' tool. + command = fmt::format("sudo -u '{}' {}", user, command); + } fmt::print("Will run {}\n", command); executeScript(command, true); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index fdfe0cef2b3..5077f59b7dd 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -24,8 +24,8 @@ #include #include -#include "config_core.h" -#include "Common/config_version.h" +#include "config.h" +#include "config_version.h" #if USE_SSL # include diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2b9d819f5eb..ffec435239e 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -203,7 +203,7 @@ void LocalServer::tryInitPath() global_context->setPath(path); - global_context->setTemporaryStorage(path + "tmp"); + global_context->setTemporaryStorage(path + "tmp", "", 0); global_context->setFlagsPath(path + "flags"); global_context->setUserFilesPath(""); // user's files are everywhere diff --git a/programs/main.cpp b/programs/main.cpp index 9b0e890cd76..f40bafc7027 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -219,7 +219,7 @@ auto instructionFailToString(InstructionFail fail) case InstructionFail::AVX512: ret("AVX512"); } - __builtin_unreachable(); + UNREACHABLE(); } @@ -345,7 +345,7 @@ struct Checker ; -#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK +#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL) /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. void checkHarmfulEnvironmentVariables(char ** argv) { @@ -408,6 +408,7 @@ void checkHarmfulEnvironmentVariables(char ** argv) /// 3rd-party uncontrolled dangerous libraries into the process address space, /// because it is insane. +#if !defined(USE_MUSL) extern "C" { void * dlopen(const char *, int) @@ -430,6 +431,7 @@ extern "C" return "ClickHouse does not allow dynamic library loading"; } } +#endif /// This allows to implement assert to forbid initialization of a class in static constructors. @@ -455,7 +457,7 @@ int main(int argc_, char ** argv_) /// Note: we forbid dlopen in our code. 
updatePHDRCache(); -#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK +#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL) checkHarmfulEnvironmentVariables(argv_); #endif diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 76c0103d604..3ba8b182ba6 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_ODBC diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index 23ffd84663b..d57bbc0ca8a 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -2,7 +2,7 @@ #include #include -#include +#include "config.h" #include #if USE_ODBC diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index fe22d8facfd..0875cc2e9d9 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include "config.h" #include #include diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index 2ae533431d3..dd21358df8c 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -1,7 +1,7 @@ #include "ODBCHandlerFactory.h" #include "PingHandler.h" #include "ColumnInfoHandler.h" -#include +#include "config.h" #include #include #include diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 7afa77ca091..cb71a6fb5a2 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -2,7 +2,7 @@ #include #include -#include +#include "config.h" #include #if USE_ODBC diff --git a/programs/odbc-bridge/getIdentifierQuote.h b/programs/odbc-bridge/getIdentifierQuote.h index 53ee1afd720..703586cd08e 100644 --- a/programs/odbc-bridge/getIdentifierQuote.h +++ b/programs/odbc-bridge/getIdentifierQuote.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_ODBC diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 14f97923ce3..aed586a86f6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -79,7 +79,9 @@ #include #include #include +#if USE_BORINGSSL #include +#endif #include #include #include @@ -88,8 +90,8 @@ #include #include -#include "config_core.h" -#include "Common/config_version.h" +#include "config.h" +#include "config_version.h" #if defined(OS_LINUX) # include @@ -209,7 +211,7 @@ try fs::remove(it->path()); } else - LOG_DEBUG(log, "Skipped file in temporary path {}", it->path().string()); + LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string()); } } catch (...) @@ -969,9 +971,10 @@ int Server::main(const std::vector & /*args*/) /// Storage with temporary data for processing of heavy queries. 
{ - std::string tmp_path = config().getString("tmp_path", path / "tmp/"); - std::string tmp_policy = config().getString("tmp_policy", ""); - const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy); + std::string temporary_path = config().getString("tmp_path", path / "tmp/"); + std::string temporary_policy = config().getString("tmp_policy", ""); + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + const VolumePtr & volume = global_context->setTemporaryStorage(temporary_path, temporary_policy, max_size); for (const DiskPtr & disk : volume->getDisks()) setupTmpPath(log, disk->getPath()); } @@ -1263,8 +1266,9 @@ int Server::main(const std::vector & /*args*/) global_context->updateStorageConfiguration(*config); global_context->updateInterserverCredentials(*config); - +#if USE_BORINGSSL CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); +#endif #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif @@ -1417,8 +1421,7 @@ int Server::main(const std::vector & /*args*/) global_context->setAsynchronousInsertQueue(std::make_shared( global_context, settings.async_insert_threads, - settings.async_insert_max_data_size, - AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); + settings.async_insert_cleanup_timeout_ms)); /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); @@ -1470,9 +1473,10 @@ int Server::main(const std::vector & /*args*/) global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks); global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks); } - +#if USE_BORINGSSL /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); +#endif SCOPE_EXIT({ /// Stop reloading of the main config. This must be done before `global_context->shutdown()` because diff --git a/programs/server/config.xml b/programs/server/config.xml index dcb8ac0804c..7f3a749b629 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1173,6 +1173,18 @@ 7500 + + + system + asynchronous_insert_log
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <partition_by>event_date</partition_by>
+        <ttl>event_date + INTERVAL 3 DAY</ttl>
+    </asynchronous_insert_log>
+ devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - 33554432 + 100000 - local - / + local + / diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index bc755220c2f..e32df110a18 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -4,10 +4,9 @@ import os import pytest -pytestmark = pytest.mark.skip - from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread +from azure.storage.blob import BlobServiceClient SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -573,8 +572,42 @@ def test_restart_during_load(cluster): def test_big_insert(cluster): node = cluster.instances[NODE_NAME] create_table(node, TABLE_NAME) + + check_query = "SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)" + azure_query( node, - f"INSERT INTO {TABLE_NAME} select '2020-01-03', number, toString(number) from numbers(5000000)", + f"INSERT INTO {TABLE_NAME} {check_query}", ) - assert int(azure_query(node, f"SELECT count() FROM {TABLE_NAME}")) == 5000000 + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query( + check_query + ) + + blob_container_client = cluster.blob_service_client.get_container_client( + CONTAINER_NAME + ) + + blobs = blob_container_client.list_blobs() + max_single_part_upload_size = 100000 + checked = False + + for blob in blobs: + blob_client = cluster.blob_service_client.get_blob_client( + CONTAINER_NAME, blob.name + ) + committed, uncommited = blob_client.get_block_list() + + blocks = committed + last_id = len(blocks) + id = 1 + if len(blocks) > 1: + checked = True + + for block in blocks: + print(f"blob: {blob.name}, block size: {block.size}") + if id == last_id: + assert max_single_part_upload_size >= block.size + else: + assert max_single_part_upload_size == block.size + id += 1 + assert checked diff --git a/tests/integration/test_recovery_replica/test.py b/tests/integration/test_recovery_replica/test.py index 4a1298162da..0a63da4db22 100644 --- a/tests/integration/test_recovery_replica/test.py +++ b/tests/integration/test_recovery_replica/test.py @@ -196,6 +196,7 @@ def test_update_metadata(start_cluster): node1.query("ALTER TABLE update_metadata MODIFY COLUMN col1 String") node1.query("ALTER TABLE update_metadata ADD COLUMN col2 INT") + node3.query("SYSTEM SYNC REPLICA update_metadata") for i in range(1, 11): node3.query( "INSERT INTO update_metadata VALUES ({}, '{}', {})".format( diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index eb2b14ffb1a..68f2776e955 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -73,4 +73,4 @@ def test_replicated_merge_tree_defaults_compatibility(started_cluster): node2.restart_with_latest_version() node1.query(create_query.format(replica=1)) - node1.query("EXISTS TABLE test.table") == "1\n" + assert node1.query("EXISTS TABLE test.table") == "1\n" diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 860b83d4ed1..1c559312105 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ 
b/tests/integration/test_s3_zero_copy_replication/test.py @@ -8,11 +8,12 @@ from helpers.cluster import ClickHouseCluster logging.getLogger().setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) +cluster = ClickHouseCluster(__file__) + @pytest.fixture(scope="module") -def cluster(): +def started_cluster(): try: - cluster = ClickHouseCluster(__file__) cluster.add_instance( "node1", main_configs=["configs/config.d/s3.xml"], @@ -96,7 +97,7 @@ def wait_for_active_parts(node, num_expected_parts, table_name, timeout=30): # Result of `get_large_objects_count` can be changed in other tests, so run this case at the beginning @pytest.mark.order(0) @pytest.mark.parametrize("policy", ["s3"]) -def test_s3_zero_copy_replication(cluster, policy): +def test_s3_zero_copy_replication(started_cluster, policy): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -153,7 +154,7 @@ def test_s3_zero_copy_replication(cluster, policy): @pytest.mark.skip(reason="Test is flaky (and never was stable)") -def test_s3_zero_copy_on_hybrid_storage(cluster): +def test_s3_zero_copy_on_hybrid_storage(started_cluster): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -268,7 +269,9 @@ def insert_large_data(node, table): ("tiered_copy", True, 3), ], ) -def test_s3_zero_copy_with_ttl_move(cluster, storage_policy, large_data, iterations): +def test_s3_zero_copy_with_ttl_move( + started_cluster, storage_policy, large_data, iterations +): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -333,7 +336,7 @@ def test_s3_zero_copy_with_ttl_move(cluster, storage_policy, large_data, iterati (True, 3), ], ) -def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): +def test_s3_zero_copy_with_ttl_delete(started_cluster, large_data, iterations): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -415,6 +418,22 @@ def wait_mutations(node, table, seconds): assert mutations == "0\n" +def wait_for_clean_old_parts(node, table, seconds): + time.sleep(1) + while seconds > 0: + seconds -= 1 + parts = node.query( + f"SELECT count() FROM system.parts WHERE table='{table}' AND active=0" + ) + if parts == "0\n": + return + time.sleep(1) + parts = node.query( + f"SELECT count() FROM system.parts WHERE table='{table}' AND active=0" + ) + assert parts == "0\n" + + def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -435,6 +454,8 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): node1.query("INSERT INTO unfreeze_test VALUES (0)") + wait_for_active_parts(node2, 1, "unfreeze_test") + node1.query("ALTER TABLE unfreeze_test FREEZE WITH NAME 'freeze_backup1'") node2.query("ALTER TABLE unfreeze_test FREEZE WITH NAME 'freeze_backup2'") wait_mutations(node1, "unfreeze_test", 10) @@ -472,11 +493,11 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): node2.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY") -def test_s3_zero_copy_unfreeze_alter(cluster): +def test_s3_zero_copy_unfreeze_alter(started_cluster): s3_zero_copy_unfreeze_base(cluster, "ALTER TABLE unfreeze_test UNFREEZE WITH NAME") -def test_s3_zero_copy_unfreeze_system(cluster): +def test_s3_zero_copy_unfreeze_system(started_cluster): s3_zero_copy_unfreeze_base(cluster, "SYSTEM UNFREEZE WITH NAME") @@ -565,17 +586,17 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): check_objects_not_exisis(cluster, objects1) -def 
test_s3_zero_copy_drop_detached_alter(cluster): +def test_s3_zero_copy_drop_detached_alter(started_cluster): s3_zero_copy_drop_detached( cluster, "ALTER TABLE drop_detached_test UNFREEZE WITH NAME" ) -def test_s3_zero_copy_drop_detached_system(cluster): +def test_s3_zero_copy_drop_detached_system(started_cluster): s3_zero_copy_drop_detached(cluster, "SYSTEM UNFREEZE WITH NAME") -def test_s3_zero_copy_concurrent_merge(cluster): +def test_s3_zero_copy_concurrent_merge(started_cluster): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -620,3 +641,119 @@ def test_s3_zero_copy_concurrent_merge(cluster): for node in (node1, node2): assert node.query("select sum(id) from concurrent_merge").strip() == "1600" + + +def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + + node1.query("DROP TABLE IF EXISTS zero_copy_mutation NO DELAY") + node2.query("DROP TABLE IF EXISTS zero_copy_mutation NO DELAY") + + node1.query( + """ + CREATE TABLE zero_copy_mutation (id UInt64, value1 String, value2 String, value3 String) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/zero_copy_mutation', '{replica}') + ORDER BY id + PARTITION BY (id % 4) + SETTINGS storage_policy='s3', + old_parts_lifetime=1000 + """ + ) + + node2.query( + """ + CREATE TABLE zero_copy_mutation (id UInt64, value1 String, value2 String, value3 String) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/zero_copy_mutation', '{replica}') + ORDER BY id + PARTITION BY (id % 4) + SETTINGS storage_policy='s3', + old_parts_lifetime=1000 + """ + ) + + node1.query( + """ + INSERT INTO zero_copy_mutation + SELECT * FROM generateRandom('id UInt64, value1 String, value2 String, value3 String') limit 1000000 + """ + ) + + wait_for_active_parts(node2, 4, "zero_copy_mutation") + + objects1 = node1.get_table_objects("zero_copy_mutation") + check_objects_exisis(cluster, objects1) + + node1.query( + """ + ALTER TABLE zero_copy_mutation + ADD COLUMN valueX String MATERIALIZED value1 + """ + ) + + node1.query( + """ + ALTER TABLE zero_copy_mutation + MATERIALIZE COLUMN valueX + """ + ) + + wait_mutations(node1, "zero_copy_mutation", 10) + wait_mutations(node2, "zero_copy_mutation", 10) + + # If bug present at least one node has metadata with incorrect ref_count values. + # But it may be any node depends on mutation execution order. + # We can try to find one, but this required knowledge about internal metadata structure. + # It can be change in future, so we do not find this node here. + # And with the bug test may be success sometimes. 
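These zero-copy checks lean on polling helpers (wait_for_active_parts, wait_mutations, and the newly added wait_for_clean_old_parts) that re-query system tables until a condition holds or a timeout runs out. A minimal, generic sketch of that polling pattern; poll_until and the sample query are illustrative names only, not part of the test helpers:

    import time

    def poll_until(condition, timeout_s=30, interval_s=1):
        """Re-evaluate condition() until it is truthy or timeout_s expires."""
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            if condition():
                return True
            time.sleep(interval_s)
        # One last check so a slow but successful run is not reported as a failure.
        return condition()

    # Hypothetical usage mirroring wait_for_clean_old_parts:
    # assert poll_until(lambda: node.query(
    #     "SELECT count() FROM system.parts WHERE table='unfreeze_test' AND active=0"
    # ).strip() == "0", timeout_s=10)
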
+ nodeX = node1 + nodeY = node2 + + objectsY = nodeY.get_table_objects("zero_copy_mutation") + check_objects_exisis(cluster, objectsY) + + nodeX.query( + """ + ALTER TABLE zero_copy_mutation + DETACH PARTITION '0' + """ + ) + + nodeX.query( + """ + ALTER TABLE zero_copy_mutation + ATTACH PARTITION '0' + """ + ) + + wait_mutations(node1, "zero_copy_mutation", 10) + wait_mutations(node2, "zero_copy_mutation", 10) + + nodeX.query( + """ + DROP TABLE zero_copy_mutation SYNC + """ + ) + + # time to remove objects + time.sleep(10) + + nodeY.query( + """ + SELECT count() FROM zero_copy_mutation + WHERE value3 LIKE '%ab%' + """ + ) + + check_objects_exisis(cluster, objectsY) + + nodeY.query( + """ + DROP TABLE zero_copy_mutation SYNC + """ + ) + + # time to remove objects + time.sleep(10) + + check_objects_not_exisis(cluster, objectsY) diff --git a/tests/integration/test_storage_s3/configs/use_environment_credentials.xml b/tests/integration/test_storage_s3/configs/use_environment_credentials.xml new file mode 100644 index 00000000000..4c3f0b77525 --- /dev/null +++ b/tests/integration/test_storage_s3/configs/use_environment_credentials.xml @@ -0,0 +1,5 @@ + + + 1 + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f2ae714c6cb..6e1a6e8a66b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -114,6 +114,16 @@ def started_cluster(): "s3_non_default", with_minio=True, ) + cluster.add_instance( + "s3_with_environment_credentials", + with_minio=True, + env_variables={ + "AWS_ACCESS_KEY_ID": "minio", + "AWS_SECRET_ACCESS_KEY": "minio123", + }, + main_configs=["configs/use_environment_credentials.xml"], + ) + logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -1712,3 +1722,19 @@ def test_ast_auth_headers(started_cluster): ) assert result.strip() == "1\t2\t3" + + +def test_environment_credentials(started_cluster): + filename = "test.csv" + bucket = started_cluster.minio_restricted_bucket + + instance = started_cluster.instances["s3_with_environment_credentials"] + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache3.jsonl') select * from numbers(100) settings s3_truncate_on_insert=1" + ) + assert ( + "100" + == instance.query( + f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache3.jsonl')" + ).strip() + ) diff --git a/tests/jepsen.clickhouse-keeper/project.clj b/tests/jepsen.clickhouse-keeper/project.clj index 187e91cd44d..98049835cb1 100644 --- a/tests/jepsen.clickhouse-keeper/project.clj +++ b/tests/jepsen.clickhouse-keeper/project.clj @@ -7,7 +7,8 @@ :main jepsen.clickhouse-keeper.main :plugins [[lein-cljfmt "0.7.0"]] :dependencies [[org.clojure/clojure "1.10.1"] - [jepsen "0.2.6"] + [jepsen "0.2.7"] [zookeeper-clj "0.9.4"] + [com.hierynomus/sshj "0.34.0"] [org.apache.zookeeper/zookeeper "3.6.1" :exclusions [org.slf4j/slf4j-log4j12]]] :repl-options {:init-ns jepsen.clickhouse-keeper.main}) diff --git a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/main.clj b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/main.clj index cd1aa540e45..46fc8651bfe 100644 --- a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/main.clj +++ b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/main.clj @@ -1,3 +1,98 @@ +(ns jepsen.control.sshj + (:require [jepsen.control [core :as core] + [sshj :as 
sshj]] + [slingshot.slingshot :refer [try+ throw+]]) + (:import (net.schmizz.sshj SSHClient + DefaultConfig) + (net.schmizz.sshj.transport.verification PromiscuousVerifier) + (java.util.concurrent Semaphore))) + +(defrecord SSHJRemote [concurrency-limit + conn-spec + ^SSHClient client + ^Semaphore semaphore] + core/Remote + (connect [this conn-spec] + (if (:dummy conn-spec) + (assoc this :conn-spec conn-spec) + (try+ (let [c (as-> (SSHClient.) client + (do + (if (:strict-host-key-checking conn-spec) + (.loadKnownHosts client) + (.addHostKeyVerifier client (PromiscuousVerifier.))) + (.connect client (:host conn-spec) (:port conn-spec)) + (auth! client conn-spec) + client))] + (assoc this + :conn-spec conn-spec + :client c + :semaphore (Semaphore. concurrency-limit true))) + (catch Exception e + ; SSHJ wraps InterruptedException in its own exceptions, so we + ; have to see through that and rethrow properly. + (let [cause (util/ex-root-cause e)] + (when (instance? InterruptedException cause) + (throw cause))) + (throw+ (assoc conn-spec + :type :jepsen.control/session-error + :message "Error opening SSH session. Verify username, password, and node hostnames are correct.")))))) + + (disconnect! [this] + (when-let [c client] + (.close c))) + + (execute! [this ctx action] + ; (info :permits (.availablePermits semaphore)) + (when (:dummy conn-spec) + (throw+ {:type :jepsen.control/dummy})) + (.acquire semaphore) + (sshj/with-errors conn-spec ctx + (try + (with-open [session (.startSession client)] + (let [cmd (.exec session (:cmd action)) + ; Feed it input + _ (when-let [input (:in action)] + (let [stream (.getOutputStream cmd)] + (bs/transfer input stream) + (send-eof! client session) + (.close stream))) + ; Read output + out (.toString (IOUtils/readFully (.getInputStream cmd))) + err (.toString (IOUtils/readFully (.getErrorStream cmd))) + ; Wait on command + _ (.join cmd)] + ; Return completion + (assoc action + :out out + :err err + ; There's also a .getExitErrorMessage that might be + ; interesting here? + :exit (.getExitStatus cmd)))) + (finally + (.release semaphore))))) + + (upload! [this ctx local-paths remote-path _opts] + (when (:dummy conn-spec) + (throw+ {:type :jepsen.control/dummy})) + (with-errors conn-spec ctx + (with-open [sftp (.newSFTPClient client)] + (.put sftp (FileSystemFile. local-paths) remote-path)))) + + (download! [this ctx remote-paths local-path _opts] + (when (:dummy conn-spec) + (throw+ {:type :jepsen.control/dummy})) + (with-errors conn-spec ctx + (with-open [sftp (.newSFTPClient client)] + (.get sftp remote-paths (FileSystemFile. local-path)))))) + +(defn remote + "Constructs an SSHJ remote." + [] + (-> (SSHJRemote. concurrency-limit nil nil nil) + ; We *can* use our own SCP, but shelling out is faster. 
+ scp/remote + retry/remote)) + (ns jepsen.clickhouse-keeper.main (:require [clojure.tools.logging :refer :all] [jepsen.clickhouse-keeper.utils :refer :all] @@ -17,7 +112,6 @@ [checker :as checker] [cli :as cli] [client :as client] - [control :as c] [db :as db] [nemesis :as nemesis] [generator :as gen] diff --git a/tests/performance/bitmap_array_element.xml b/tests/performance/bitmap_array_element.xml new file mode 100644 index 00000000000..914aba243c8 --- /dev/null +++ b/tests/performance/bitmap_array_element.xml @@ -0,0 +1,12 @@ + + + WITH + ( + SELECT bitmapBuild(groupArray(number)) + FROM numbers(3000000) + ) AS a, + [a, a, a] AS b + SELECT sum(bitmapCardinality(b[(number % 3) + 1])) + FROM numbers(10000) + + diff --git a/tests/performance/cryptographic_hashes.xml b/tests/performance/cryptographic_hashes.xml index fbe0babd43c..9cab0014ce9 100644 --- a/tests/performance/cryptographic_hashes.xml +++ b/tests/performance/cryptographic_hashes.xml @@ -8,6 +8,7 @@ SHA224 SHA256 halfMD5 + BLAKE3 diff --git a/tests/queries/0_stateless/00107_totals_after_having.sql b/tests/queries/0_stateless/00107_totals_after_having.sql index 40a598a194d..d0a9a3a318c 100644 --- a/tests/queries/0_stateless/00107_totals_after_having.sql +++ b/tests/queries/0_stateless/00107_totals_after_having.sql @@ -30,7 +30,7 @@ SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (S SELECT '*** External aggregation.'; -SET max_bytes_before_external_group_by=1000000; +SET max_bytes_before_external_group_by = 1000000; SET group_by_two_level_threshold = 100000; SELECT '**** totals_mode = after_having_auto'; diff --git a/tests/queries/0_stateless/00294_shard_enums.sql b/tests/queries/0_stateless/00294_shard_enums.sql index dcd74ac3e3a..d5b929b2db6 100644 --- a/tests/queries/0_stateless/00294_shard_enums.sql +++ b/tests/queries/0_stateless/00294_shard_enums.sql @@ -51,7 +51,7 @@ select * from enums order by e; select * from enums order by e desc; -- GROUP BY -select count(), e from enums group by e; +select count(), e from enums group by e order by e; select any(e) from enums; -- IN diff --git a/tests/queries/0_stateless/00302_http_compression.reference b/tests/queries/0_stateless/00302_http_compression.reference index 909e30d2992..b868768dccd 100644 --- a/tests/queries/0_stateless/00302_http_compression.reference +++ b/tests/queries/0_stateless/00302_http_compression.reference @@ -78,17 +78,44 @@ 7 8 9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 < Content-Encoding: gzip < Content-Encoding: deflate < Content-Encoding: gzip < Content-Encoding: br < Content-Encoding: xz < Content-Encoding: zstd +< Content-Encoding: lz4 +< Content-Encoding: bz2 +< Content-Encoding: snappy 1 1 1 1 1 +1 +1 +Hello, world +Hello, world Hello, world Hello, world Hello, world @@ -96,3 +123,4 @@ Hello, world Hello, world 0 Part1 Part2 +Part1 Part2 diff --git a/tests/queries/0_stateless/00302_http_compression.sh b/tests/queries/0_stateless/00302_http_compression.sh index 69800d6e0bf..f7656268868 100755 --- a/tests/queries/0_stateless/00302_http_compression.sh +++ b/tests/queries/0_stateless/00302_http_compression.sh @@ -18,6 +18,8 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept- ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' | brotli -d; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: xz' -d 'SELECT number FROM system.numbers LIMIT 10' | xz -d; 
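The 00302_http_compression test is being extended to cover lz4 and bz2 on both the response side (Accept-Encoding) and the request side (Content-Encoding), including a case that POSTs two separately compressed bzip2 streams concatenated together, so the server has to decode the body as a sequence of streams rather than a single one. A small stdlib-only sketch of that last property, reusing the query text from the test:

    import bz2

    # Two independently compressed streams, concatenated byte for byte,
    # like the shell pipeline (echo ... | bzip2 -c; echo ... | bzip2 -c).
    payload = bz2.compress(b"SELECT 'Part1") + bz2.compress(b" Part2'")

    # bz2.decompress is documented to decode a concatenation of multiple
    # streams, which is what the server must do with such a request body.
    assert bz2.decompress(payload) == b"SELECT 'Part1 Part2'"
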
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: zstd' -d 'SELECT number FROM system.numbers LIMIT 10' | zstd -d; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: lz4' -d 'SELECT number FROM system.numbers LIMIT 10' | lz4 -d; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: bz2' -d 'SELECT number FROM system.numbers LIMIT 10' | bzip2 -d; ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; @@ -27,20 +29,28 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: xz' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: zstd' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: lz4' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: bz2' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: snappy' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; echo "SELECT 1" | ${CLICKHOUSE_CURL} -sS --data-binary @- "${CLICKHOUSE_URL}"; echo "SELECT 1" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}"; echo "SELECT 1" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}"; echo "SELECT 1" | xz -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: xz' "${CLICKHOUSE_URL}"; echo "SELECT 1" | zstd -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: zstd' "${CLICKHOUSE_URL}"; +echo "SELECT 1" | lz4 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: lz4' "${CLICKHOUSE_URL}"; +echo "SELECT 1" | bzip2 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: bz2' "${CLICKHOUSE_URL}"; echo "'Hello, world'" | ${CLICKHOUSE_CURL} -sS --data-binary @- "${CLICKHOUSE_URL}&query=SELECT"; echo "'Hello, world'" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}&query=SELECT"; echo "'Hello, world'" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}&query=SELECT"; echo "'Hello, world'" | xz -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: xz' "${CLICKHOUSE_URL}&query=SELECT"; echo "'Hello, world'" | zstd -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: zstd' "${CLICKHOUSE_URL}&query=SELECT"; +echo "'Hello, world'" | lz4 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: lz4' 
"${CLICKHOUSE_URL}&query=SELECT"; +echo "'Hello, world'" | bzip2 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: bz2' "${CLICKHOUSE_URL}&query=SELECT"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c; -# POST multiple concatenated gzip streams. +# POST multiple concatenated gzip and bzip2 streams. (echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}" --data-binary @- +(echo -n "SELECT 'Part1" | bzip2 -c; echo " Part2'" | bzip2 -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: bz2' "${CLICKHOUSE_URL}" --data-binary @- diff --git a/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh b/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh index fb89199acb1..a5eca3d987e 100755 --- a/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh +++ b/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh @@ -11,4 +11,4 @@ ${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(1) UNION ALL SELECT * FROM nu echo 'extremes' ${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(3)" --format PrettyCompactMonoBlock --extremes=1 echo 'totals' -${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS" --format PrettyCompactMonoBlock +${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS ORDER BY number%2" --format PrettyCompactMonoBlock diff --git a/tests/queries/0_stateless/00502_custom_partitioning_local.reference b/tests/queries/0_stateless/00502_custom_partitioning_local.reference index 7b14a2d4edc..fff28819e74 100644 --- a/tests/queries/0_stateless/00502_custom_partitioning_local.reference +++ b/tests/queries/0_stateless/00502_custom_partitioning_local.reference @@ -9,7 +9,7 @@ Sum before DETACH PARTITION: Sum after DETACH PARTITION: 0 system.detached_parts after DETACH PARTITION: -default not_partitioned all all_1_2_1 default 1 2 1 +default not_partitioned all all_1_2_1 1 2 1 *** Partitioned by week *** Parts before OPTIMIZE: 1999-12-27 19991227_1_1_0 diff --git a/tests/queries/0_stateless/00502_custom_partitioning_local.sql b/tests/queries/0_stateless/00502_custom_partitioning_local.sql index b7eb08c919e..c85a978af68 100644 --- a/tests/queries/0_stateless/00502_custom_partitioning_local.sql +++ b/tests/queries/0_stateless/00502_custom_partitioning_local.sql @@ -1,4 +1,3 @@ --- Tags: no-s3-storage SELECT '*** Not partitioned ***'; DROP TABLE IF EXISTS not_partitioned; @@ -19,7 +18,7 @@ ALTER TABLE not_partitioned DETACH PARTITION ID 'all'; SELECT 'Sum after DETACH PARTITION:'; SELECT sum(x) FROM not_partitioned; SELECT 'system.detached_parts after DETACH PARTITION:'; -SELECT * FROM system.detached_parts WHERE database = currentDatabase() AND table = 'not_partitioned'; +SELECT system.detached_parts.* EXCEPT disk FROM system.detached_parts WHERE database = currentDatabase() AND table = 'not_partitioned'; DROP TABLE not_partitioned; diff --git a/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference b/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference index 6e5555b0df8..9c6bb9d0b91 100644 --- a/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference +++ b/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference @@ -166,3 +166,63 @@ 2005-01-01 2004-01-01 2003-01-01 +2216-09-23 +2216-10-13 +2216-11-02 +2216-11-22 +2216-12-12 +2217-01-01 +2217-01-21 
+2217-02-10 +2217-03-02 +2217-03-22 +2217-04-11 +2217-03-22 +2217-03-02 +2217-02-10 +2217-01-21 +2217-01-01 +2216-12-12 +2216-11-22 +2216-11-02 +2216-10-13 +2215-05-01 +2215-09-01 +2216-01-01 +2216-05-01 +2216-09-01 +2217-01-01 +2217-05-01 +2217-09-01 +2218-01-01 +2218-05-01 +2218-09-01 +2218-05-01 +2218-01-01 +2217-09-01 +2217-05-01 +2217-01-01 +2216-09-01 +2216-05-01 +2216-01-01 +2215-09-01 +2197-01-01 +2201-01-01 +2205-01-01 +2209-01-01 +2213-01-01 +2217-01-01 +2221-01-01 +2225-01-01 +2229-01-01 +2233-01-01 +2237-01-01 +2233-01-01 +2229-01-01 +2225-01-01 +2221-01-01 +2217-01-01 +2213-01-01 +2209-01-01 +2205-01-01 +2201-01-01 diff --git a/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql b/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql index 6b8ecc3a9fb..09c1ce9bf6b 100644 --- a/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql +++ b/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql @@ -53,8 +53,18 @@ SELECT toDate('2017-01-01') - INTERVAL 1 YEAR AS x; SELECT toDate('2017-01-01') - INTERVAL -1 YEAR AS x; -SELECT toDate('2017-01-01') + INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30; +SELECT INTERVAL number - 15 MONTH + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30; SELECT toDate('2017-01-01') - INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30; -SELECT toDate('2017-01-01') + INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30; +SELECT INTERVAL number - 15 YEAR + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30; SELECT toDate('2017-01-01') - INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30; + + +SELECT toDate32('2217-01-01') + INTERVAL number * 20 - 100 DAY AS x FROM system.numbers LIMIT 10; +SELECT INTERVAL 100 - number * 20 DAY + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; + +SELECT INTERVAL number * 4 - 20 MONTH + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; +SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 MONTH AS x FROM system.numbers LIMIT 10; + +SELECT INTERVAL number * 4 - 20 YEAR + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; +SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 YEAR AS x FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/00676_group_by_in.sql b/tests/queries/0_stateless/00676_group_by_in.sql index 9296458dfa3..1c5dbb29314 100644 --- a/tests/queries/0_stateless/00676_group_by_in.sql +++ b/tests/queries/0_stateless/00676_group_by_in.sql @@ -9,4 +9,5 @@ SELECT number IN (1, 2) AS x, count() FROM numbers(10) -GROUP BY x; +GROUP BY x +ORDER BY x; diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql index 3ca7b482b84..a11d9e2d9fe 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql @@ -66,10 +66,10 @@ drop table if exists lc_null_fix_str_1; select '-'; SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one; -select toLowCardinality('a') as val group by val; -select (toLowCardinality('a') as val) || 'b' group by val; -select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val; -select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val; +select toLowCardinality('a') as val group by val order by val; +select (toLowCardinality('a') as val) || 'b' group by val order by val; +select toLowCardinality(z) as val from 
(select arrayJoin(['c', 'd']) as z) group by val order by val; +select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val order by val; select '-'; drop table if exists lc_str_uuid; diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 2510517a740..11396dd34eb 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash # Tags: long, no-s3-storage +# no-s3 because read FileOpen metric set -e diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql index 3ba4180c20c..4ed86e20913 100644 --- a/tests/queries/0_stateless/00732_base64_functions.sql +++ b/tests/queries/0_stateless/00732_base64_functions.sql @@ -1,8 +1,16 @@ -- Tags: no-fasttest SET send_logs_level = 'fatal'; + SELECT base64Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); + SELECT base64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v'; + SELECT tryBase64Decode('Zm9vYmF=Zm9v'); -SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 } \ No newline at end of file + +SELECT base64Encode(val, 'excess argument') FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); -- { serverError 42 } +SELECT base64Decode(val, 'excess argument') FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); -- { serverError 42 } +SELECT tryBase64Decode('Zm9vYmF=Zm9v', 'excess argument'); -- { serverError 42 } + +SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 } diff --git a/tests/queries/0_stateless/00800_function_java_hash.reference b/tests/queries/0_stateless/00800_function_java_hash.reference index 5e1fde8441f..db651777113 100644 --- a/tests/queries/0_stateless/00800_function_java_hash.reference +++ b/tests/queries/0_stateless/00800_function_java_hash.reference @@ -1,3 +1,13 @@ +123 +-123 +123 +-123 +123 +-123 +123 +122 +-539222985 +-539222986 96354 -676697544 138768 diff --git a/tests/queries/0_stateless/00800_function_java_hash.sql b/tests/queries/0_stateless/00800_function_java_hash.sql index ec26895ed60..fc4a0557599 100644 --- a/tests/queries/0_stateless/00800_function_java_hash.sql +++ b/tests/queries/0_stateless/00800_function_java_hash.sql @@ -1,5 +1,15 @@ -- Tags: no-fasttest +select javaHash(toInt8(123)); +select javaHash(toInt8(-123)); +select javaHash(toInt16(123)); +select javaHash(toInt16(-123)); +select javaHash(toInt32(123)); +select javaHash(toInt32(-123)); +select javaHash(toInt64(123)); +select javaHash(toInt64(-123)); +select javaHash(toInt64(12345678901)); +select javaHash(toInt64(-12345678901)); select javaHash('abc'); select javaHash('874293087'); select javaHashUTF16LE(convertCharset('a1가', 'utf-8', 'utf-16le')); diff --git a/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference new file mode 100644 index 00000000000..fd8cdc5d5bb --- /dev/null +++ b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.reference @@ -0,0 +1,4 @@ +Not supported +Not supported +Not supported +Not supported diff 
--git a/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh new file mode 100755 index 00000000000..03e87a90a95 --- /dev/null +++ b/tests/queries/0_stateless/00800_function_java_hash_with_unsigined_types.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +exception_pattern='DB::Exception:' + +function check() +{ + ${CLICKHOUSE_CLIENT} -q "$1" |& { + if [[ `grep -F $exception_pattern | wc -l` -gt 0 ]] + then + echo 'Not supported' + fi + } +} + +check "SELECT javaHash(toUInt8(1))" +check "SELECT javaHash(toUInt16(1))" +check "SELECT javaHash(toUInt32(1))" +check "SELECT javaHash(toUInt64(1))" diff --git a/tests/queries/0_stateless/00829_bitmap64_function.sql b/tests/queries/0_stateless/00829_bitmap64_function.sql index c4e0293e9d5..94704d2f64e 100644 --- a/tests/queries/0_stateless/00829_bitmap64_function.sql +++ b/tests/queries/0_stateless/00829_bitmap64_function.sql @@ -20,13 +20,14 @@ INSERT INTO bitmap_state_test SELECT city_id, groupBitmapState(uid) AS uv FROM bitmap_test -GROUP BY pickup_date, city_id; +GROUP BY pickup_date, city_id +ORDER BY pickup_date, city_id; -SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date; +SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date; SELECT groupBitmap( uid ) AS user_num FROM bitmap_test; -SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date; +SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date order by pickup_date; SELECT bitmapCardinality(day_today) AS today_users, @@ -37,11 +38,11 @@ SELECT bitmapXorCardinality(day_today, day_before) AS diff_users FROM ( - SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id ) js1 ALL LEFT JOIN ( - SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id ) js2 USING city_id; @@ -54,11 +55,11 @@ SELECT bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users FROM ( - SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id ) js1 ALL LEFT JOIN ( - SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id ) js2 USING city_id; @@ -68,7 +69,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapHasAny(bitmapBuild([uid]), (SELECT SELECT count(*) FROM bitmap_test WHERE 0 = bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid])); -SELECT 
bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id; +SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id; DROP TABLE bitmap_state_test; DROP TABLE bitmap_test; diff --git a/tests/queries/0_stateless/00829_bitmap_function.sql b/tests/queries/0_stateless/00829_bitmap_function.sql index 6a21f5caf0f..420b5edcf20 100644 --- a/tests/queries/0_stateless/00829_bitmap_function.sql +++ b/tests/queries/0_stateless/00829_bitmap_function.sql @@ -20,7 +20,7 @@ INSERT INTO bitmap_test SELECT '2019-01-03', 2, number FROM numbers(1,10); SELECT groupBitmap( uid ) AS user_num FROM bitmap_test; -SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date; +SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date ORDER BY pickup_date; SELECT bitmapCardinality(day_today) AS today_users, @@ -31,11 +31,11 @@ SELECT bitmapXorCardinality(day_today, day_before) AS diff_users FROM ( - SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id ) js1 ALL LEFT JOIN ( - SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id ) js2 USING city_id; @@ -48,11 +48,11 @@ SELECT bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users FROM ( - SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id ) js1 ALL LEFT JOIN ( - SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id + SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id ) js2 USING city_id; @@ -67,7 +67,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapContains((SELECT groupBitmapState(u SELECT count(*) FROM bitmap_test WHERE 0 = bitmapContains((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), uid); -- PR#8082 -SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id; +SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id; -- bitmap state test DROP TABLE IF EXISTS bitmap_state_test; @@ -87,7 +87,7 @@ INSERT INTO bitmap_state_test SELECT FROM bitmap_test GROUP BY pickup_date, city_id; -SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date; +SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date; -- between column and expression test DROP TABLE IF EXISTS bitmap_column_expr_test; diff --git 
a/tests/queries/0_stateless/00961_temporary_live_view_watch.reference b/tests/queries/0_stateless/00961_temporary_live_view_watch.reference deleted file mode 100644 index 6fbbedf1b21..00000000000 --- a/tests/queries/0_stateless/00961_temporary_live_view_watch.reference +++ /dev/null @@ -1,3 +0,0 @@ -0 1 -6 2 -21 3 diff --git a/tests/queries/0_stateless/00961_temporary_live_view_watch.sql b/tests/queries/0_stateless/00961_temporary_live_view_watch.sql deleted file mode 100644 index 8bf6fa5e07b..00000000000 --- a/tests/queries/0_stateless/00961_temporary_live_view_watch.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv WITH TIMEOUT AS SELECT sum(a) FROM mt; - -WATCH lv LIMIT 0; - -INSERT INTO mt VALUES (1),(2),(3); - -WATCH lv LIMIT 0; - -INSERT INTO mt VALUES (4),(5),(6); - -WATCH lv LIMIT 0; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py index 3bc649e92dc..983b330e24a 100755 --- a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py @@ -29,7 +29,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("DROP TABLE IF EXISTS test.lv") client1.expect(prompt) client1.send( - "CREATE LIVE VIEW test.lv WITH TIMEOUT 60 AND REFRESH 1" + "CREATE LIVE VIEW test.lv WITH REFRESH 1" " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'" ) client1.expect(prompt) @@ -43,16 +43,3 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - # poll until live view table is dropped - start_time = time.time() - while True: - client1.send("SELECT * FROM test.lv FORMAT JSONEachRow") - client1.expect(prompt) - if "Table test.lv doesn't exist" in client1.before: - break - if time.time() - start_time > 90: - break - # check table is dropped - client1.send("DROP TABLE test.lv") - client1.expect("Table test.lv doesn't exist") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py b/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py deleted file mode 100755 index 0358c28bf91..00000000000 --- a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by 
tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - client1.send("WATCH test.lv") - client1.expect("_version") - client1.expect(r"0.*1" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(r"6.*2" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (4),(5),(6)") - client1.expect(r"21.*3" + end_of_block) - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference b/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py b/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py index bafb283e487..c8902203a3d 100755 --- a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py +++ b/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py @@ -33,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") + client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") client1.expect(prompt) client1.send("WATCH test.lv EVENTS FORMAT CSV") client1.expect("Progress: 1.00 rows.*\)") diff --git a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py b/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py index 3cb1220bb49..b499f673cc0 100755 --- a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py +++ b/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py @@ -33,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") + client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") client1.expect(prompt) client1.send("WATCH test.lv") client1.expect("_version") diff --git a/tests/queries/0_stateless/00980_create_temporary_live_view.reference b/tests/queries/0_stateless/00980_create_temporary_live_view.reference deleted file mode 100644 index 49d86fc2fbf..00000000000 --- a/tests/queries/0_stateless/00980_create_temporary_live_view.reference +++ /dev/null @@ -1,4 +0,0 @@ -temporary_live_view_timeout 5 -live_view_heartbeat_interval 15 -lv -0 diff --git a/tests/queries/0_stateless/00980_create_temporary_live_view.sql b/tests/queries/0_stateless/00980_create_temporary_live_view.sql deleted file mode 100644 index e01a6d9643e..00000000000 --- a/tests/queries/0_stateless/00980_create_temporary_live_view.sql +++ /dev/null @@ -1,18 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -SELECT name, value from system.settings WHERE name = 'temporary_live_view_timeout'; -SELECT name, value from system.settings WHERE name = 'live_view_heartbeat_interval'; - 
-CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv WITH TIMEOUT 1 AS SELECT sum(a) FROM mt; - -SHOW TABLES WHERE database=currentDatabase() and name LIKE 'lv'; -SELECT sleep(2); -SHOW TABLES WHERE database=currentDatabase() and name LIKE 'lv'; - -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python b/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python deleted file mode 100644 index 8ddb1a1ea81..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 - -import subprocess -import threading -import queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query, '--live_view_heartbeat_interval=1', '--progress'] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - # print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - # print(line) - assert (line.endswith('0\t1')) - assert ('Progress: 0.00 rows' in line) - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - assert (line.endswith('6\t2')) - assert ('Progress: 1.00 rows' in line) - - # send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - # line = q.get() - # print(line) - # assert (line.endswith('6\t2')) - # assert ('Progress: 1.00 rows' in line) - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. 
- send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - # print(line) - # assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference b/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python b/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python deleted file mode 100644 index a417cdf2937..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 - -import subprocess -import threading -import queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - print(line) - assert (line == '0\t1') - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - print(line) - assert (line == '6\t2') - - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - line = q.get() - print(line) - assert (line == '21\t3') - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. 
- send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - print(line) - assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference b/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference deleted file mode 100644 index 1e94cdade41..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference +++ /dev/null @@ -1,7 +0,0 @@ -0 1 -0 1 -6 2 -6 2 -21 3 -21 3 -None diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index a4e2f380eb8..9b130f11df6 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 db_01018 dict1 @@ -12,7 +12,7 @@ db_01018 dict1 ==DROP DICTIONARY 0 =DICTIONARY in Memory DB -CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict2 1 memory_db dict2 diff --git a/tests/queries/0_stateless/01050_group_array_sample.sql b/tests/queries/0_stateless/01050_group_array_sample.sql index 8c7c6a9648c..58b9abf73d2 100644 --- a/tests/queries/0_stateless/01050_group_array_sample.sql +++ b/tests/queries/0_stateless/01050_group_array_sample.sql @@ -1,4 +1,4 @@ -select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k; +select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k; -- different seed -select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k; +select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k; diff --git a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 
506fd23904f..566bb16b10c 100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel, no-s3-storage +-- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; set mutations_sync = 2; diff --git a/tests/queries/0_stateless/01107_join_right_table_totals.sql b/tests/queries/0_stateless/01107_join_right_table_totals.sql index f894b6bf8bb..ad8954d5d70 100644 --- a/tests/queries/0_stateless/01107_join_right_table_totals.sql +++ b/tests/queries/0_stateless/01107_join_right_table_totals.sql @@ -3,17 +3,17 @@ CREATE TABLE t (item_id UInt64, price_sold Float32, date Date) ENGINE MergeTree SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l -FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r +FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r USING (item_id); SELECT id FROM (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) l -FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) r +FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS ORDER BY item_id) r USING (id); SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l -INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r +INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r USING (item_id); SELECT id @@ -26,75 +26,77 @@ FROM ( SELECT item_id AS id, SUM(price_sold) AS recent FROM t WHERE (date BETWEEN '2019-12-16' AND '2020-03-08') GROUP BY id WITH TOTALS + ORDER BY id ) ll FULL JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t WHERE (date BETWEEN '2018-12-17' AND '2019-03-10') GROUP BY id WITH TOTALS + ORDER BY id ) rr USING (id); SELECT id, yago FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr USING (id); SELECT id, yago -FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll +FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr USING (id); SELECT id, yago -FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ) AS ll +FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr USING (id); SELECT id, yago FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr USING (id); SELECT id, yago -FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id 
WITH TOTALS ORDER BY id ) AS ll +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr USING (id); INSERT INTO t VALUES (1, 100, '1970-01-01'), (1, 200, '1970-01-02'); SELECT * -FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l LEFT JOIN (SELECT item_id FROM t ) r ON l.item_id = r.item_id; SELECT * -FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l RIGHT JOIN (SELECT item_id FROM t ) r ON l.item_id = r.item_id; SELECT * FROM (SELECT item_id FROM t) l -LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r ON l.item_id = r.item_id; SELECT * FROM (SELECT item_id FROM t) l -RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r ON l.item_id = r.item_id; SELECT * -FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l -LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l +LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r ON l.item_id = r.item_id; SELECT * -FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS) l -LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ) r +FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l +LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r ON l.item_id = r.item_id; SELECT * -FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS) l -LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ) r +FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date) l +LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date ) r ON l.item_id = r.item_id; DROP TABLE t; diff --git a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference index 0a935516722..69018bef2ef 100644 --- a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference +++ b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference @@ -1,3 +1,3 @@ World -CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) +CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) Hello diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 992c655c760..a95029de257 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ 
b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -26,8 +26,8 @@ function drop_db() { while true; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") - if [[ "$database" == "$CLICKHOUSE_DATABASE" ]]; then return; fi - if [ -z "$database" ]; then return; fi + if [[ "$database" == "$CLICKHOUSE_DATABASE" ]]; then continue; fi + if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT -n --query \ "drop database if exists $database" 2>&1| grep -Fa "Exception: " sleep 0.$RANDOM @@ -38,7 +38,7 @@ function sync_db() { while true; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") - if [ -z "$database" ]; then return; fi + if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --receive_timeout=1 -q \ "system sync database replica $database" 2>&1| grep -Fa "Exception: " | grep -Fv TIMEOUT_EXCEEDED | grep -Fv "only with Replicated engine" | grep -Fv UNKNOWN_DATABASE sleep 0.$RANDOM @@ -49,7 +49,7 @@ function create_table() { while true; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") - if [ -z "$database" ]; then return; fi + if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "create table $database.rmt_${RANDOM}_${RANDOM}_${RANDOM} (n int) engine=ReplicatedMergeTree order by tuple() -- suppress $CLICKHOUSE_TEST_ZOOKEEPER_PREFIX" \ 2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE @@ -61,9 +61,9 @@ function alter_table() { while true; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") - if [ -z "$table" ]; then return; fi + if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ - "alter table $table on cluster $database update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \ + "alter table $table update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \ 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY sleep 0.$RANDOM done @@ -73,7 +73,7 @@ function insert() { while true; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' 
|| name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") - if [ -z "$table" ]; then return; fi + if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT -q \ "insert into $table values ($RANDOM)" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY done diff --git a/tests/queries/0_stateless/01190_full_attach_syntax.reference b/tests/queries/0_stateless/01190_full_attach_syntax.reference index 9d74a8cb3ce..f924c2ec780 100644 --- a/tests/queries/0_stateless/01190_full_attach_syntax.reference +++ b/tests/queries/0_stateless/01190_full_attach_syntax.reference @@ -1,5 +1,5 @@ -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log test diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.reference b/tests/queries/0_stateless/01192_rename_database_zookeeper.reference index 5b430f0a5b1..13f2a780e0b 100644 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.reference +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.reference @@ -14,7 +14,7 @@ renamed 10 45 10 45 ok -CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT()) +CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT()) test_01192_atomic dict NOT_LOADED 00001192-0000-4000-8000-000000000002 no ok diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference index 5a05edcad58..d80501b3f4d 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -6,7 +6,7 @@ CREATE DICTIONARY dict_db_01224.dict `val` UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 
'dict_db_01224')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED @@ -17,7 +17,7 @@ CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` ) ENGINE = Dictionary(`dict_db_01224.dict`) NOT_LOADED -Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) +Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED key UInt64 val UInt64 diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql index 879c95a61fe..543538be0d8 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.sql +++ b/tests/queries/0_stateless/01259_combinator_distinct.sql @@ -5,7 +5,7 @@ SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numb SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM numbers(1000)); SELECT round(corrStable(x, y), 5) FROM (SELECT DISTINCT number % 10 AS x, number % 5 AS y FROM numbers(1000)); -SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x; +SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x ORDER BY x; SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000); EXPLAIN SYNTAX SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000); diff --git a/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference index d1e2cba5663..48d8b4ee8a1 100644 --- a/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference +++ b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference @@ -1 +1 @@ -CREATE USER user IDENTIFIED WITH plaintext_password BY 'hello' +CREATE USER user IDENTIFIED WITH plaintext_password BY 'hello' diff --git a/tests/queries/0_stateless/01318_decrypt.reference b/tests/queries/0_stateless/01318_decrypt.reference index cabb7bb9b83..e8584acab40 100644 --- a/tests/queries/0_stateless/01318_decrypt.reference +++ b/tests/queries/0_stateless/01318_decrypt.reference @@ -87,3 +87,7 @@ aes-256-gcm 1 aes-256-gcm 1 aes-256-gcm 1 F56E87055BC32D0EEB31B2EACC2BF2A5 1 +2022-09-02 00:00:00 2 +2022-08-02 00:00:00 1 \N +2022-09-02 00:00:00 2 value2 +2022-09-02 00:00:01 3 \N diff --git a/tests/queries/0_stateless/01318_decrypt.sql b/tests/queries/0_stateless/01318_decrypt.sql index 565fbd02e0a..8cd1414d11b 100644 --- a/tests/queries/0_stateless/01318_decrypt.sql +++ b/tests/queries/0_stateless/01318_decrypt.sql @@ -129,4 +129,18 @@ SELECT hex(decrypt('aes-256-gcm', concat(ciphertext, tag), key, iv, aad)) as plaintext_actual, plaintext_actual = hex(plaintext); +-- tryDecrypt +CREATE TABLE decrypt_null ( + dt DateTime, + user_id UInt32, + encrypted String, + iv String +) ENGINE = Memory; + +INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 
'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); + +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454} +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); +SELECT dt, user_id, (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id; + DROP TABLE encryption_test; diff --git a/tests/queries/0_stateless/01356_state_resample.sql b/tests/queries/0_stateless/01356_state_resample.sql index 6be28e19d87..a3fb4d59afd 100644 --- a/tests/queries/0_stateless/01356_state_resample.sql +++ b/tests/queries/0_stateless/01356_state_resample.sql @@ -2,7 +2,7 @@ select sumResample(0, 20, 1)(number, number % 20) from numbers(200); select arrayMap(x -> finalizeAggregation(x), state) from (select sumStateResample(0, 20, 1)(number, number % 20) as state from numbers(200)); select arrayMap(x -> finalizeAggregation(x), state) from ( - select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 + select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 order by number % 3 ); select groupArrayResample(0, 20, 1)(number, number % 20) from numbers(50); diff --git a/tests/queries/0_stateless/01441_array_combinator.sql b/tests/queries/0_stateless/01441_array_combinator.sql index 68fd050940d..75a511f8461 100644 --- a/tests/queries/0_stateless/01441_array_combinator.sql +++ b/tests/queries/0_stateless/01441_array_combinator.sql @@ -1 +1 @@ -SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k; +SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k ORDER BY k; diff --git a/tests/queries/0_stateless/01472_many_rows_in_totals.sql b/tests/queries/0_stateless/01472_many_rows_in_totals.sql index 4869e264d76..d79d189a28d 100644 --- a/tests/queries/0_stateless/01472_many_rows_in_totals.sql +++ b/tests/queries/0_stateless/01472_many_rows_in_totals.sql @@ -1,9 +1,9 @@ set output_format_write_statistics = 0; -select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format Pretty; +select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format Pretty; select '--'; -select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format TSV; +select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format TSV; select '--'; -select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format JSON; +select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format JSON; select '--'; diff --git a/tests/queries/0_stateless/01509_dictionary_preallocate.reference b/tests/queries/0_stateless/01509_dictionary_preallocate.reference index 2f1e1d2c386..fe42689bc81 100644 --- a/tests/queries/0_stateless/01509_dictionary_preallocate.reference +++ b/tests/queries/0_stateless/01509_dictionary_preallocate.reference @@ -1,5 +1,5 @@ -CREATE DICTIONARY default.dict_01509\n(\n 
`key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0)) -CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1)) +CREATE DICTIONARY default.dict_01509\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0)) +CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1)) HashedDictionary: Preallocated 10000 elements - 0 diff --git a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh index 55b6110918b..9325cac0ae6 100755 --- a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh +++ b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -# Tags: long, no-replicated-database, no-s3-storage +# Tags: long, no-replicated-database # Tag no-replicated-database: Fails due to additional replicas or shards -# Tag no-s3-storage: Merge assigned to replica 2, but replication queues are stopped for it set -e diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index a61f13fc807..94d81c110cb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,5 +1,5 @@ -- Tags: no-s3-storage --- Temporary supressed +-- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; SET flatten_nested = 0; diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.sql b/tests/queries/0_stateless/01660_system_parts_smoke.sql index 8a1b0a12f81..cc925680425 100644 --- a/tests/queries/0_stateless/01660_system_parts_smoke.sql +++ b/tests/queries/0_stateless/01660_system_parts_smoke.sql @@ -28,7 +28,7 @@ SELECT name, active FROM system.parts WHERE database = currentDatabase() AND tab SELECT '# optimize'; SYSTEM START MERGES data_01660; OPTIMIZE TABLE data_01660 FINAL; -SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state; +SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state ORDER BY _state; -- TRUNCATE does not remove parts instantly SELECT '# truncate'; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference index 75435aebd67..aee06a3350a 100644 --- a/tests/queries/0_stateless/01691_DateTime64_clamp.reference +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -10,11 +10,11 @@ SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul')); 2020-01-01 00:00:00 SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null; SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul'); -1970-01-01 02:00:00.00 +1970-01-01 01:59:58.00 SELECT toDateTime64(-2., 2, 'Asia/Istanbul'); -1970-01-01 02:00:00.00 +1970-01-01 01:59:58.00 SELECT 
toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul'); -2106-02-07 09:28:16.00 +2242-03-16 15:56:32.00 SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null; -- These are outsize of extended range and hence clamped SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/01710_projections.reference b/tests/queries/0_stateless/01710_projections.reference index 1e4f659c639..578f7523830 100644 --- a/tests/queries/0_stateless/01710_projections.reference +++ b/tests/queries/0_stateless/01710_projections.reference @@ -3,5 +3,4 @@ 2020-10-24 00:00:00 1.3619605237696326 0.16794469697335793 0.7637956767025532 0.8899329799574005 0.6227685185389797 0.30795997278638165 0.7637956767025532 2020-10-24 00:00:00 19 -1.9455094931672063 0.7759802460082872 0.6 0 2020-10-24 00:00:00 852 894 -2 -1 999 diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.reference b/tests/queries/0_stateless/01734_datetime64_from_float.reference index 4021b6451aa..b8baabccd2b 100644 --- a/tests/queries/0_stateless/01734_datetime64_from_float.reference +++ b/tests/queries/0_stateless/01734_datetime64_from_float.reference @@ -1,3 +1,13 @@ 2005-03-18 03:58:31.222 2005-03-18 03:58:31.222 2005-03-18 03:58:31.222 +1970-01-01 00:00:00.000000000 +1970-01-01 00:00:00.000000000 +1900-04-15 00:53:20.000000000 +1900-04-15 00:53:20.000000000 +1900-01-01 00:00:00.000000000 +1900-01-01 00:00:00.000000000 +1900-01-01 00:00:00.000000000 +1900-01-01 00:00:00.000000000 +2261-07-15 11:33:20.000000000 +2261-07-15 11:33:20.000000000 diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql index bb837c681e3..c4290a0cadb 100644 --- a/tests/queries/0_stateless/01734_datetime64_from_float.sql +++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql @@ -1,3 +1,22 @@ SELECT CAST(1111111111.222 AS DateTime64(3, 'Asia/Istanbul')); SELECT toDateTime(1111111111.222, 3, 'Asia/Istanbul'); SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul'); + +SELECT toDateTime64(0.0, 9, 'UTC') ; +SELECT toDateTime64(0, 9, 'UTC'); + +SELECT toDateTime64(-2200000000.0, 9, 'UTC'); -- 1900-01-01 < value +SELECT toDateTime64(-2200000000, 9, 'UTC'); + +SELECT toDateTime64(-2300000000.0, 9, 'UTC'); -- value < 1900-01-01 +SELECT toDateTime64(-2300000000, 9, 'UTC'); + +SELECT toDateTime64(-999999999999.0, 9, 'UTC'); -- value << 1900-01-01 +SELECT toDateTime64(-999999999999, 9, 'UTC'); + +SELECT toDateTime64(9200000000.0, 9, 'UTC'); -- value < 2262-04-11 +SELECT toDateTime64(9200000000, 9, 'UTC'); + +SELECT toDateTime64(9300000000.0, 9, 'UTC'); -- { serverError 407 } # 2262-04-11 < value +SELECT toDateTime64(9300000000, 9, 'UTC'); -- { serverError 407 } + diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql index 11ecaf1ca2e..050aa33464e 100644 --- a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -47,5 +47,5 @@ SELECT field2 FROM agg_view01747 WHERE field1 = 'test'; drop table summing_table01747; drop view rates01747; drop view agg_view01747; -drop table dictst01747; drop DICTIONARY default.dict01747; +drop table dictst01747; diff --git a/tests/queries/0_stateless/01780_column_sparse.reference b/tests/queries/0_stateless/01780_column_sparse.reference index 1cbcf715d7f..3393361a19f 100644 --- 
a/tests/queries/0_stateless/01780_column_sparse.reference +++ b/tests/queries/0_stateless/01780_column_sparse.reference @@ -124,14 +124,14 @@ SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id; 192 0 [] [0,1] SELECT sum(u) FROM t_sparse; 1900 -SELECT sum(u) FROM t_sparse GROUP BY id % 7; -210 -360 -300 -240 -190 -330 -270 +SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7; +0 210 +1 360 +2 300 +3 240 +4 190 +5 330 +6 270 SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5; [1] [1,3] diff --git a/tests/queries/0_stateless/01780_column_sparse.sql b/tests/queries/0_stateless/01780_column_sparse.sql index 25cb2845322..8e3c4372d05 100644 --- a/tests/queries/0_stateless/01780_column_sparse.sql +++ b/tests/queries/0_stateless/01780_column_sparse.sql @@ -25,7 +25,7 @@ SELECT * FROM t_sparse WHERE arr1 != [] ORDER BY id; SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id; SELECT sum(u) FROM t_sparse; -SELECT sum(u) FROM t_sparse GROUP BY id % 7; +SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7; SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5; diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference index b5bf4e06a4c..dcfc193e119 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -136,7 +136,7 @@ 61885 61885 17216 -17217 +17218 2686 -------toRelativeDayNum--------- 39969 diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference index d4cb1477ed6..dddab828a25 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference @@ -95,4 +95,5 @@ FROM FROM numbers(10) ) GROUP BY b +ORDER BY b ASC 1 1 1 diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index a4729e89755..1f08439c0b6 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -58,8 +58,8 @@ SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1), EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d); -SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; -EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; +SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; +EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; -- fuzzer SELECT quantileDeterministic(0.99)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference b/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference index 45f2c41f0b0..dd04f942e41 100644 --- a/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference +++ 
b/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference @@ -1,2 +1,2 @@ -CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\' +CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\' Test dictionary with comment diff --git a/tests/queries/0_stateless/02029_test_options_requests.reference b/tests/queries/0_stateless/02029_test_options_requests.reference index 8b001eacbe3..ddf02c99a85 100644 --- a/tests/queries/0_stateless/02029_test_options_requests.reference +++ b/tests/queries/0_stateless/02029_test_options_requests.reference @@ -3,3 +3,8 @@ < Access-Control-Allow-Headers: origin, x-requested-with < Access-Control-Allow-Methods: POST, GET, OPTIONS < Access-Control-Max-Age: 86400 +< HTTP/1.1 403 Forbidden +< Access-Control-Allow-Origin: * +< Access-Control-Allow-Headers: origin, x-requested-with +< Access-Control-Allow-Methods: POST, GET, OPTIONS +< Access-Control-Max-Age: 86400 diff --git a/tests/queries/0_stateless/02029_test_options_requests.sh b/tests/queries/0_stateless/02029_test_options_requests.sh index 8ea09e3ffe6..136e2d13a58 100755 --- a/tests/queries/0_stateless/02029_test_options_requests.sh +++ b/tests/queries/0_stateless/02029_test_options_requests.sh @@ -6,3 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # grep all fields, that should be set for CORS support (see CORS.xml) $CLICKHOUSE_CURL "${CLICKHOUSE_URL}" -X OPTIONS -vs 2>&1 | grep -E "HTTP/1.1 204 No Content|Access-Control-Allow-Origin|Access-Control-Allow-Headers|Access-Control-Allow-Methods|Access-Control-Max-Age" + +# grep all fields, that should be set for CORS support (see CORS.xml) +echo 'SELECT 1' | $CLICKHOUSE_CURL -X POST -H 'Origin: clickhouse-test' "${CLICKHOUSE_URL}&password=wrong_password" --data @- -vs 2>&1 | grep -E "HTTP/1.1 403 Forbidden|Access-Control-Allow-Origin|Access-Control-Allow-Headers|Access-Control-Allow-Methods|Access-Control-Max-Age" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 02a0d339e3a..ad27b86c6f5 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -12,7 +12,6 @@ CREATE TABLE system.asynchronous_inserts `table` String, `format` String, `first_update` DateTime64(6), - `last_update` DateTime64(6), `total_bytes` UInt64, `entries.query_id` Array(String), `entries.bytes` Array(UInt64), @@ -184,6 +183,7 @@ CREATE TABLE system.disks `path` String, `free_space` UInt64, `total_space` UInt64, + `unreserved_space` UInt64, `keep_free_space` UInt64, `type` String, `is_encrypted` UInt8, diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 4116453b69a..59719f75d7c 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -1,4 +1,10 @@ #!/usr/bin/env bash +# Tags: no-debug + +# no-debug: Query is canceled by timeout after max_execution_time, +# but sending an exception to the 
client may hang +# for more than MAX_PROCESS_WAIT seconds in a slow debug build, +# and test will fail. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index 437b934c28c..8a7291c1f08 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -8,11 +8,13 @@ ExpressionTransform (Limit) Limit (Sorting) - MergingSortedTransform 2 → 1 - (Expression) - ExpressionTransform × 2 - (ReadFromMergeTree) - MergeTreeInOrder × 2 0 → 1 + FinishSortingTransform + PartialSortingTransform + MergingSortedTransform 2 → 1 + (Expression) + ExpressionTransform × 2 + (ReadFromMergeTree) + MergeTreeInOrder × 2 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 @@ -23,14 +25,16 @@ ExpressionTransform (Limit) Limit (Sorting) - MergingSortedTransform 2 → 1 - (Expression) - ExpressionTransform × 2 - (ReadFromMergeTree) - ReverseTransform - MergeTreeReverse 0 → 1 + FinishSortingTransform + PartialSortingTransform + MergingSortedTransform 2 → 1 + (Expression) + ExpressionTransform × 2 + (ReadFromMergeTree) ReverseTransform MergeTreeReverse 0 → 1 + ReverseTransform + MergeTreeReverse 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 diff --git a/tests/queries/0_stateless/02251_last_day_of_month.reference b/tests/queries/0_stateless/02251_last_day_of_month.reference index 0b83aff1e42..cf2dce76cc3 100644 --- a/tests/queries/0_stateless/02251_last_day_of_month.reference +++ b/tests/queries/0_stateless/02251_last_day_of_month.reference @@ -5,3 +5,4 @@ 2021-12-31 2021-12-31 2021-12-31 2020-12-31 2020-12-31 2020-12-31 2020-12-31 2020-12-31 +1970-01-31 1970-01-31 1900-01-31 diff --git a/tests/queries/0_stateless/02251_last_day_of_month.sql b/tests/queries/0_stateless/02251_last_day_of_month.sql index 1261f051e17..dc7a076c07d 100644 --- a/tests/queries/0_stateless/02251_last_day_of_month.sql +++ b/tests/queries/0_stateless/02251_last_day_of_month.sql @@ -44,3 +44,12 @@ SELECT toLastDayOfMonth(date_value), toLastDayOfMonth(date_time_value), toLastDa WITH toDate('2020-12-12') AS date_value SELECT last_day(date_value), LAST_DAY(date_value); + +-- boundaries +WITH + toDate('1970-01-01') AS date_value, + toDateTime('1970-01-01 11:22:33') AS date_time_value, + toDateTime64('1900-01-01 11:22:33', 3) AS date_time_64_value +SELECT toLastDayOfMonth(date_value), toLastDayOfMonth(date_time_value), toLastDayOfMonth(date_time_64_value) +SETTINGS enable_extended_results_for_datetime_functions = true; + diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference index 628c2fc0714..26232d1e281 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference @@ -110,3 +110,10 @@ select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_or 0 -- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct 0 +-- bug 42185, distinct in order and empty sort description +-- distinct in order, sorting key tuple() +1 +0 +-- distinct in order, sorting key contains function +2000-01-01 00:00:00 +2000-01-01 diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql 
b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql index a1e7d7340a3..a794709caba 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -95,3 +95,23 @@ select count() as diff from (select distinct * from distinct_in_order except sel drop table if exists distinct_in_order; drop table if exists ordinary_distinct; drop table if exists distinct_cardinality_low; + +-- bug 42185 +drop table if exists sorting_key_empty_tuple; +drop table if exists sorting_key_contain_function; + +select '-- bug 42185, distinct in order and empty sort description'; +select '-- distinct in order, sorting key tuple()'; +create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple(); +insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10); +select distinct a from sorting_key_empty_tuple; + +select '-- distinct in order, sorting key contains function'; +create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)); +insert into sorting_key_contain_function values ('2000-01-01', 1); +insert into sorting_key_contain_function values ('2000-01-01', 2); +select distinct datetime from sorting_key_contain_function; +select distinct toDate(datetime) from sorting_key_contain_function; + +drop table sorting_key_empty_tuple; +drop table sorting_key_contain_function; diff --git a/tests/queries/0_stateless/02337_base58.reference b/tests/queries/0_stateless/02337_base58.reference index 20b9124c150..a51eeec7bd1 100644 --- a/tests/queries/0_stateless/02337_base58.reference +++ b/tests/queries/0_stateless/02337_base58.reference @@ -8,6 +8,22 @@ fooba foobar Hello world! +f +fo +foo +foob +fooba +foobar +Hello world! 
+ + +foob + + + +foobar + + 2m 8o8 bQbp diff --git a/tests/queries/0_stateless/02337_base58.sql b/tests/queries/0_stateless/02337_base58.sql index 3d6da61c50a..416f975ecf6 100644 --- a/tests/queries/0_stateless/02337_base58.sql +++ b/tests/queries/0_stateless/02337_base58.sql @@ -5,6 +5,8 @@ SELECT base58Encode('Hold my beer...', 'Second arg'); -- { serverError 42 } SELECT base58Decode('Hold my beer...'); -- { serverError 36 } SELECT base58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); +SELECT tryBase58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); +SELECT tryBase58Decode(val) FROM (SELECT arrayJoin(['Hold my beer', 'Hold another beer', '3csAg9', 'And a wine', 'And another wine', 'And a lemonade', 't1Zv2yaZ', 'And another wine']) val); SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ', '']) val); diff --git a/tests/queries/0_stateless/02343_read_from_s3_compressed_blocks.sql b/tests/queries/0_stateless/02343_read_from_s3_compressed_blocks.sql index 03e32d32497..4049cb7b382 100644 --- a/tests/queries/0_stateless/02343_read_from_s3_compressed_blocks.sql +++ b/tests/queries/0_stateless/02343_read_from_s3_compressed_blocks.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest DROP TABLE IF EXISTS t_s3_compressed_blocks; diff --git a/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference b/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference new file mode 100644 index 00000000000..3a2e3f4984a --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference @@ -0,0 +1 @@ +-1 diff --git a/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql b/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql new file mode 100644 index 00000000000..3ca1ced8dd5 --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql @@ -0,0 +1 @@ +SELECT round(rankCorr(number, -number)) FROM numbers(5000000); diff --git a/tests/queries/0_stateless/02369_lost_part_intersecting_merges.reference b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.reference new file mode 100644 index 00000000000..954a1f2c345 --- /dev/null +++ b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.reference @@ -0,0 +1,11 @@ +1 1 all_0_1_1 +1 2 all_0_1_1 +2 1 all_0_0_0 +2 2 all_1_1_0 +0 +3 1 all_0_2_2 +3 2 all_0_2_2 +3 3 all_0_2_2 +4 1 all_0_2_2 +4 2 all_0_2_2 +4 3 all_0_2_2 diff --git a/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh new file mode 100755 index 00000000000..11caf1e45de --- /dev/null +++ b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt1;" +$CLICKHOUSE_CLIENT -q "drop table if exists rmt2;" + +$CLICKHOUSE_CLIENT -q "create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '1') order by n;" +$CLICKHOUSE_CLIENT -q "create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '2') order by n;" + +$CLICKHOUSE_CLIENT -q "insert into rmt1 values (1);" +$CLICKHOUSE_CLIENT -q "insert into rmt1 values (2);" + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "system sync replica rmt2;" +$CLICKHOUSE_CLIENT -q "system stop merges rmt2;" +$CLICKHOUSE_CLIENT -q "optimize table rmt1 final;" + +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt1 order by n;" +$CLICKHOUSE_CLIENT -q "select 2, *, _part from rmt2 order by n;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_1_1'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -rf $path + +$CLICKHOUSE_CLIENT -q "select * from rmt1;" 2>/dev/null + +$CLICKHOUSE_CLIENT -q "detach table rmt1;" +$CLICKHOUSE_CLIENT -q "attach table rmt1;" + +$CLICKHOUSE_CLIENT -q "insert into rmt1 values (3);" +$CLICKHOUSE_CLIENT -q "system start merges rmt2;" +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "optimize table rmt1 final;" + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "system sync replica rmt2;" +$CLICKHOUSE_CLIENT -q "select 3, *, _part from rmt1 order by n;" +$CLICKHOUSE_CLIENT -q "select 4, *, _part from rmt2 order by n;" + +$CLICKHOUSE_CLIENT -q "drop table rmt1;" +$CLICKHOUSE_CLIENT -q "drop table rmt2;" diff --git a/tests/queries/0_stateless/02396_system_parts_race_condition_rm.reference b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.reference new file mode 100644 index 00000000000..c3165c3d6ef --- /dev/null +++ b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table +Consistency: 1 diff --git a/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh new file mode 100755 index 00000000000..5df1a9ba095 --- /dev/null +++ b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, no-parallel, disabled + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib + +set -e + +# NOTE this test is copy of 00992_system_parts_race_condition_zookeeper_long, but with extra thread7 + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; + + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 +" + +function thread1() +{ + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + while true; do $CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"; done +} + +function thread2() +{ + while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table0 MODIFY COLUMN h UInt64; ALTER TABLE alter_table0 DROP COLUMN h;"; done +} + +function thread3() +{ + while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table0 SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done +} + +function thread4() +{ + while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done +} + +function thread5() +{ + while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done +} + +function thread7() +{ + while true; do + path=$($CLICKHOUSE_CLIENT -q "SELECT path FROM system.parts WHERE database='$CLICKHOUSE_DATABASE' AND table LIKE 'alter_table%' ORDER BY rand() LIMIT 1") + if [ -z "$path" ]; then continue; fi + # ensure that path is absolute before removing + $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path') format Null" || exit + rm -rf $path 2> /dev/null + sleep 0.$RANDOM; + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread1; +export -f thread2; +export -f thread3; +export -f thread4; +export -f thread5; +export -f thread7; + +TIMEOUT=10 + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null 
& + +timeout $TIMEOUT bash -c thread7 & + +wait +check_replication_consistency "alter_table" "count(), sum(a), sum(b), round(sum(c))" + +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table0;" 2> >(grep -F -v 'is already started to be removing by another replica right now') & +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table1;" 2> >(grep -F -v 'is already started to be removing by another replica right now') & +wait diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.reference b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.reference new file mode 100644 index 00000000000..6e705f05f04 --- /dev/null +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table_ +Consistency: 1 diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh new file mode 100755 index 00000000000..2372d30497e --- /dev/null +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check, disabled + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib + +set -e + +# NOTE this test is copy of 00993_system_parts_race_condition_drop_zookeeper, but with extra thread7 + +function thread1() +{ + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + while true; do + $CLICKHOUSE_CLIENT --query "SELECT * FROM system.parts FORMAT Null"; + done +} + +function thread2() +{ + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table_$REPLICA ADD COLUMN h String '0'; ALTER TABLE alter_table_$REPLICA MODIFY COLUMN h UInt64; ALTER TABLE alter_table_$REPLICA DROP COLUMN h;"; + done +} + +function thread3() +{ + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table_$REPLICA SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? 
NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; + done +} + +function thread4() +{ + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table_$REPLICA FINAL"; + sleep 0.$RANDOM; + done +} + +function thread5() +{ + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table_$REPLICA DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; + sleep 0.$RANDOM; + done +} + +function thread6() +{ + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS alter_table_$REPLICA; + CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;"; + sleep 0.$RANDOM; + done +} + +function thread7() +{ + while true; do + path=$($CLICKHOUSE_CLIENT -q "SELECT path FROM system.parts WHERE database='$CLICKHOUSE_DATABASE' AND table LIKE 'alter_table_%' ORDER BY rand() LIMIT 1") + if [ -z "$path" ]; then continue; fi + # ensure that path is absolute before removing + $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path') format Null" || exit + rm -rf $path 2> /dev/null + sleep 0.$RANDOM; + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread1; +export -f thread2; +export -f thread3; +export -f thread4; +export -f thread5; +export -f thread6; +export -f thread7; + +TIMEOUT=15 + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" & + +timeout $TIMEOUT bash -c thread1 2> /dev/null & +timeout $TIMEOUT bash -c thread2 2> /dev/null & +timeout $TIMEOUT bash -c thread3 2> /dev/null & +timeout $TIMEOUT bash -c thread4 2> /dev/null & +timeout $TIMEOUT bash -c thread5 2> /dev/null & +timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" & + +timeout $TIMEOUT bash -c thread7 & + +wait + +check_replication_consistency "alter_table_" "count(), sum(a), sum(b), round(sum(c))" + +for i in {0..9}; do + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i" 2>&1 | grep "was not completely removed from ZooKeeper" & +done + +wait diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 72aa4da0bac..4e2e97e487b 100644 --- 
a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -13,7 +13,7 @@ def main(): sock = socket(AF_INET, SOCK_STREAM) sock.connect((host, port)) - sock.settimeout(5) + sock.settimeout(60) s = "POST /play HTTP/1.1\r\n" s += "Host: %s\r\n" % host s += "Content-type: multipart/form-data\r\n" diff --git a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference index aa950215f59..5773810bf64 100644 --- a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference +++ b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference @@ -26,6 +26,22 @@ toLastDayOfMonth;toDate32;true 1920-02-29 type;toLastDayOfMonth;toDate32;true Date32 toLastDayOfMonth;toDateTime64;true 1920-02-29 type;toLastDayOfMonth;toDateTime64;true Date32 +toStartOfDay;toDateTime64;true 1920-02-02 00:00:00.000 +type;toStartOfDay;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfHour;toDateTime64;true 1920-02-02 10:00:00.000 +type;toStartOfHour;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfMinute;toDateTime64;true 1920-02-02 10:23:00.000 +type;toStartOfMinute;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfFiveMinutes;toDateTime64;true 1920-02-02 10:20:00.000 +type;toStartOfFiveMinutes;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfTenMinutes;toDateTime64;true 1920-02-02 10:20:00.000 +type;toStartOfTenMinutes;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfFifteenMinutes;toDateTime64;true 1920-02-02 10:15:00.000 +type;toStartOfFifteenMinutes;toDateTime64;true DateTime64(3, \'UTC\') +timeSlot;toDateTime64;true 1920-02-02 10:00:00.000 +type;timeSlot;toDateTime64;true DateTime64(3, \'UTC\') +toStartOfDay;toDate32;true 1920-02-02 00:00:00.000 +type;toStartOfDay;toDate32;true DateTime64(3, \'UTC\') toStartOfYear;toDate32;false 1970-01-01 type;toStartOfYear;toDate32;false Date toStartOfYear;toDateTime64;false 1970-01-01 @@ -54,3 +70,19 @@ toLastDayOfMonth;toDate32;false 1970-01-01 type;toLastDayOfMonth;toDate32;false Date toLastDayOfMonth;toDateTime64;false 1970-01-01 type;toLastDayOfMonth;toDateTime64;false Date +toStartOfDay;toDateTime64;false 1970-01-01 00:00:00 +type;toStartOfDay;toDateTime64;false DateTime(\'UTC\') +toStartOfHour;toDateTime64;false 1970-01-01 00:00:00 +type;toStartOfHour;toDateTime64;false DateTime(\'UTC\') +toStartOfMinute;toDateTime64;false 1970-01-01 00:00:00 +type;toStartOfMinute;toDateTime64;false DateTime(\'UTC\') +toStartOfFiveMinutes;toDateTime64;false 2056-03-09 16:48:16 +type;toStartOfFiveMinutes;toDateTime64;false DateTime(\'UTC\') +toStartOfTenMinutes;toDateTime64;false 2056-03-09 16:48:16 +type;toStartOfTenMinutes;toDateTime64;false DateTime(\'UTC\') +toStartOfFifteenMinutes;toDateTime64;false 2056-03-09 16:43:16 +type;toStartOfFifteenMinutes;toDateTime64;false DateTime(\'UTC\') +timeSlot;toDateTime64;false 2056-03-09 16:58:16 +type;timeSlot;toDateTime64;false DateTime(\'UTC\') +toStartOfDay;toDate32;false 1970-01-01 00:00:00 +type;toStartOfDay;toDate32;false DateTime(\'UTC\') diff --git a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.sql.j2 b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.sql.j2 index 70c07c7792a..00a1a0030aa 100644 --- a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.sql.j2 +++ 
b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.sql.j2 @@ -5,5 +5,10 @@ SELECT 'type;{{ date_fun }};toDate32;{{ option_value }}', toTypeName({{ date_fun SELECT '{{ date_fun }};toDateTime64;{{ option_value }}', {{ date_fun }}(toDateTime64('1920-02-02 10:20:30', 3)) SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; SELECT 'type;{{ date_fun }};toDateTime64;{{ option_value }}', toTypeName({{ date_fun }}(toDateTime64('1920-02-02 10:20:30', 3))) SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; {% endfor -%} +{% for date_fun in ['toStartOfDay', 'toStartOfHour', 'toStartOfMinute', 'toStartOfFiveMinutes', 'toStartOfTenMinutes', 'toStartOfFifteenMinutes', 'timeSlot'] -%} +SELECT '{{ date_fun }};toDateTime64;{{ option_value }}', {{ date_fun }}(toDateTime64('1920-02-02 10:23:33.789', 3, 'UTC'), 'UTC') SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; +SELECT 'type;{{ date_fun }};toDateTime64;{{ option_value }}', toTypeName({{ date_fun }}(toDateTime64('1920-02-02 10:20:30', 3, 'UTC'), 'UTC')) SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; +{% endfor -%} +SELECT 'toStartOfDay;toDate32;{{ option_value }}', toStartOfDay(toDate32('1920-02-02', 'UTC'), 'UTC') SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; +SELECT 'type;toStartOfDay;toDate32;{{ option_value }}', toTypeName(toStartOfDay(toDate32('1920-02-02', 'UTC'), 'UTC')) SETTINGS enable_extended_results_for_datetime_functions = {{ option_value }}; {% endfor -%} - diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 6e0e41f11b8..c7ac00ee18f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -908,6 +908,7 @@ trimBoth trimLeft trimRight trunc +tryBase58Decode tumble tumbleEnd tumbleStart diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql index 3202a28cdd0..ed95c06d016 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql @@ -15,5 +15,5 @@ AND name NOT IN ( 'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3', 'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo', 'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD', - 'lemmatize', 'tokenize', 'stem', 'synonyms' + 'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test ) ORDER BY name; diff --git a/tests/queries/0_stateless/02421_explain_subquery.reference b/tests/queries/0_stateless/02421_explain_subquery.reference new file mode 100644 index 00000000000..c18b4e9b082 --- /dev/null +++ b/tests/queries/0_stateless/02421_explain_subquery.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_explain_subquery.sql b/tests/queries/0_stateless/02421_explain_subquery.sql new file mode 100644 index 00000000000..af80e51bca3 --- /dev/null +++ b/tests/queries/0_stateless/02421_explain_subquery.sql @@ -0,0 +1,23 @@ +SELECT 
count() > 3 FROM (EXPLAIN PIPELINE header = 1 SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain LIKE '%Header: number UInt64%'; +SELECT count() > 0 FROM (EXPLAIN PLAN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; +SELECT count() > 0 FROM (EXPLAIN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; +SELECT count() > 0 FROM (EXPLAIN CURRENT TRANSACTION); +SELECT count() == 1 FROM (EXPLAIN SYNTAX SELECT number FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE 'SELECT%'; +SELECT trim(explain) == 'Asterisk' FROM (EXPLAIN AST SELECT * FROM system.numbers LIMIT 10) WHERE explain LIKE '%Asterisk%'; + +SELECT * FROM ( + EXPLAIN AST SELECT * FROM ( + EXPLAIN PLAN SELECT * FROM ( + EXPLAIN SYNTAX SELECT trim(explain) == 'Asterisk' FROM ( + EXPLAIN AST SELECT * FROM system.numbers LIMIT 10 + ) WHERE explain LIKE '%Asterisk%' + ) + ) +) FORMAT Null; + +CREATE TABLE t1 ( a UInt64 ) Engine = MergeTree ORDER BY tuple() AS SELECT number AS a FROM system.numbers LIMIT 100000; + +SELECT rows > 1000 FROM (EXPLAIN ESTIMATE SELECT sum(a) FROM t1); +SELECT count() == 1 FROM (EXPLAIN ESTIMATE SELECT sum(a) FROM t1); + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference new file mode 100644 index 00000000000..378b7d8cec4 --- /dev/null +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference @@ -0,0 +1,21 @@ +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql new file mode 100644 index 00000000000..579f468ee54 --- /dev/null +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql @@ -0,0 +1,32 @@ +SELECT toDecimal32(1.555,3) IN (1.5551); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal32(1.555,3) IN (1.550,1.5); + +SELECT toDecimal64(1.555,3) IN (1.5551); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal64(1.555,3) IN (1.550,1.5); + +SELECT toDecimal128(1.555,3) IN (1.5551); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal128(1.555,3) IN (1.550,1.5); + +SELECT toDecimal256(1.555,3) IN (1.5551); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal256(1.555,3) IN (1.550,1.5); + +DROP TABLE IF EXISTS decimal_in_float_test; + +CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; +INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); + +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); +SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333); +SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44); +SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444); + +DROP TABLE IF EXISTS decimal_in_float_test; diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ 
b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql new file mode 100644 index 00000000000..33b0ea4b818 --- /dev/null +++ b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql @@ -0,0 +1,21 @@ +-- From https://github.com/ClickHouse/ClickHouse/issues/41814 +drop table if exists test; + +create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d); + +insert into test select number, number, '2022-01-01 00:00:00' from numbers(1000000); + +select count() from test where a = (select toUInt64(1) where 1 = 2) settings enable_early_constant_folding = 0, force_primary_key = 1; + +drop table test; + +-- From https://github.com/ClickHouse/ClickHouse/issues/34063 +drop table if exists test_null_filter; + +create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key; + +insert into test_null_filter select number, number from numbers(10000000); + +select count() from test_null_filter where key = null and value > 0 settings force_primary_key = 1; + +drop table test_null_filter; diff --git a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference new file mode 100644 index 00000000000..0ecea03c64e --- /dev/null +++ b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference @@ -0,0 +1,6 @@ +CREATE DICTIONARY default.dict\n(\n `id` UInt32,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' DB \'default\' TABLE \'view\'))\nLIFETIME(MIN 0 MAX 600)\nLAYOUT(HASHED()) +CREATE TABLE default.table\n(\n `col` String MATERIALIZED dictGet(\'default.dict\', \'value\', toUInt32(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +1 v +1 v +1 v +2 a diff --git a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql new file mode 100644 index 00000000000..49e1e36acc9 --- /dev/null +++ b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql @@ -0,0 +1,40 @@ +DROP TABLE IF EXISTS table; +DROP DICTIONARY IF EXISTS dict; +DROP TABLE IF EXISTS view; + +CREATE TABLE view (id UInt32, value String) ENGINE=ReplicatedMergeTree('/test/2449/{database}', '1') ORDER BY id; +INSERT INTO view VALUES (1, 'v'); + +CREATE DICTIONARY dict (id UInt32, value String) +PRIMARY KEY id +SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db currentDatabase() table 'view')) +LAYOUT (HASHED()) LIFETIME (600); + +SHOW CREATE dict; + +CREATE TABLE table +( + col MATERIALIZED dictGet(currentDatabase() || '.dict', 'value', toUInt32(1)) +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SHOW CREATE TABLE table; + +SELECT * FROM dictionary('dict'); + +DROP TABLE view; -- {serverError HAVE_DEPENDENT_OBJECTS} + +-- check that table is not readonly +INSERT INTO view VALUES (2, 'a'); + +DROP DICTIONARY dict; -- {serverError HAVE_DEPENDENT_OBJECTS} + +-- check that dictionary was not detached +SELECT * FROM dictionary('dict'); +SYSTEM RELOAD DICTIONARY dict; +SELECT * FROM dictionary('dict') ORDER BY id; + +DROP TABLE table; +DROP DICTIONARY dict; +DROP TABLE view; diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.reference 
b/tests/queries/0_stateless/02451_order_by_monotonic.reference new file mode 100644 index 00000000000..d3de324a7e1 --- /dev/null +++ b/tests/queries/0_stateless/02451_order_by_monotonic.reference @@ -0,0 +1,22 @@ +4 +2022-09-09 12:00:00 0 +2022-09-09 12:00:00 0x +2022-09-09 12:00:00 0x +2022-09-09 12:00:00 1 +2022-09-09 12:00:00 1x + Prefix sort description: toStartOfMinute(t) ASC + Result sort description: toStartOfMinute(t) ASC, c1 ASC + Prefix sort description: toStartOfMinute(t) ASC + Result sort description: toStartOfMinute(t) ASC + Prefix sort description: negate(a) ASC + Result sort description: negate(a) ASC + Prefix sort description: negate(a) ASC, negate(b) ASC + Result sort description: negate(a) ASC, negate(b) ASC + Prefix sort description: a DESC, negate(b) ASC + Result sort description: a DESC, negate(b) ASC + Prefix sort description: negate(a) ASC, b DESC + Result sort description: negate(a) ASC, b DESC + Prefix sort description: negate(a) ASC + Result sort description: negate(a) ASC, b ASC + Prefix sort description: a ASC + Result sort description: a ASC, negate(b) ASC diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.sh b/tests/queries/0_stateless/02451_order_by_monotonic.sh new file mode 100755 index 00000000000..cc26ba91e1c --- /dev/null +++ b/tests/queries/0_stateless/02451_order_by_monotonic.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function explain_sort_description() +{ + out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1") + echo "$out" | grep "Prefix sort description:" + echo "$out" | grep "Result sort description:" +} + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1) + AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL + SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)" + +$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM + (SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)" + +$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" + +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)" + +$CLICKHOUSE_CLIENT -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)" + +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a" +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC" +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b" +explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" diff --git a/tests/queries/0_stateless/02454_set_parameters_formatting.reference 
b/tests/queries/0_stateless/02454_set_parameters_formatting.reference new file mode 100644 index 00000000000..1098a8159f3 --- /dev/null +++ b/tests/queries/0_stateless/02454_set_parameters_formatting.reference @@ -0,0 +1,3 @@ +SET param_a = 1 +SET max_threads = 1, param_a = 1 +SET max_threads = 1, param_a = 1 diff --git a/tests/queries/0_stateless/02454_set_parameters_formatting.sh b/tests/queries/0_stateless/02454_set_parameters_formatting.sh new file mode 100755 index 00000000000..8c2de5635d3 --- /dev/null +++ b/tests/queries/0_stateless/02454_set_parameters_formatting.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "set param_a = 1" | $format +echo "set max_threads = 1, param_a = 1" | $format +echo "set param_a = 1, max_threads = 1" | $format diff --git a/tests/queries/0_stateless/02455_count_state_asterisk.reference b/tests/queries/0_stateless/02455_count_state_asterisk.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02455_count_state_asterisk.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02455_count_state_asterisk.sql b/tests/queries/0_stateless/02455_count_state_asterisk.sql new file mode 100644 index 00000000000..cb6ded3def0 --- /dev/null +++ b/tests/queries/0_stateless/02455_count_state_asterisk.sql @@ -0,0 +1,11 @@ +drop table if exists a; +drop table if exists b; + +create table a (i int, j int) engine Log; +create materialized view b engine Log as select countState(*) from a; + +insert into a values (1, 2); +select countMerge(*) from b; + +drop table b; +drop table a; diff --git a/tests/queries/0_stateless/02455_default_union_except_intersect.reference b/tests/queries/0_stateless/02455_default_union_except_intersect.reference new file mode 100644 index 00000000000..371c3784fae --- /dev/null +++ b/tests/queries/0_stateless/02455_default_union_except_intersect.reference @@ -0,0 +1,9 @@ +SELECT 1 +UNION +SELECT 1 +SELECT 2 +EXCEPT +SELECT 2 +SELECT 3 +INTERSECT +SELECT 3 diff --git a/tests/queries/0_stateless/02455_default_union_except_intersect.sh b/tests/queries/0_stateless/02455_default_union_except_intersect.sh new file mode 100755 index 00000000000..285b3357f46 --- /dev/null +++ b/tests/queries/0_stateless/02455_default_union_except_intersect.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT 1 UNION SELECT 1" | $format +echo "SELECT 2 EXCEPT SELECT 2" | $format +echo "SELECT 3 INTERSECT SELECT 3" | $format diff --git a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference similarity index 100% rename from tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference rename to tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference diff --git a/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql new file mode 100644 index 00000000000..d000fb4479c --- /dev/null +++ b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql @@ -0,0 +1,4 @@ +CREATE TABLE test_a (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company PRIMARY KEY (id) ORDER BY (id, company); +INSERT INTO test_a SELECT number%10 as id, number%2 as company, count() as total FROM numbers(100) GROUP BY id,company; +CREATE TABLE test_b (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company ORDER BY (id, company); +ALTER TABLE test_b REPLACE PARTITION '0' FROM test_a; -- {serverError BAD_ARGUMENTS} diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.reference b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.reference new file mode 100644 index 00000000000..1f446c9a08e --- /dev/null +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.reference @@ -0,0 +1,2 @@ +fred81 4081 Fred Lane 1981-05-18 +fred81 4081 Fred Lane 1981-05-18 diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh new file mode 100755 index 00000000000..7506e78455d --- /dev/null +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/ + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000" diff --git a/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.reference b/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.reference new file mode 100644 index 00000000000..9d915a6f127 --- /dev/null +++ b/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.reference @@ -0,0 +1,3 @@ +0C673DA1EF75D2DAA895483138340F041881EA975D57C1435D487F454A111B74 +007ED777B7A1CBA08D37BDA339EFABB42FA460D953070779903125B0F4D5FB5F +E25232688E2A4D3A55174DECB33815A27B2A92DC8839E3CDA456105C259BB071 diff --git a/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.sql b/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.sql new file mode 100644 index 00000000000..88484f482b5 --- /dev/null +++ b/tests/queries/0_stateless/02456_BLAKE3_hash_function_test.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest + +SELECT hex(BLAKE3('test_1')); +SELECT hex(BLAKE3('test_2')); +SELECT hex(BLAKE3('test_3')); diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql new file mode 100644 index 00000000000..d66c5f0e59e --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t1 SYNC; +CREATE TABLE t1 (v UInt64) ENGINE=ReplicatedMergeTree('/test/tables/{database}/test/t1', 'r1') ORDER BY v PARTITION BY v; +INSERT INTO t1 values(1); +ALTER TABLE t1 ADD COLUMN s String; +INSERT INTO t1 values(1, '1'); +ALTER TABLE t1 MODIFY COLUMN s Nullable(String); +-- SELECT _part, * FROM t1; + +alter table t1 detach partition 1; + +SELECT _part, * FROM t1; +--0 rows in set. Elapsed: 0.001 sec. 
+ +alter table t1 attach partition 1; +select count() from t1; + diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference b/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference new file mode 100644 index 00000000000..fa033ae7677 --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference @@ -0,0 +1 @@ +1,"one",1,0 diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql b/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql new file mode 100644 index 00000000000..6fab3fa3735 --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS column_modify_test; + +CREATE TABLE column_modify_test (id UInt64, val String, other_col UInt64) engine=MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part=0; +INSERT INTO column_modify_test VALUES (1,'one',0); +INSERT INTO column_modify_test VALUES (2,'two',0); + +-- on 21.9 that was done via the mutations mechanism +ALTER TABLE column_modify_test MODIFY COLUMN val Nullable(String); + +-- but since 21.10 it only applies that to new parts, so old parts keep the old schema +--SELECT * FROM system.mutations; + +INSERT INTO column_modify_test VALUES (3,Null,0); + +--select name, path, type, active, modification_time from system.parts_columns where table='column_modify_test' and column='val'; + +-- till now everything looks ok +--SELECT * FROM column_modify_test; + +-- now we do a mutation. It will affect one of the parts +-- and in that part it will update columns.txt to the latest 'correct' state w/o updating the column file! +alter table column_modify_test update other_col=1 where id = 1 SETTINGS mutations_sync=1; + +-- row 1 is damaged now: the column file of the val column & columns.txt are out of sync! +SELECT *, throwIf(val <> 'one') FROM column_modify_test WHERE id = 1 FORMAT CSV; + diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.reference b/tests/queries/0_stateless/02456_async_inserts_logs.reference new file mode 100644 index 00000000000..efd8a88eca4 --- /dev/null +++ b/tests/queries/0_stateless/02456_async_inserts_logs.reference @@ -0,0 +1,7 @@ +5 + Values 21 1 Ok 1 +t_async_inserts_logs JSONEachRow 39 1 Ok 1 +t_async_inserts_logs Values 8 1 Ok 1 +t_async_inserts_logs JSONEachRow 6 0 ParsingError 1 +t_async_inserts_logs Values 6 0 ParsingError 1 +t_async_inserts_logs Values 8 0 FlushError 1 diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.sh b/tests/queries/0_stateless/02456_async_inserts_logs.sh new file mode 100755 index 00000000000..006455e2d42 --- /dev/null +++ b/tests/queries/0_stateless/02456_async_inserts_logs.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_async_inserts_logs" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_async_inserts_logs (id UInt32, s String) ENGINE = MergeTree ORDER BY id" + +${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO t_async_inserts_logs FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' & +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_async_inserts_logs VALUES (1, 'a')" & + +${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO t_async_inserts_logs FORMAT JSONEachRow qqqqqq' > /dev/null 2>&1 & +${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO t_async_inserts_logs VALUES qqqqqq' > /dev/null 2>&1 & + +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), t_async_inserts_logs) VALUES (1, 'aaa') (2, 'bbb')" & + +wait + +${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_async_inserts_logs FINAL" +${CLICKHOUSE_CLIENT} -q "ALTER TABLE t_async_inserts_logs MODIFY SETTING parts_to_throw_insert = 1" + +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_async_inserts_logs VALUES (1, 'a')" > /dev/null 2>&1 & + +wait + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_async_inserts_logs" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q " + SELECT table, format, bytes, empty(exception), status, + status = 'ParsingError' ? flush_time_microseconds = 0 : flush_time_microseconds > event_time_microseconds AS time_ok + FROM system.asynchronous_insert_log + WHERE database = '$CLICKHOUSE_DATABASE' OR query ILIKE 'INSERT INTO FUNCTION%$CLICKHOUSE_DATABASE%' + ORDER BY table, status, format" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE t_async_inserts_logs" diff --git a/tests/queries/0_stateless/02456_bloom_filter_assert.reference b/tests/queries/0_stateless/02456_bloom_filter_assert.reference new file mode 100644 index 00000000000..a21090781f3 --- /dev/null +++ b/tests/queries/0_stateless/02456_bloom_filter_assert.reference @@ -0,0 +1,7 @@ +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 b/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 new file mode 100644 index 00000000000..8322d3d29e7 --- /dev/null +++ b/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 @@ -0,0 +1,28 @@ +{% for type in ["Int8", "Int16", "Int32", "Int64", 'UInt8', 'UInt16', 'UInt32'] -%} + +DROP TABLE IF EXISTS bftest__fuzz_21; + +CREATE TABLE bftest__fuzz_21 +( + `k` Int64, + `x` Array({{ type }}), + INDEX ix1 x TYPE bloom_filter GRANULARITY 3 +) +ENGINE = MergeTree +ORDER BY k; + +INSERT INTO bftest__fuzz_21 (k, x) SELECT + number, + arrayMap(i -> (rand64() % 565656), range(10)) +FROM numbers(1000); + +{% if 'UInt' in type -%} + SELECT count() FROM bftest__fuzz_21 WHERE hasAll(x, [42, -42]) SETTINGS use_skip_indexes=1; + SELECT count() FROM bftest__fuzz_21 WHERE hasAll(x, [42, -42]) SETTINGS use_skip_indexes=1, force_data_skipping_indices='ix1'; -- { serverError INDEX_NOT_USED } +{% else -%} + SELECT count() FROM bftest__fuzz_21 WHERE hasAll(x, [42, -42]) SETTINGS use_skip_indexes=1, force_data_skipping_indices='ix1'; +{% endif -%} + +DROP TABLE IF EXISTS bftest__fuzz_21; + +{% endfor -%} diff --git a/tests/queries/0_stateless/02456_summing_mt_lc.reference b/tests/queries/0_stateless/02456_summing_mt_lc.reference new file mode 100644 index 00000000000..439cf103b38 --- /dev/null +++ b/tests/queries/0_stateless/02456_summing_mt_lc.reference @@ -0,0 +1,2 @@ +1 6 2020-01-01 00:00:00 +2 6 2020-01-02 00:00:00 diff --git 
a/tests/queries/0_stateless/02456_summing_mt_lc.sql b/tests/queries/0_stateless/02456_summing_mt_lc.sql new file mode 100644 index 00000000000..297eb64c642 --- /dev/null +++ b/tests/queries/0_stateless/02456_summing_mt_lc.sql @@ -0,0 +1,20 @@ +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS t_summing_lc; + +CREATE TABLE t_summing_lc +( + `key` UInt32, + `val` LowCardinality(UInt32), + `date` DateTime +) +ENGINE = SummingMergeTree(val) +PARTITION BY date +ORDER BY key; + +INSERT INTO t_summing_lc VALUES (1, 1, '2020-01-01'), (2, 1, '2020-01-02'), (1, 5, '2020-01-01'), (2, 5, '2020-01-02'); + +OPTIMIZE TABLE t_summing_lc FINAL; +SELECT * FROM t_summing_lc ORDER BY key; + +DROP TABLE t_summing_lc; diff --git a/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference b/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference new file mode 100644 index 00000000000..e75733cff47 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference @@ -0,0 +1,3 @@ +1 Hello +1 Hello +1 Hello diff --git a/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql b/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql new file mode 100644 index 00000000000..99234585d83 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS mutation_1; +DROP TABLE IF EXISTS mutation_2; + +CREATE TABLE mutation_1 +( + a UInt64, + b String +) +ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t', '1') +ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0, allow_remote_fs_zero_copy_replication=1; + +CREATE TABLE mutation_2 +( + a UInt64, + b String +) +ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t', '2') +ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0, allow_remote_fs_zero_copy_replication=1; + +INSERT INTO mutation_1 VALUES (1, 'Hello'); + +SYSTEM SYNC REPLICA mutation_2; + +SYSTEM STOP REPLICATION QUEUES mutation_2; + +ALTER TABLE mutation_1 UPDATE a = 2 WHERE b = 'xxxxxx' SETTINGS mutations_sync=1; + +SELECT * from mutation_1; +SELECT * from mutation_2; + +DROP TABLE mutation_1 SYNC; + +SELECT * from mutation_2; + +DROP TABLE IF EXISTS mutation_1; +DROP TABLE IF EXISTS mutation_2; diff --git a/tests/queries/0_stateless/02457_bz2_concatenated.reference b/tests/queries/0_stateless/02457_bz2_concatenated.reference new file mode 100644 index 00000000000..bc856dafab0 --- /dev/null +++ b/tests/queries/0_stateless/02457_bz2_concatenated.reference @@ -0,0 +1,4 @@ +0 +1 +2 +3 diff --git a/tests/queries/0_stateless/02457_bz2_concatenated.sh b/tests/queries/0_stateless/02457_bz2_concatenated.sh new file mode 100755 index 00000000000..5b24f74b9c0 --- /dev/null +++ b/tests/queries/0_stateless/02457_bz2_concatenated.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: depends on bzip2 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" + +rm -rf "${WORKING_FOLDER_02457}" +mkdir "${WORKING_FOLDER_02457}" + + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(0, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_1.bz2'" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(2, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_2.bz2'" +cat ${WORKING_FOLDER_02457}/file_1.bz2 ${WORKING_FOLDER_02457}/file_2.bz2 > ${WORKING_FOLDER_02457}/concatenated.bz2 +${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('${WORKING_FOLDER_02457}/concatenated.bz2', 'TabSeparated', 'col Int64')" + +rm -rf "${WORKING_FOLDER_02457}" diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.reference b/tests/queries/0_stateless/02457_insert_select_progress_http.reference new file mode 100644 index 00000000000..5f13f4b9285 --- /dev/null +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.reference @@ -0,0 +1,13 @@ +< X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"1","written_bytes":"4","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"1","written_bytes":"4","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"2","written_bytes":"8","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"2","written_bytes":"8","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"3","written_bytes":"12","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"3","written_bytes":"12","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"4","written_bytes":"16","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"4","written_bytes":"16","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"5","result_bytes":"20"} +< X-ClickHouse-Summary: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"5","result_bytes":"20"} diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh new file mode 100755 index 00000000000..656ab3dc403 --- /dev/null +++ 
b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1" -v |& { + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: +} diff --git a/tests/queries/0_stateless/02457_parse_date_time_best_effort.reference b/tests/queries/0_stateless/02457_parse_date_time_best_effort.reference new file mode 100644 index 00000000000..6a560e7e43b --- /dev/null +++ b/tests/queries/0_stateless/02457_parse_date_time_best_effort.reference @@ -0,0 +1,10 @@ +2017-12-01 18:31:44 +2017-01-12 18:31:44 +2017-12-01 18:31:44 +2017-01-12 18:31:44 +2017-12-01 18:31:44 +2017-01-12 18:31:44 +2015-12-31 18:31:44 +2015-12-31 18:31:44 +2015-12-31 18:31:44 +2015-12-31 18:31:44 diff --git a/tests/queries/0_stateless/02457_parse_date_time_best_effort.sql b/tests/queries/0_stateless/02457_parse_date_time_best_effort.sql new file mode 100644 index 00000000000..5eb00049b6f --- /dev/null +++ b/tests/queries/0_stateless/02457_parse_date_time_best_effort.sql @@ -0,0 +1,16 @@ +select parseDateTimeBestEffort('01/12/2017, 18:31:44'); +select parseDateTimeBestEffortUS('01/12/2017, 18:31:44'); +select parseDateTimeBestEffort('01/12/2017,18:31:44'); +select parseDateTimeBestEffortUS('01/12/2017,18:31:44'); +select parseDateTimeBestEffort('01/12/2017 , 18:31:44'); +select parseDateTimeBestEffortUS('01/12/2017 ,18:31:44'); +select parseDateTimeBestEffortUS('18:31:44, 31/12/2015'); +select parseDateTimeBestEffortUS('18:31:44 , 31/12/2015'); +select parseDateTimeBestEffort('18:31:44, 31/12/2015'); +select parseDateTimeBestEffort('18:31:44 , 31/12/2015'); +select parseDateTimeBestEffort('01/12/2017,'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeBestEffortUS('18:31:44,,,, 31/12/2015'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeBestEffortUS('18:31:44, 31/12/2015,'); -- { serverError CANNOT_PARSE_TEXT } +select parseDateTimeBestEffort('01/12/2017, 18:31:44,'); -- { serverError CANNOT_PARSE_TEXT } +select parseDateTimeBestEffort('01/12/2017, ,,,18:31:44'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeBestEffort('18:31:44 ,,,,, 31/12/2015'); -- { serverError CANNOT_PARSE_DATETIME } diff --git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference new file mode 100644 index 00000000000..376553843ac --- /dev/null +++ b/tests/queries/0_stateless/02458_default_setting.reference @@ -0,0 +1,5 @@ +1048545 +100000 +1 +1048545 +0 diff --git a/tests/queries/0_stateless/02458_default_setting.sql b/tests/queries/0_stateless/02458_default_setting.sql new file mode 100644 index 00000000000..712b5ad171b --- /dev/null +++ b/tests/queries/0_stateless/02458_default_setting.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +SELECT value FROM system.settings where name='max_insert_block_size'; +SET max_insert_block_size=100000; +SELECT value FROM system.settings where name='max_insert_block_size'; +SELECT changed FROM system.settings where name='max_insert_block_size'; +SET max_insert_block_size=DEFAULT; +SELECT value FROM system.settings where name='max_insert_block_size'; +SELECT changed FROM system.settings where name='max_insert_block_size'; diff --git 
a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python new file mode 100644 index 00000000000..c638b3d2040 --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 + +import socket +import os +import uuid +import json + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size = 1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode('utf-8') + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary('simple native protocol', ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary('default', ba) # user + writeStringBinary('', ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert (p_type == 0) # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + + +def serializeClientInfo(ba, query_id): + writeStringBinary('default', ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary('127.0.0.1:9000', ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary('os_user', ba) # os_user + writeStringBinary('client_hostname', ba) # client_hostname + writeStringBinary('client_name', ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('', ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + writeStringBinary('', ba) # No 
settings + writeStringBinary('', ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert(packet == expected), packet + + +class Progress(): + def __init__(self): + # NOTE: this is done in ctor to initialize __dict__ + self.read_rows = 0 + self.read_bytes = 0 + self.total_rows_to_read = 0 + self.written_rows = 0 + self.written_bytes = 0 + + def __str__(self): + return json.dumps(self.__dict__) + + def __add__(self, b): + self.read_rows += b.read_rows + self.read_bytes += b.read_bytes + self.total_rows_to_read += b.total_rows_to_read + self.written_rows += b.written_rows + self.written_bytes += b.written_bytes + return self + + def readPacket(self, s): + self.read_rows += readVarUInt(s) + self.read_bytes += readVarUInt(s) + self.total_rows_to_read += readVarUInt(s) + self.written_rows += readVarUInt(s) + self.written_bytes += readVarUInt(s) + + def __bool__(self): + return ( + self.read_rows > 0 or + self.read_bytes > 0 or + self.total_rows_to_read > 0 or + self.written_rows > 0 or + self.written_bytes > 0) + + +def readProgress(s): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + + if packet_type == 5: # End stream + return None + + assertPacket(packet_type, 3) # Progress + + progress = Progress() + progress.readPacket(s) + return progress + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + + +def main(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + # For 1 second sleep and 1000ms of interactive_delay we definitely should have a non-zero progress packet. + # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. 
+ +sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000") + + # external tables + sendEmptyBlock(s) + + summary_progress = Progress() + non_empty_progress_packets = 0 + while True: + progress = readProgress(s) + if progress is None: + break + summary_progress += progress + if progress: + non_empty_progress_packets += 1 + + print(summary_progress) + # Print only non-empty progress packets; eventually we should have at least 3 of them + # - 2 for each INSERT block (one of them can be merged with a read block, hence 3 or 4) + # - 1 or 2 for each SELECT block + assert non_empty_progress_packets in (3, 4), f"{non_empty_progress_packets=:}" + + s.close() + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference new file mode 100644 index 00000000000..2ec5c9652b5 --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference @@ -0,0 +1 @@ +{"read_rows": 2, "read_bytes": 16, "total_rows_to_read": 2, "written_rows": 2, "written_bytes": 8} diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh new file mode 100755 index 00000000000..ae3ea017fbb --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists insert_select_progress_tcp; + create table insert_select_progress_tcp(s UInt16) engine = MergeTree order by s; +" + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02458_insert_select_progress_tcp.python + +$CLICKHOUSE_CLIENT -q "drop table insert_select_progress_tcp" diff --git a/tests/queries/0_stateless/02458_relax_too_many_parts.reference b/tests/queries/0_stateless/02458_relax_too_many_parts.reference new file mode 100644 index 00000000000..6d532a1e49a --- /dev/null +++ b/tests/queries/0_stateless/02458_relax_too_many_parts.reference @@ -0,0 +1 @@ +4 8000000 diff --git a/tests/queries/0_stateless/02458_relax_too_many_parts.sql b/tests/queries/0_stateless/02458_relax_too_many_parts.sql new file mode 100644 index 00000000000..a1f8e86fce5 --- /dev/null +++ b/tests/queries/0_stateless/02458_relax_too_many_parts.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt64, s String) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_throw_insert = 3; + +-- The "too many parts" threshold works: +SET max_block_size = 1, min_insert_block_size_rows = 1, min_insert_block_size_bytes = 1; +SYSTEM STOP MERGES test; +INSERT INTO test VALUES (1, 'a'); +INSERT INTO test VALUES (2, 'a'); +INSERT INTO test VALUES (3, 'a'); +INSERT INTO test VALUES (4, 'a'); -- { serverError TOO_MANY_PARTS } + +-- But it can be relaxed with a setting: +ALTER TABLE test MODIFY SETTING max_avg_part_size_for_too_many_parts = '1M'; + +-- It works in the same way if parts are small: +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SYSTEM STOP MERGES test; + +INSERT INTO test VALUES (5, 'a'); +INSERT INTO test VALUES (6, 'a'); +INSERT INTO test VALUES (7, 'a'); -- { serverError TOO_MANY_PARTS } + +-- But it allows having more parts if their average size is large: +SYSTEM START MERGES test; +OPTIMIZE TABLE test 
+SYSTEM STOP MERGES test;
+
+SET max_block_size = 65000, min_insert_block_size_rows = 65000, min_insert_block_size_bytes = '1M';
+INSERT INTO test SELECT number, randomString(1000) FROM numbers(0, 10000);
+INSERT INTO test SELECT number, randomString(1000) FROM numbers(10000, 10000);
+INSERT INTO test SELECT number, randomString(1000) FROM numbers(20000, 10000);
+
+SELECT count(), round(avg(bytes), -6) FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active;
+
+DROP TABLE test;
diff --git a/tests/queries/0_stateless/02459_materialized_view_default_value.reference b/tests/queries/0_stateless/02459_materialized_view_default_value.reference
new file mode 100644
index 00000000000..0cab6bd33bb
--- /dev/null
+++ b/tests/queries/0_stateless/02459_materialized_view_default_value.reference
@@ -0,0 +1,3 @@
+2019-05-01 test
+2019-05-01 test
+2019-05-01 test
diff --git a/tests/queries/0_stateless/02459_materialized_view_default_value.sql b/tests/queries/0_stateless/02459_materialized_view_default_value.sql
new file mode 100644
index 00000000000..16a814233d1
--- /dev/null
+++ b/tests/queries/0_stateless/02459_materialized_view_default_value.sql
@@ -0,0 +1,36 @@
+DROP TABLE IF EXISTS session;
+DROP TABLE IF EXISTS queue;
+DROP TABLE IF EXISTS forward;
+
+CREATE TABLE session
+(
+    `day` Date,
+    `uid` String,
+    `dummy` String DEFAULT ''
+)
+ENGINE = MergeTree
+ORDER BY (day, uid);
+
+CREATE TABLE queue
+(
+    `day` Date,
+    `uid` String
+)
+ENGINE = MergeTree
+ORDER BY (day, uid);
+
+CREATE MATERIALIZED VIEW IF NOT EXISTS forward TO session AS
+SELECT
+    day,
+    uid
+FROM queue;
+
+insert into queue values ('2019-05-01', 'test');
+
+SELECT * FROM queue;
+SELECT * FROM session;
+SELECT * FROM forward;
+
+DROP TABLE session;
+DROP TABLE queue;
+DROP TABLE forward;
diff --git a/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference
new file mode 100644
index 00000000000..54f97aaa2e6
--- /dev/null
+++ b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference
@@ -0,0 +1 @@
+20220920
diff --git a/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh
new file mode 100755
index 00000000000..6e96b9b8afc
--- /dev/null
+++ b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag no-fasttest: depends on bzip2
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+${CLICKHOUSE_LOCAL} --aggregate_functions_null_for_empty=1 --multiquery --query "create table test_date (date Int32) ENGINE = MergeTree ORDER BY (date) as select 20220920; SELECT max(date) FROM test_date";
diff --git a/tests/queries/0_stateless/data_csv/10m_rows.csv.xz b/tests/queries/0_stateless/data_csv/10m_rows.csv.xz
new file mode 100644
index 00000000000..409803f97b1
Binary files /dev/null and b/tests/queries/0_stateless/data_csv/10m_rows.csv.xz differ
diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib
index 72d51c48656..37c82ec7239 100755
--- a/tests/queries/0_stateless/replication.lib
+++ b/tests/queries/0_stateless/replication.lib
@@ -56,6 +56,15 @@ function check_replication_consistency()
             break
         fi
     done
+
+    # Touch all data to check that it's readable (and trigger PartCheckThread if needed)
+    while ! $CLICKHOUSE_CLIENT -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do
+        sleep 1;
+        num_tries=$((num_tries+1))
+        if [ $num_tries -eq 250 ]; then
+            break
+        fi
+    done
     time_left=$((300 - num_tries))
 
     # Do not check anything if all replicas are readonly,
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index 963ac384148..8e465eb19e2 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -13,15 +13,15 @@ export CLICKHOUSE_TEST_NAME
 export CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}"
 export CLICKHOUSE_TEST_UNIQUE_NAME="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}"
 
-[ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} "
-[ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} "
-[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
-[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
-[ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
-[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
-[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
-[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
+[ -n "${CLICKHOUSE_CONFIG_CLIENT:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} "
+[ -n "${CLICKHOUSE_HOST:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} "
+[ -n "${CLICKHOUSE_PORT_TCP:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
+[ -n "${CLICKHOUSE_PORT_TCP:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
+[ -n "${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
+[ -n "${CLICKHOUSE_DATABASE:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
+[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
+[ -n "${CLICKHOUSE_DATABASE:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
+[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
 
 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 # client
@@ -78,23 +78,23 @@ export CLICKHOUSE_PORT_POSTGRESQL=${CLICKHOUSE_PORT_POSTGRESQL:="9005"}
 export CLICKHOUSE_PORT_KEEPER=${CLICKHOUSE_PORT_KEEPER:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=keeper_server.tcp_port 2>/dev/null)} 2>/dev/null
 export CLICKHOUSE_PORT_KEEPER=${CLICKHOUSE_PORT_KEEPER:="9181"}
 
-export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo "${CLICKHOUSE_CLIENT}" | sed 's/--secure //' | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'//g; s/$/'"--secure --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')}
+export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo "${CLICKHOUSE_CLIENT}" | sed 's/--secure //' | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'//g; s/$/'"--secure --accept-invalid-certificate --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')}
 
 # Add database and log comment to url params
-if [ -v CLICKHOUSE_URL_PARAMS ]
+if [ -n "${CLICKHOUSE_URL_PARAMS:-}" ]
 then
     export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}"
 else
     export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}"
 fi
 # Note: missing url encoding of the log comment.
-[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
+[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
 
 export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"}
 export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"}
 
 # Add url params to url
-if [ -v CLICKHOUSE_URL_PARAMS ]
+if [ -n "${CLICKHOUSE_URL_PARAMS:-}" ]
 then
     export CLICKHOUSE_URL="${CLICKHOUSE_URL}?${CLICKHOUSE_URL_PARAMS}"
     export CLICKHOUSE_URL_HTTPS="${CLICKHOUSE_URL_HTTPS}?${CLICKHOUSE_URL_PARAMS}"
@@ -117,10 +117,10 @@ mkdir -p ${CLICKHOUSE_TMP}
 export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"}
 export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"}
 # Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost
-[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp "
-[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} "
-[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} "
-[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} "
+[ -n "${CLICKHOUSE_HOST:-}" ] && MYSQL_CLIENT_OPT0+=" --protocol tcp "
+[ -n "${CLICKHOUSE_HOST:-}" ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} "
+[ -n "${CLICKHOUSE_PORT_MYSQL:-}" ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} "
+[ -n "${CLICKHOUSE_DATABASE:-}" ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} "
 MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} "
 export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}"
 export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"}
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 918174c1d83..63bf2c37ee3 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -502,3 +502,7 @@ zkcopy
 zlib
 znodes
 zstd
+NYPD
+denormalizing
+ConnectionDetails
+SelfManaged
diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index a4810701dee..772f48ad088 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -352,3 +352,8 @@ find $ROOT_PATH | sort -f | uniq -i -c | awk '{ if ($1 > 1) print }'
 find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
     grep -vP $EXCLUDE_DIRS |
     xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead"
+
+# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable
+find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
+    grep -vP $EXCLUDE_DIRS |
+    xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead"
diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list
index fc021920041..f331e222541 100644
--- a/utils/check-style/codespell-ignore-words.list
+++ b/utils/check-style/codespell-ignore-words.list
@@ -21,3 +21,5 @@ rightt
 iiterator
 hastable
 nam
+ubuntu
+toolchain
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 6f774b3f7aa..65ec5ddec01 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,15 +1,19 @@
+v22.9.3.18-stable 2022-09-30
 v22.9.2.7-stable 2022-09-23
 v22.9.1.2603-stable 2022-09-22
+v22.8.6.71-lts 2022-09-30
 v22.8.5.29-lts 2022-09-13
 v22.8.4.7-lts 2022-08-31
 v22.8.3.13-lts 2022-08-29
 v22.8.2.11-lts 2022-08-23
 v22.8.1.2097-lts 2022-08-18
+v22.7.6.74-stable 2022-09-30
 v22.7.5.13-stable 2022-08-29
 v22.7.4.16-stable 2022-08-23
 v22.7.3.5-stable 2022-08-10
 v22.7.2.15-stable 2022-08-03
 v22.7.1.2484-stable 2022-07-21
+v22.6.9.11-stable 2022-10-06
 v22.6.8.35-stable 2022-09-19
 v22.6.7.7-stable 2022-08-29
 v22.6.6.16-stable 2022-08-23
@@ -27,6 +31,7 @@ v22.4.5.9-stable 2022-05-06
 v22.4.4.7-stable 2022-04-29
 v22.4.3.3-stable 2022-04-26
 v22.4.2.1-stable 2022-04-22
+v22.3.13.80-lts 2022-09-30
 v22.3.12.19-lts 2022-08-29
 v22.3.11.12-lts 2022-08-10
 v22.3.10.22-lts 2022-08-03