diff --git a/.github/label-pr.yml b/.github/label-pr.yml deleted file mode 100644 index 4ae73a2e720..00000000000 --- a/.github/label-pr.yml +++ /dev/null @@ -1,2 +0,0 @@ -- regExp: ".*\\.md$" - labels: ["documentation", "pr-documentation"] diff --git a/.github/labeler.keywords.yml b/.github/labeler.keywords.yml new file mode 100644 index 00000000000..c70ea45de53 --- /dev/null +++ b/.github/labeler.keywords.yml @@ -0,0 +1 @@ +pr-feature: "New Feature" diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 00000000000..02d824581c5 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,23 @@ +# Build changes +pr-build: + - "**/CMakeLists.txt" + +# Documentation PRs +documentation: + - "**/*.md" + - "docs/**/*" +pr-documentation: + - "**/*.md" + - "docs/**/*" + +# Component labels +comp-mutations: + - "**/*Mutation*" +comp-matview: + - "**/*MaterializedView*" +comp-skipidx: + - "**/*Indices*" +comp-kafka: + - "dbms/src/Storages/Kafka/**/*" + - "dbms/tests/integration/test_storage_kafka/**/*" + - "utils/kafka/**/*" diff --git a/.github/main.workflow b/.github/main.workflow deleted file mode 100644 index a450195b955..00000000000 --- a/.github/main.workflow +++ /dev/null @@ -1,9 +0,0 @@ -workflow "Main workflow" { - resolves = ["Label PR"] - on = "pull_request" -} - -action "Label PR" { - uses = "decathlon/pull-request-labeler-action@v1.0.0" - secrets = ["GITHUB_TOKEN"] -} diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000000..6b18b043853 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,67 @@ +# Configuration for probot-stale - https://github.com/probot/stale + +# Number of days of inactivity before an Issue or Pull Request becomes stale +daysUntilStale: 45 + +# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. +# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. +daysUntilClose: 30 + +# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) +onlyLabels: [] + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - bug + - feature + - memory + - performance + - prio-crit + - prio-major + - st-accepted + - st-in-progress + - st-waiting-for-fix + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: false + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: false + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: false + +# Label to use when marking as stale +staleLabel: stale + +# Comment to post when marking as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. + +# Comment to post when removing the stale label. +# unmarkComment: > +# Your comment here. + +# Comment to post when closing a stale Issue or Pull Request. +# closeComment: > +# Your comment here. + +# Limit the number of actions per hour, from 1-30. Default is 30 +limitPerRun: 30 + +# Limit to only `issues` or `pulls` +# only: issues + +# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': +pulls: + daysUntilStale: 365 + markComment: > + This pull request has been automatically marked as stale because it has not had + any activity for over a year. 
It will be closed if no further activity occurs. Thank you + for your contributions. + +# issues: +# exemptLabels: +# - confirmed diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 00000000000..0110ef7b516 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,11 @@ +name: "Pull Request Labeler" +on: + pull_request + +jobs: + by-filename: + runs-on: ubuntu-latest + steps: + - uses: "actions/labeler@v2" + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.potato.yml b/.potato.yml new file mode 100644 index 00000000000..113bdacbdde --- /dev/null +++ b/.potato.yml @@ -0,0 +1,27 @@ +# This is the configuration file with settings for Potato. +# Potato is an internal Yandex technology that allows us to sync internal [Yandex.Tracker](https://yandex.com/tracker/) and GitHub. + +# For all PRs where documentation is needed, just add a 'pr-feature' label and we will include it into documentation sprints. + +# The project name. +name: clickhouse +# Object handlers defines which handlers we use. +handlers: + # The handler for creating an Yandex.Tracker issue. + - name: issue-create + params: + triggers: + # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker. + github:pullRequest:labeled: + data: + # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues. + queue: CLICKHOUSEDOCS + # The issue title. + summary: '[Potato] Pull Request #{{pullRequest.number}}' + # The issue description. + description: > + {{pullRequest.description}} + + Ссылка на Pull Request: {{pullRequest.webUrl}} + # The condition for creating the Yandex.Tracker issue. + condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f196d1e414..53021dbe666 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,16 +118,16 @@ endif () option (ENABLE_TESTS "Enables tests" ON) -if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") +if (ARCH_AMD64) option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) +endif () - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") - option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) - endif () +if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) +endif () - if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") - message (WARNING "CMake version must be greater than 3.9.0 for production builds.") - endif () +if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") + message (WARNING "CMake version must be greater than 3.9.0 for production builds.") endif () # Make sure the final executable has symbols exported diff --git a/README.md b/README.md index 71925542daf..83cf3e9adbc 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,6 @@ ClickHouse is an open-source column-oriented database management system that all * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person. 
## Upcoming Events -* [ClickHouse Meetup in Hong Kong](https://www.meetup.com/Hong-Kong-Machine-Learning-Meetup/events/263580542/) on October 17. -* [ClickHouse Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20. * [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27. * [ClickHouse Meetup in Tokyo](https://clickhouse.connpass.com/event/147001/) on November 14. * [ClickHouse Meetup in Istanbul](https://www.eventbrite.com/e/clickhouse-meetup-istanbul-create-blazing-fast-experiences-w-clickhouse-tickets-73101120419) on November 19. diff --git a/cmake/arch.cmake b/cmake/arch.cmake index deaa7a36eb4..f339236af64 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -1,3 +1,6 @@ +if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") + set (ARCH_AMD64 1) +endif () if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") set (ARCH_AARCH64 1) endif () diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 43875c9d07f..5b420246168 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -45,9 +45,15 @@ endif () add_library(jemalloc STATIC ${SRCS}) -target_include_directories(jemalloc PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_x86_64) # jemalloc.h +target_include_directories(jemalloc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + +if (ARCH_AMD64) + target_include_directories(jemalloc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_x86_64) +elseif (ARCH_ARM) + target_include_directories(jemalloc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_aarch64) +else () + message (FATAL_ERROR "jemalloc can only be used on x86_64 or aarch64.") +endif () target_include_directories(jemalloc PRIVATE ${JEMALLOC_SOURCE_DIR}/include) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/README b/contrib/jemalloc-cmake/include_linux_aarch64/README new file mode 100644 index 00000000000..2ab582803a2 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/README @@ -0,0 +1,7 @@ +Here are pre-generated files from jemalloc on Linux aarch64. +You can obtain these files by running ./autogen.sh inside jemalloc source directory. + +Added #define GNU_SOURCE +Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. +Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. +Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h new file mode 100644 index 00000000000..0e1cf49ad97 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h @@ -0,0 +1,382 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ + + +#ifndef _GNU_SOURCE + #define _GNU_SOURCE +#endif + +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. 
+ */ +#define JEMALLOC_OVERRIDE___LIBC_CALLOC +#define JEMALLOC_OVERRIDE___LIBC_FREE +#define JEMALLOC_OVERRIDE___LIBC_MALLOC +#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#define JEMALLOC_OVERRIDE___LIBC_REALLOC +#define JEMALLOC_OVERRIDE___LIBC_VALLOC +#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS 1 + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS 1 + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS 1 + +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines). + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines). + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +/* #undef JEMALLOC_OSSPIN */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +#define JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1 + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. 
+ */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 29 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. 
+ */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H 1 + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT 1 + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. 
+ */ +#define JEMALLOC_BACKGROUND_THREAD 1 + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC 1 + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h new file mode 100644 index 00000000000..c150785fb4a --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h @@ -0,0 +1,194 @@ +#ifndef JEMALLOC_PREAMBLE_H +#define JEMALLOC_PREAMBLE_H + +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" + +#ifdef JEMALLOC_UTRACE +#include +#endif + +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# undef JEMALLOC_IS_MALLOC +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "jemalloc/jemalloc.h" +# undef JEMALLOC_NO_RENAME +#else +# define JEMALLOC_N(n) je_##n +# include "jemalloc/jemalloc.h" +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include +#endif + +#ifdef JEMALLOC_ZONE +#include +#include +#include +#endif + +#include "jemalloc/internal/jemalloc_internal_macros.h" + +/* + * Note that the ordering matters here; the hook itself is name-mangled. We + * want the inclusion of hooks to happen early, so that we hook as much as + * possible. 
+ */ +#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE +# ifndef JEMALLOC_JET +# include "jemalloc/internal/private_namespace.h" +# else +# include "jemalloc/internal/private_namespace_jet.h" +# endif +#endif +#include "jemalloc/internal/test_hooks.h" + +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; +#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* Currently percpu_arena depends on sched_getcpu. */ +#define JEMALLOC_PERCPU_ARENA +#endif +static const bool have_percpu_arena = +#ifdef JEMALLOC_PERCPU_ARENA + true +#else + false +#endif + ; +/* + * Undocumented, and not recommended; the application should take full + * responsibility for tracking provenance. + */ +static const bool force_ivsalloc = +#ifdef JEMALLOC_FORCE_IVSALLOC + true +#else + false +#endif + ; +static const bool have_background_thread = +#ifdef JEMALLOC_BACKGROUND_THREAD + true +#else + false +#endif + ; + +#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h new file mode 100644 index 00000000000..d1389237a77 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h @@ -0,0 +1,43 @@ +/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR + +/* Defined if alloc_size attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE + +/* Defined if format(printf, ...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. 
+ */ +#define JEMALLOC_OVERRIDE_MEMALIGN +#define JEMALLOC_OVERRIDE_VALLOC + +/* + * At least Linux omits the "const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#define JEMALLOC_USABLE_SIZE_CONST + +/* + * If defined, specify throw() for the public function prototypes when compiling + * with C++. The only justification for this is to match the prototypes that + * glibc defines. + */ +#define JEMALLOC_USE_CXX_THROW + +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#define LG_SIZEOF_PTR 3 diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h new file mode 100644 index 00000000000..79b13337fbb --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include + +#define JEMALLOC_VERSION "5.1.0-97-gcd2931ad9bbd78208565716ab102e86d858c2fff" +#define JEMALLOC_VERSION_MAJOR 5 +#define JEMALLOC_VERSION_MINOR 1 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 97 +#define JEMALLOC_VERSION_GID "cd2931ad9bbd78208565716ab102e86d858c2fff" +#define JEMALLOC_VERSION_GID_IDENT cd2931ad9bbd78208565716ab102e86d858c2fff + +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +#else +# define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) +#endif +#define MALLOCX_ZERO ((int)0x40) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) + +/* + * Use as arena index in "arena..{purge,decay,dss}" and + * "stats.arenas..*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas..*" mallctl interfaces to select + * destroyed arenas. 
+ */ +#define MALLCTL_ARENAS_DESTROYED 4097 + +#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) +# define JEMALLOC_CXX_THROW throw() +#else +# define JEMALLOC_CXX_THROW +#endif + +#if defined(_MSC_VER) +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# ifndef JEMALLOC_EXPORT +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# endif +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_NOINLINE __declspec(noinline) +# ifdef __cplusplus +# define JEMALLOC_NOTHROW __declspec(nothrow) +# else +# define JEMALLOC_NOTHROW +# endif +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) +# if _MSC_VER >= 1900 && !defined(__EDG__) +# define JEMALLOC_ALLOCATOR __declspec(allocator) +# else +# define JEMALLOC_ALLOCATOR +# endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# define JEMALLOC_EXPORT +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_NOINLINE +# define JEMALLOC_NOTHROW +# define JEMALLOC_SECTION(s) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h new file mode 100644 index 00000000000..ff025e30fa7 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h @@ -0,0 +1,66 @@ +/* + * The je_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
+ */ +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, + size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, + int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, + size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, + int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, + size_t *mibp, size_t *miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( + void (*write_cb)(void *, const char *), void *je_cbopaque, + const char *opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h new file mode 100644 index 00000000000..1a58874306e --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h @@ -0,0 +1,77 @@ +typedef struct extent_hooks_s extent_hooks_t; + +/* + * void * + * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); + */ +typedef void 
*(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, + bool *, unsigned); + +/* + * bool + * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); + */ +typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); + +/* + * void + * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); + */ +typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); + +/* + * bool + * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); + */ +typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); + +/* + * bool + * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); + */ +typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, + size_t, unsigned); + +/* + * bool + * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); + */ +typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); + +/* + * bool + * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); + */ +typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + bool, unsigned); + +/* + * bool + * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); + */ +typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, + bool, unsigned); + +struct extent_hooks_s { + extent_alloc_t *alloc; + extent_dalloc_t *dalloc; + extent_destroy_t *destroy; + extent_commit_t *commit; + extent_decommit_t *decommit; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; + extent_split_t *split; + extent_merge_t *merge; +}; diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 64dc83fa8b6..fc1ba23f7c1 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -62,6 +62,7 @@ set(SRCS ) add_library(rdkafka ${SRCS}) +target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined) target_include_directories(rdkafka SYSTEM PUBLIC include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used. diff --git a/contrib/murmurhash/src/murmurhash2.cpp b/contrib/murmurhash/src/murmurhash2.cpp index 8a41ba02d98..7b659f50b4c 100644 --- a/contrib/murmurhash/src/murmurhash2.cpp +++ b/contrib/murmurhash/src/murmurhash2.cpp @@ -13,6 +13,7 @@ // machines. 
#include "murmurhash2.h" +#include // Platform-specific functions and macros // Microsoft Visual Studio @@ -48,7 +49,8 @@ uint32_t MurmurHash2(const void * key, int len, uint32_t seed) while (len >= 4) { - uint32_t k = *reinterpret_cast(data); + uint32_t k; + memcpy(&k, data, sizeof(k)); k *= m; k ^= k >> r; k *= m; @@ -418,4 +420,4 @@ uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed) return h; } -} \ No newline at end of file +} diff --git a/contrib/murmurhash/src/murmurhash3.cpp b/contrib/murmurhash/src/murmurhash3.cpp index 2831bf5c73b..d6062340d03 100644 --- a/contrib/murmurhash/src/murmurhash3.cpp +++ b/contrib/murmurhash/src/murmurhash3.cpp @@ -7,6 +7,7 @@ // non-native version will be less than optimal. #include "murmurhash3.h" +#include //----------------------------------------------------------------------------- // Platform-specific functions and macros @@ -53,7 +54,9 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r ) FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) { - return p[i]; + uint32_t res; + memcpy(&res, p + i, sizeof(res)); + return res; } FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 229639a8a7f..ec9ffc6e3dd 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -164,6 +164,7 @@ macro(add_object_library name common_path) endif () endmacro() +add_object_library(clickhouse_access src/Access) add_object_library(clickhouse_core src/Core) add_object_library(clickhouse_compression src/Compression) add_object_library(clickhouse_datastreams src/DataStreams) diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index e685425eefc..195f8c01270 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -274,15 +274,24 @@ private: pcg64 generator(randomSeed()); std::uniform_int_distribution distribution(0, queries.size() - 1); - for (size_t i = 0; i < concurrency; ++i) + try { - EntryPtrs connection_entries; - connection_entries.reserve(connections.size()); + for (size_t i = 0; i < concurrency; ++i) + { + EntryPtrs connection_entries; + connection_entries.reserve(connections.size()); - for (const auto & connection : connections) - connection_entries.emplace_back(std::make_shared(connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); + for (const auto & connection : connections) + connection_entries.emplace_back(std::make_shared( + connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); - pool.schedule(std::bind(&Benchmark::thread, this, connection_entries)); + pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries)); + } + } + catch (...) + { + pool.wait(); + throw; } InterruptListener interrupt_listener; diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 7ac03dfdd3b..5fc1d76b542 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -895,7 +895,7 @@ public: ThreadPool thread_pool(num_threads ? 
num_threads : 2 * getNumberOfPhysicalCPUCores()); for (const TaskShardPtr & task_shard : task_table.all_shards) - thread_pool.schedule([this, timeouts, task_shard]() { discoverShardPartitions(timeouts, task_shard); }); + thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() { discoverShardPartitions(timeouts, task_shard); }); LOG_DEBUG(log, "Waiting for " << thread_pool.active() << " setup jobs"); thread_pool.wait(); @@ -2038,7 +2038,7 @@ protected: ThreadPool thread_pool(std::min(num_shards, getNumberOfPhysicalCPUCores())); for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index) - thread_pool.schedule([=] { do_for_shard(shard_index); }); + thread_pool.scheduleOrThrowOnError([=] { do_for_shard(shard_index); }); thread_pool.wait(); } diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index f4eac1baec2..c3dfcacf3f3 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -19,8 +19,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -221,14 +221,6 @@ catch (const Exception & e) } -inline String getQuotedString(const String & s) -{ - WriteBufferFromOwnString buf; - writeQuotedString(s, buf); - return buf.str(); -} - - std::string LocalServer::getInitialCreateTableQuery() { if (!config().has("table-structure")) @@ -241,7 +233,7 @@ std::string LocalServer::getInitialCreateTableQuery() if (!config().has("table-file") || config().getString("table-file") == "-") /// Use Unix tools stdin naming convention table_file = "stdin"; else /// Use regular file - table_file = getQuotedString(config().getString("table-file")); + table_file = quoteString(config().getString("table-file")); return "CREATE TABLE " + table_name + diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 1b495552fbc..f7429ebf2a7 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -46,7 +46,7 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so , connection_id(connection_id_) , public_key(public_key_) , private_key(private_key_) - , auth_plugin(new Authentication::Native41()) + , auth_plugin(new MySQLProtocol::Authentication::Native41()) { server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) @@ -231,8 +231,8 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl { // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. auto user = connection_context.getUser(user_name); - if (user->password_double_sha1_hex.empty()) - auth_plugin = std::make_unique(public_key, private_key, log); + if (user->authentication.getType() != DB::Authentication::DOUBLE_SHA1_PASSWORD) + auth_plugin = std::make_unique(public_key, private_key, log); try { std::optional auth_response = auth_plugin_name == auth_plugin->getName() ? 
std::make_optional(initial_auth_response) : std::nullopt; diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 58ef28d0c03..83749975f34 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -565,7 +565,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads) auto executor = pipeline.execute(); std::atomic_bool exception = false; - pool.schedule([&]() + pool.scheduleOrThrowOnError([&]() { /// ThreadStatus thread_status; diff --git a/dbms/src/Access/AllowedClientHosts.cpp b/dbms/src/Access/AllowedClientHosts.cpp new file mode 100644 index 00000000000..4016d0ce00f --- /dev/null +++ b/dbms/src/Access/AllowedClientHosts.cpp @@ -0,0 +1,397 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int DNS_ERROR; + extern const int IP_ADDRESS_NOT_ALLOWED; +} + +namespace +{ + using IPAddress = Poco::Net::IPAddress; + + const AllowedClientHosts::IPSubnet ALL_ADDRESSES = AllowedClientHosts::IPSubnet{IPAddress{IPAddress::IPv6}, IPAddress{IPAddress::IPv6}}; + + IPAddress toIPv6(const IPAddress & addr) + { + if (addr.family() == IPAddress::IPv6) + return addr; + + return IPAddress("::FFFF:" + addr.toString()); + } + + + IPAddress maskToIPv6(const IPAddress & mask) + { + if (mask.family() == IPAddress::IPv6) + return mask; + + return IPAddress(96, IPAddress::IPv6) | toIPv6(mask); + } + + + bool isAddressOfHostImpl(const IPAddress & address, const String & host) + { + IPAddress addr_v6 = toIPv6(address); + + /// Resolve by hand, because Poco don't use AI_ALL flag but we need it. + addrinfo * ai = nullptr; + SCOPE_EXIT( + { + if (ai) + freeaddrinfo(ai); + }); + + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_flags |= AI_V4MAPPED | AI_ALL; + + int ret = getaddrinfo(host.c_str(), nullptr, &hints, &ai); + if (0 != ret) + throw Exception("Cannot getaddrinfo: " + std::string(gai_strerror(ret)), ErrorCodes::DNS_ERROR); + + for (; ai != nullptr; ai = ai->ai_next) + { + if (ai->ai_addrlen && ai->ai_addr) + { + if (ai->ai_family == AF_INET6) + { + if (addr_v6 == IPAddress( + &reinterpret_cast(ai->ai_addr)->sin6_addr, sizeof(in6_addr), + reinterpret_cast(ai->ai_addr)->sin6_scope_id)) + { + return true; + } + } + else if (ai->ai_family == AF_INET) + { + if (addr_v6 == toIPv6(IPAddress(&reinterpret_cast(ai->ai_addr)->sin_addr, sizeof(in_addr)))) + { + return true; + } + } + } + } + + return false; + } + + + /// Cached version of isAddressOfHostImpl(). We need to cache DNS requests. + bool isAddressOfHost(const IPAddress & address, const String & host) + { + static SimpleCache cache; + return cache(address, host); + } + + + String getHostByAddressImpl(const IPAddress & address) + { + Poco::Net::SocketAddress sock_addr(address, 0); + + /// Resolve by hand, because Poco library doesn't have such functionality. + char host[1024]; + int gai_errno = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); + if (0 != gai_errno) + throw Exception("Cannot getnameinfo: " + std::string(gai_strerror(gai_errno)), ErrorCodes::DNS_ERROR); + + /// Check that PTR record is resolved back to client address + if (!isAddressOfHost(address, host)) + throw Exception("Host " + String(host) + " isn't resolved back to " + address.toString(), ErrorCodes::DNS_ERROR); + return host; + } + + + /// Cached version of getHostByAddressImpl(). 
We need to cache DNS requests. + String getHostByAddress(const IPAddress & address) + { + static SimpleCache cache; + return cache(address); + } +} + + +String AllowedClientHosts::IPSubnet::toString() const +{ + unsigned int prefix_length = mask.prefixLength(); + if (IPAddress{prefix_length, mask.family()} == mask) + return prefix.toString() + "/" + std::to_string(prefix_length); + + return prefix.toString() + "/" + mask.toString(); +} + + +AllowedClientHosts::AllowedClientHosts() +{ +} + + +AllowedClientHosts::AllowedClientHosts(AllAddressesTag) +{ + addAllAddresses(); +} + + +AllowedClientHosts::~AllowedClientHosts() = default; + + +AllowedClientHosts::AllowedClientHosts(const AllowedClientHosts & src) +{ + *this = src; +} + + +AllowedClientHosts & AllowedClientHosts::operator =(const AllowedClientHosts & src) +{ + addresses = src.addresses; + subnets = src.subnets; + host_names = src.host_names; + host_regexps = src.host_regexps; + compiled_host_regexps.clear(); + return *this; +} + + +AllowedClientHosts::AllowedClientHosts(AllowedClientHosts && src) +{ + *this = src; +} + + +AllowedClientHosts & AllowedClientHosts::operator =(AllowedClientHosts && src) +{ + addresses = std::move(src.addresses); + subnets = std::move(src.subnets); + host_names = std::move(src.host_names); + host_regexps = std::move(src.host_regexps); + compiled_host_regexps = std::move(src.compiled_host_regexps); + return *this; +} + + +void AllowedClientHosts::clear() +{ + addresses.clear(); + subnets.clear(); + host_names.clear(); + host_regexps.clear(); + compiled_host_regexps.clear(); +} + + +bool AllowedClientHosts::empty() const +{ + return addresses.empty() && subnets.empty() && host_names.empty() && host_regexps.empty(); +} + + +void AllowedClientHosts::addAddress(const IPAddress & address) +{ + IPAddress addr_v6 = toIPv6(address); + if (boost::range::find(addresses, addr_v6) == addresses.end()) + addresses.push_back(addr_v6); +} + + +void AllowedClientHosts::addAddress(const String & address) +{ + addAddress(IPAddress{address}); +} + + +void AllowedClientHosts::addSubnet(const IPSubnet & subnet) +{ + IPSubnet subnet_v6; + subnet_v6.prefix = toIPv6(subnet.prefix); + subnet_v6.mask = maskToIPv6(subnet.mask); + + if (subnet_v6.mask == IPAddress(128, IPAddress::IPv6)) + { + addAddress(subnet_v6.prefix); + return; + } + + subnet_v6.prefix = subnet_v6.prefix & subnet_v6.mask; + + if (boost::range::find(subnets, subnet_v6) == subnets.end()) + subnets.push_back(subnet_v6); +} + + +void AllowedClientHosts::addSubnet(const IPAddress & prefix, const IPAddress & mask) +{ + addSubnet(IPSubnet{prefix, mask}); +} + + +void AllowedClientHosts::addSubnet(const IPAddress & prefix, size_t num_prefix_bits) +{ + addSubnet(prefix, IPAddress(num_prefix_bits, prefix.family())); +} + + +void AllowedClientHosts::addSubnet(const String & subnet) +{ + size_t slash = subnet.find('/'); + if (slash == String::npos) + { + addAddress(subnet); + return; + } + + IPAddress prefix{String{subnet, 0, slash}}; + String mask(subnet, slash + 1, subnet.length() - slash - 1); + if (std::all_of(mask.begin(), mask.end(), isNumericASCII)) + addSubnet(prefix, parseFromString(mask)); + else + addSubnet(prefix, IPAddress{mask}); +} + + +void AllowedClientHosts::addHostName(const String & host_name) +{ + if (boost::range::find(host_names, host_name) == host_names.end()) + host_names.push_back(host_name); +} + + +void AllowedClientHosts::addHostRegexp(const String & host_regexp) +{ + if (boost::range::find(host_regexps, host_regexp) == host_regexps.end()) + 
host_regexps.push_back(host_regexp); +} + + +void AllowedClientHosts::addAllAddresses() +{ + clear(); + addSubnet(ALL_ADDRESSES); +} + + +bool AllowedClientHosts::containsAllAddresses() const +{ + return (boost::range::find(subnets, ALL_ADDRESSES) != subnets.end()) + || (boost::range::find(host_regexps, ".*") != host_regexps.end()) + || (boost::range::find(host_regexps, "$") != host_regexps.end()); +} + + +bool AllowedClientHosts::contains(const IPAddress & address) const +{ + return containsImpl(address, String(), nullptr); +} + + +void AllowedClientHosts::checkContains(const IPAddress & address, const String & user_name) const +{ + String error; + if (!containsImpl(address, user_name, &error)) + throw Exception(error, ErrorCodes::IP_ADDRESS_NOT_ALLOWED); +} + + +bool AllowedClientHosts::containsImpl(const IPAddress & address, const String & user_name, String * error) const +{ + if (error) + error->clear(); + + /// Check `ip_addresses`. + IPAddress addr_v6 = toIPv6(address); + if (boost::range::find(addresses, addr_v6) != addresses.end()) + return true; + + /// Check `ip_subnets`. + for (const auto & subnet : subnets) + if ((addr_v6 & subnet.mask) == subnet.prefix) + return true; + + /// Check `hosts`. + for (const String & host_name : host_names) + { + try + { + if (isAddressOfHost(address, host_name)) + return true; + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::DNS_ERROR) + e.rethrow(); + + /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next. + LOG_WARNING( + &Logger::get("AddressPatterns"), + "Failed to check if the allowed client hosts contain address " << address.toString() << ". " << e.displayText() + << ", code = " << e.code()); + } + } + + /// Check `host_regexps`. + if (!host_regexps.empty()) + { + compileRegexps(); + try + { + String resolved_host = getHostByAddress(address); + for (const auto & compiled_regexp : compiled_host_regexps) + { + if (compiled_regexp && compiled_regexp->match(resolved_host)) + return true; + } + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::DNS_ERROR) + e.rethrow(); + + /// Try to ignore DNS errors: if host cannot be resolved, skip it and try next. + LOG_WARNING( + &Logger::get("AddressPatterns"), + "Failed to check if the allowed client hosts contain address " << address.toString() << ". 
" << e.displayText() + << ", code = " << e.code()); + } + } + + if (error) + { + if (user_name.empty()) + *error = "It's not allowed to connect from address " + address.toString(); + else + *error = "User " + user_name + " is not allowed to connect from address " + address.toString(); + } + return false; +} + + +void AllowedClientHosts::compileRegexps() const +{ + if (compiled_host_regexps.size() == host_regexps.size()) + return; + size_t old_size = compiled_host_regexps.size(); + compiled_host_regexps.reserve(host_regexps.size()); + for (size_t i = old_size; i != host_regexps.size(); ++i) + compiled_host_regexps.emplace_back(std::make_unique(host_regexps[i])); +} + + +bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) +{ + return (lhs.addresses == rhs.addresses) && (lhs.subnets == rhs.subnets) && (lhs.host_names == rhs.host_names) + && (lhs.host_regexps == rhs.host_regexps); +} +} diff --git a/dbms/src/Access/AllowedClientHosts.h b/dbms/src/Access/AllowedClientHosts.h new file mode 100644 index 00000000000..fea797c2aa4 --- /dev/null +++ b/dbms/src/Access/AllowedClientHosts.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include + + +namespace Poco +{ +class RegularExpression; +} + + +namespace DB +{ +/// Represents lists of hosts an user is allowed to connect to server from. +class AllowedClientHosts +{ +public: + using IPAddress = Poco::Net::IPAddress; + + struct IPSubnet + { + IPAddress prefix; + IPAddress mask; + + String toString() const; + + friend bool operator ==(const IPSubnet & lhs, const IPSubnet & rhs) { return (lhs.prefix == rhs.prefix) && (lhs.mask == rhs.mask); } + friend bool operator !=(const IPSubnet & lhs, const IPSubnet & rhs) { return !(lhs == rhs); } + }; + + struct AllAddressesTag {}; + + AllowedClientHosts(); + explicit AllowedClientHosts(AllAddressesTag); + ~AllowedClientHosts(); + + AllowedClientHosts(const AllowedClientHosts & src); + AllowedClientHosts & operator =(const AllowedClientHosts & src); + AllowedClientHosts(AllowedClientHosts && src); + AllowedClientHosts & operator =(AllowedClientHosts && src); + + /// Removes all contained addresses. This will disallow all addresses. + void clear(); + bool empty() const; + + /// Allows exact IP address. + /// For example, 213.180.204.3 or 2a02:6b8::3 + void addAddress(const IPAddress & address); + void addAddress(const String & address); + + /// Allows an IP subnet. + void addSubnet(const IPSubnet & subnet); + void addSubnet(const String & subnet); + + /// Allows an IP subnet. + /// For example, 312.234.1.1/255.255.255.0 or 2a02:6b8::3/FFFF:FFFF:FFFF:FFFF:: + void addSubnet(const IPAddress & prefix, const IPAddress & mask); + + /// Allows an IP subnet. + /// For example, 10.0.0.1/8 or 2a02:6b8::3/64 + void addSubnet(const IPAddress & prefix, size_t num_prefix_bits); + + /// Allows all addresses. + void addAllAddresses(); + + /// Allows an exact host. The `contains()` function will check that the provided address equals to one of that host's addresses. + void addHostName(const String & host_name); + + /// Allows a regular expression for the host. + void addHostRegexp(const String & host_regexp); + + const std::vector & getAddresses() const { return addresses; } + const std::vector & getSubnets() const { return subnets; } + const std::vector & getHostNames() const { return host_names; } + const std::vector & getHostRegexps() const { return host_regexps; } + + /// Checks if the provided address is in the list. Returns false if not. 
+ bool contains(const IPAddress & address) const; + + /// Checks if any address is allowed. + bool containsAllAddresses() const; + + /// Checks if the provided address is in the list. Throws an exception if not. + /// `username` is only used for generating an error message if the address isn't in the list. + void checkContains(const IPAddress & address, const String & user_name = String()) const; + + friend bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs); + friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); } + +private: + bool containsImpl(const IPAddress & address, const String & user_name, String * error) const; + void compileRegexps() const; + + std::vector addresses; + std::vector subnets; + std::vector host_names; + std::vector host_regexps; + mutable std::vector> compiled_host_regexps; +}; +} diff --git a/dbms/src/Access/Authentication.cpp b/dbms/src/Access/Authentication.cpp new file mode 100644 index 00000000000..5b641e2906e --- /dev/null +++ b/dbms/src/Access/Authentication.cpp @@ -0,0 +1,207 @@ +#include +#include +#include +#include +#include +#include +#include "config_core.h" +#if USE_SSL +# include +#endif + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; + extern const int REQUIRED_PASSWORD; + extern const int WRONG_PASSWORD; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +namespace +{ + using Digest = Authentication::Digest; + + Digest encodePlainText(const StringRef & text) + { + return Digest(text.data, text.data + text.size); + } + + Digest encodeSHA256(const StringRef & text) + { +#if USE_SSL + Digest hash; + hash.resize(32); + SHA256_CTX ctx; + SHA256_Init(&ctx); + SHA256_Update(&ctx, reinterpret_cast(text.data), text.size); + SHA256_Final(hash.data(), &ctx); + return hash; +#else + UNUSED(text); + throw DB::Exception("SHA256 passwords support is disabled, because ClickHouse was built without SSL library", DB::ErrorCodes::SUPPORT_IS_DISABLED); +#endif + } + + Digest encodeSHA1(const StringRef & text) + { + Poco::SHA1Engine engine; + engine.update(text.data, text.size); + return engine.digest(); + } + + Digest encodeSHA1(const Digest & text) + { + return encodeSHA1(StringRef{reinterpret_cast(text.data()), text.size()}); + } + + Digest encodeDoubleSHA1(const StringRef & text) + { + return encodeSHA1(encodeSHA1(text)); + } +} + + +Authentication::Authentication(Authentication::Type type_) + : type(type_) +{ +} + + +void Authentication::setPassword(const String & password_) +{ + switch (type) + { + case NO_PASSWORD: + throw Exception("Cannot specify password for the 'NO_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); + + case PLAINTEXT_PASSWORD: + setPasswordHashBinary(encodePlainText(password_)); + return; + + case SHA256_PASSWORD: + setPasswordHashBinary(encodeSHA256(password_)); + return; + + case DOUBLE_SHA1_PASSWORD: + setPasswordHashBinary(encodeDoubleSHA1(password_)); + return; + } + throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); +} + + +String Authentication::getPassword() const +{ + if (type != PLAINTEXT_PASSWORD) + throw Exception("Cannot decode the password", ErrorCodes::LOGICAL_ERROR); + return String(password_hash.data(), password_hash.data() + password_hash.size()); +} + + +void Authentication::setPasswordHashHex(const String & hash) +{ + Digest digest; + digest.resize(hash.size() / 2); + boost::algorithm::unhex(hash.begin(), 
hash.end(), digest.data()); + setPasswordHashBinary(digest); +} + + +String Authentication::getPasswordHashHex() const +{ + String hex; + hex.resize(password_hash.size() * 2); + boost::algorithm::hex(password_hash.begin(), password_hash.end(), hex.data()); + return hex; +} + + +void Authentication::setPasswordHashBinary(const Digest & hash) +{ + switch (type) + { + case NO_PASSWORD: + throw Exception("Cannot specify password for the 'NO_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); + + case PLAINTEXT_PASSWORD: + { + password_hash = hash; + return; + } + + case SHA256_PASSWORD: + { + if (hash.size() != 32) + throw Exception( + "Password hash for the 'SHA256_PASSWORD' authentication type has length " + std::to_string(hash.size()) + + " but must be exactly 32 bytes.", + ErrorCodes::BAD_ARGUMENTS); + password_hash = hash; + return; + } + + case DOUBLE_SHA1_PASSWORD: + { + if (hash.size() != 20) + throw Exception( + "Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length " + std::to_string(hash.size()) + + " but must be exactly 20 bytes.", + ErrorCodes::BAD_ARGUMENTS); + password_hash = hash; + return; + } + } + throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); +} + + +bool Authentication::isCorrectPassword(const String & password_) const +{ + switch (type) + { + case NO_PASSWORD: + return true; + + case PLAINTEXT_PASSWORD: + return password_ == StringRef{reinterpret_cast(password_hash.data()), password_hash.size()}; + + case SHA256_PASSWORD: + return encodeSHA256(password_) == password_hash; + + case DOUBLE_SHA1_PASSWORD: + { + auto first_sha1 = encodeSHA1(password_); + + /// If it was MySQL compatibility server, then first_sha1 already contains double SHA1. + if (first_sha1 == password_hash) + return true; + + return encodeSHA1(first_sha1) == password_hash; + } + } + throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); +} + + +void Authentication::checkPassword(const String & password_, const String & user_name) const +{ + if (isCorrectPassword(password_)) + return; + auto info_about_user_name = [&user_name]() { return user_name.empty() ? String() : " for user " + user_name; }; + if (password_.empty() && (type != NO_PASSWORD)) + throw Exception("Password required" + info_about_user_name(), ErrorCodes::REQUIRED_PASSWORD); + throw Exception("Wrong password" + info_about_user_name(), ErrorCodes::WRONG_PASSWORD); +} + + +bool operator ==(const Authentication & lhs, const Authentication & rhs) +{ + return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash); +} +} + diff --git a/dbms/src/Access/Authentication.h b/dbms/src/Access/Authentication.h new file mode 100644 index 00000000000..d8fae6e03eb --- /dev/null +++ b/dbms/src/Access/Authentication.h @@ -0,0 +1,66 @@ +#pragma once + +#include + + +namespace DB +{ +/// Authentication type and encrypted password for checking when an user logins. +class Authentication +{ +public: + enum Type + { + /// User doesn't have to enter password. + NO_PASSWORD, + + /// Password is stored as is. + PLAINTEXT_PASSWORD, + + /// Password is encrypted in SHA256 hash. + SHA256_PASSWORD, + + /// SHA1(SHA1(password)). + /// This kind of hash is used by the `mysql_native_password` authentication plugin. 
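+        /// Illustrative sketch (editorial addition, not part of the patch): a server
+        /// verifying a MySQL-compatible login could use this type roughly as
+        ///     Authentication auth(Authentication::DOUBLE_SHA1_PASSWORD);
+        ///     auth.setPassword(password_from_config);               /// stored as SHA1(SHA1(password))
+        ///     auth.checkPassword(password_from_client, user_name);  /// throws if it does not match
+        /// where `password_from_config`, `password_from_client` and `user_name` are hypothetical.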
+ DOUBLE_SHA1_PASSWORD, + }; + + using Digest = std::vector; + + Authentication(Authentication::Type type = NO_PASSWORD); + Authentication(const Authentication & src) = default; + Authentication & operator =(const Authentication & src) = default; + Authentication(Authentication && src) = default; + Authentication & operator =(Authentication && src) = default; + + Type getType() const { return type; } + + /// Sets the password and encrypt it using the authentication type set in the constructor. + void setPassword(const String & password); + + /// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD. + String getPassword() const; + + /// Sets the password as a string of hexadecimal digits. + void setPasswordHashHex(const String & hash); + String getPasswordHashHex() const; + + /// Sets the password in binary form. + void setPasswordHashBinary(const Digest & hash); + const Digest & getPasswordHashBinary() const { return password_hash; } + + /// Checks if the provided password is correct. Returns false if not. + bool isCorrectPassword(const String & password) const; + + /// Checks if the provided password is correct. Throws an exception if not. + /// `user_name` is only used for generating an error message if the password is incorrect. + void checkPassword(const String & password, const String & user_name = String()) const; + + friend bool operator ==(const Authentication & lhs, const Authentication & rhs); + friend bool operator !=(const Authentication & lhs, const Authentication & rhs) { return !(lhs == rhs); } + +private: + Type type = Type::NO_PASSWORD; + Digest password_hash; +}; +} diff --git a/dbms/src/Access/CMakeLists.txt b/dbms/src/Access/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp b/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp index 02dc796a4cf..f650a178808 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -12,8 +12,8 @@ namespace AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, const DataTypes & argument_types, const Array & parameters) { assertNoParameters(name, parameters); + assertArityAtMost<1>(name, argument_types); - /// 'count' accept any number of arguments and (in this case of non-Nullable types) simply ignore them. return std::make_shared(argument_types); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionCount.h b/dbms/src/AggregateFunctions/AggregateFunctionCount.h index 6cd9c0c7487..c1691e665b3 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionCount.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionCount.h @@ -113,69 +113,4 @@ public: const char * getHeaderFilePath() const override { return __FILE__; } }; - -/// Count number of calls where all arguments are not NULL. 
-class AggregateFunctionCountNotNullVariadic final : public IAggregateFunctionDataHelper -{ -public: - AggregateFunctionCountNotNullVariadic(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper(arguments, params) - { - number_of_arguments = arguments.size(); - - if (number_of_arguments == 1) - throw Exception("Logical error: single argument is passed to AggregateFunctionCountNotNullVariadic", ErrorCodes::LOGICAL_ERROR); - - if (number_of_arguments > MAX_ARGS) - throw Exception("Maximum number of arguments for aggregate function with Nullable types is " + toString(size_t(MAX_ARGS)), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < number_of_arguments; ++i) - is_nullable[i] = arguments[i]->isNullable(); - } - - String getName() const override { return "count"; } - - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override - { - for (size_t i = 0; i < number_of_arguments; ++i) - if (is_nullable[i] && assert_cast(*columns[i]).isNullAt(row_num)) - return; - - ++data(place).count; - } - - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override - { - data(place).count += data(rhs).count; - } - - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override - { - writeVarUInt(data(place).count, buf); - } - - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override - { - readVarUInt(data(place).count, buf); - } - - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override - { - assert_cast(to).getData().push_back(data(place).count); - } - - const char * getHeaderFilePath() const override { return __FILE__; } - -private: - enum { MAX_ARGS = 8 }; - size_t number_of_arguments = 0; - std::array is_nullable; /// Plain array is better than std::vector due to one indirection less. -}; - } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp index 6eb2c0299da..a420ff92f16 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp @@ -1,9 +1,12 @@ #include -#include #include #include #include +// TODO include this last because of a broken roaring header. See the comment +// inside. 
+#include + namespace DB { @@ -36,15 +39,13 @@ AggregateFunctionPtr createAggregateFunctionBitmapL2(const std::string & name, c assertUnary(name, argument_types); DataTypePtr argument_type_ptr = argument_types[0]; WhichDataType which(*argument_type_ptr); - if (which.idx == TypeIndex::AggregateFunction) - { - const DataTypeAggregateFunction& datatype_aggfunc = dynamic_cast(*argument_type_ptr); - AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction(); - argument_type_ptr = aggfunc->getArgumentTypes()[0]; - } + if (which.idx != TypeIndex::AggregateFunction) + throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const DataTypeAggregateFunction& datatype_aggfunc = dynamic_cast(*argument_type_ptr); + AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction(); + argument_type_ptr = aggfunc->getArgumentTypes()[0]; AggregateFunctionPtr res(createWithUnsignedIntegerType(*argument_type_ptr, argument_type_ptr)); - if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index f71ebd53922..65a450bfbaf 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -3,10 +3,13 @@ #include #include #include -#include #include #include +// TODO include this last because of a broken roaring header. See the comment +// inside. +#include + namespace DB { @@ -71,7 +74,7 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { Data & data_lhs = this->data(place); - const Data & data_rhs = this->data(static_cast(*columns[0]).getData()[row_num]); + const Data & data_rhs = this->data(assert_cast(*columns[0]).getData()[row_num]); if (!data_lhs.doneFirst) { data_lhs.doneFirst = true; @@ -110,7 +113,7 @@ public: void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - static_cast &>(to).getData().push_back(this->data(place).rbs.size()); + assert_cast &>(to).getData().push_back(this->data(place).rbs.size()); } const char * getHeaderFilePath() const override { return __FILE__; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index e4cf36f5cb4..5ee9df1625c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -1,14 +1,18 @@ #pragma once #include -#include #include #include #include -#include #include #include +// Include this header last, because it is an auto-generated dump of questionable +// garbage that breaks the build (e.g. it changes _POSIX_C_SOURCE). +// TODO: find out what it is. On github, they have proper inteface headers like +// this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h +#include + namespace DB { /** diff --git a/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp b/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp index 7011ebbde09..a1cba5519e7 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -53,12 +53,7 @@ public: /// Special case for 'count' function. 
It could be called with Nullable arguments /// - that means - count number of calls, when all arguments are not NULL. if (nested_function && nested_function->getName() == "count") - { - if (arguments.size() == 1) - return std::make_shared(arguments[0], params); - else - return std::make_shared(arguments, params); - } + return std::make_shared(arguments[0], params); if (has_null_types) return std::make_shared(arguments, params); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionOrFill.cpp b/dbms/src/AggregateFunctions/AggregateFunctionOrFill.cpp new file mode 100644 index 00000000000..24624415080 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionOrFill.cpp @@ -0,0 +1,39 @@ +#include + +#include + + +namespace DB +{ + +template +class AggregateFunctionCombinatorOrFill final : public IAggregateFunctionCombinator +{ +public: + String getName() const override + { + if constexpr (UseNull) + return "OrNull"; + else + return "OrDefault"; + } + + AggregateFunctionPtr transformAggregateFunction( + const AggregateFunctionPtr & nested_function, + const DataTypes & arguments, + const Array & params) const override + { + return std::make_shared>( + nested_function, + arguments, + params); + } +}; + +void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory & factory) +{ + factory.registerCombinator(std::make_shared>()); + factory.registerCombinator(std::make_shared>()); +} + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionOrFill.h b/dbms/src/AggregateFunctions/AggregateFunctionOrFill.h new file mode 100644 index 00000000000..39cf3f96488 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -0,0 +1,179 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} + +/** + * -OrDefault and -OrNull combinators for aggregate functions. + * If there are no input values, return NULL or a default value, accordingly. + * Use a single additional byte of data after the nested function data: + * 0 means there was no input, 1 means there was some. 
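+ *
+ * For example (behaviour inferred from the implementation below, not a separate
+ * guarantee): `sumOrNull(x)` over zero input rows yields NULL, while
+ * `sumOrDefault(x)` yields the default value of the return type (0 for a numeric sum).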
+ */ +template +class AggregateFunctionOrFill final : public IAggregateFunctionHelper> +{ +private: + AggregateFunctionPtr nested_function; + + size_t size_of_data; + DataTypePtr inner_type; + bool inner_nullable; + +public: + AggregateFunctionOrFill(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : IAggregateFunctionHelper{arguments, params} + , nested_function{nested_function_} + , size_of_data {nested_function->sizeOfData()} + , inner_type {nested_function->getReturnType()} + , inner_nullable {inner_type->isNullable()} + { + // nothing + } + + String getName() const override + { + if constexpr (UseNull) + return nested_function->getName() + "OrNull"; + else + return nested_function->getName() + "OrDefault"; + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } + + bool isState() const override + { + return nested_function->isState(); + } + + bool allocatesMemoryInArena() const override + { + return nested_function->allocatesMemoryInArena(); + } + + bool hasTrivialDestructor() const override + { + return nested_function->hasTrivialDestructor(); + } + + size_t sizeOfData() const override + { + return size_of_data + sizeof(char); + } + + size_t alignOfData() const override + { + return nested_function->alignOfData(); + } + + void create(AggregateDataPtr place) const override + { + nested_function->create(place); + + place[size_of_data] = 0; + } + + void destroy(AggregateDataPtr place) const noexcept override + { + nested_function->destroy(place); + } + + void add( + AggregateDataPtr place, + const IColumn ** columns, + size_t row_num, + Arena * arena) const override + { + nested_function->add(place, columns, row_num, arena); + + place[size_of_data] = 1; + } + + void merge( + AggregateDataPtr place, + ConstAggregateDataPtr rhs, + Arena * arena) const override + { + nested_function->merge(place, rhs, arena); + } + + void serialize( + ConstAggregateDataPtr place, + WriteBuffer & buf) const override + { + nested_function->serialize(place, buf); + } + + void deserialize( + AggregateDataPtr place, + ReadBuffer & buf, + Arena * arena) const override + { + nested_function->deserialize(place, buf, arena); + } + + DataTypePtr getReturnType() const override + { + if constexpr (UseNull) + { + // -OrNull + + if (inner_nullable) + return inner_type; + + return std::make_shared(inner_type); + } + else + { + // -OrDefault + + return inner_type; + } + } + + void insertResultInto( + ConstAggregateDataPtr place, + IColumn & to) const override + { + if (place[size_of_data]) + { + if constexpr (UseNull) + { + // -OrNull + + if (inner_nullable) + nested_function->insertResultInto(place, to); + else + { + ColumnNullable & col = typeid_cast(to); + + col.getNullMapColumn().insertDefault(); + nested_function->insertResultInto(place, col.getNestedColumn()); + } + } + else + { + // -OrDefault + + nested_function->insertResultInto(place, to); + } + } + else + to.insertDefault(); + } +}; + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionResample.h b/dbms/src/AggregateFunctions/AggregateFunctionResample.h index 39f1e8a35c4..3864de7db4f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionResample.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionResample.h @@ -29,8 +29,8 @@ private: size_t step; size_t total; - size_t aod; - size_t sod; + size_t align_of_data; + size_t size_of_data; public: AggregateFunctionResample( @@ -47,8 +47,8 @@ public: , end{end_} , step{step_} , total{0} - , aod{nested_function->alignOfData()} - , 
sod{(nested_function->sizeOfData() + aod - 1) / aod * aod} + , align_of_data{nested_function->alignOfData()} + , size_of_data{(nested_function->sizeOfData() + align_of_data - 1) / align_of_data * align_of_data} { // notice: argument types has been checked before if (step == 0) @@ -94,24 +94,24 @@ public: size_t sizeOfData() const override { - return total * sod; + return total * size_of_data; } size_t alignOfData() const override { - return aod; + return align_of_data; } void create(AggregateDataPtr place) const override { for (size_t i = 0; i < total; ++i) - nested_function->create(place + i * sod); + nested_function->create(place + i * size_of_data); } void destroy(AggregateDataPtr place) const noexcept override { for (size_t i = 0; i < total; ++i) - nested_function->destroy(place + i * sod); + nested_function->destroy(place + i * size_of_data); } void add( @@ -132,7 +132,7 @@ public: size_t pos = (key - begin) / step; - nested_function->add(place + pos * sod, columns, row_num, arena); + nested_function->add(place + pos * size_of_data, columns, row_num, arena); } void merge( @@ -141,7 +141,7 @@ public: Arena * arena) const override { for (size_t i = 0; i < total; ++i) - nested_function->merge(place + i * sod, rhs + i * sod, arena); + nested_function->merge(place + i * size_of_data, rhs + i * size_of_data, arena); } void serialize( @@ -149,7 +149,7 @@ public: WriteBuffer & buf) const override { for (size_t i = 0; i < total; ++i) - nested_function->serialize(place + i * sod, buf); + nested_function->serialize(place + i * size_of_data, buf); } void deserialize( @@ -158,7 +158,7 @@ public: Arena * arena) const override { for (size_t i = 0; i < total; ++i) - nested_function->deserialize(place + i * sod, buf, arena); + nested_function->deserialize(place + i * size_of_data, buf, arena); } DataTypePtr getReturnType() const override @@ -174,7 +174,7 @@ public: auto & col_offsets = assert_cast(col.getOffsetsColumn()); for (size_t i = 0; i < total; ++i) - nested_function->insertResultInto(place + i * sod, col.getData()); + nested_function->insertResultInto(place + i * size_of_data, col.getData()); col_offsets.getData().push_back(col.getData().size()); } diff --git a/dbms/src/AggregateFunctions/FactoryHelpers.h b/dbms/src/AggregateFunctions/FactoryHelpers.h index 77e06fd4880..183116df54e 100644 --- a/dbms/src/AggregateFunctions/FactoryHelpers.h +++ b/dbms/src/AggregateFunctions/FactoryHelpers.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -31,4 +32,22 @@ inline void assertBinary(const std::string & name, const DataTypes & argument_ty throw Exception("Aggregate function " + name + " require two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } +template +inline void assertArityAtMost(const std::string & name, const DataTypes & argument_types) +{ + if (argument_types.size() <= maximal_arity) + return; + + if constexpr (maximal_arity == 0) + throw Exception("Aggregate function " + name + " cannot have arguments", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if constexpr (maximal_arity == 1) + throw Exception("Aggregate function " + name + " requires zero or one argument", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + throw Exception("Aggregate function " + name + " requires at most " + toString(maximal_arity) + " arguments", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); +} + } diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index cc03965715c..4d1b22dcba3 100644 --- 
a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -42,6 +42,7 @@ void registerAggregateFunctionCombinatorForEach(AggregateFunctionCombinatorFacto void registerAggregateFunctionCombinatorState(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &); +void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &); void registerAggregateFunctions() @@ -88,6 +89,7 @@ void registerAggregateFunctions() registerAggregateFunctionCombinatorState(factory); registerAggregateFunctionCombinatorMerge(factory); registerAggregateFunctionCombinatorNull(factory); + registerAggregateFunctionCombinatorOrFill(factory); registerAggregateFunctionCombinatorResample(factory); } } diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 84755f7f280..591fcd784b3 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory (Access) add_subdirectory (Columns) add_subdirectory (Common) add_subdirectory (Core) diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp index 9256b9e9fa3..a353fda12ad 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.cpp +++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index 5c9be660754..af1825dca87 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -3,6 +3,7 @@ #include #include +#include #include diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 7d1ff63a62e..ab414f4917c 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/dbms/src/Columns/ColumnConst.cpp b/dbms/src/Columns/ColumnConst.cpp index 91036499871..4324d439fb0 100644 --- a/dbms/src/Columns/ColumnConst.cpp +++ b/dbms/src/Columns/ColumnConst.cpp @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index a8cb365ab67..ad9d00661a0 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index ef25a036f60..91f0e92c0a9 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/dbms/src/Columns/ColumnFunction.cpp b/dbms/src/Columns/ColumnFunction.cpp index 75fe22446f4..96434c7a0af 100644 --- a/dbms/src/Columns/ColumnFunction.cpp +++ b/dbms/src/Columns/ColumnFunction.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 452b77bb7e2..b1fa32fd6db 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git 
a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index da4eb055e36..7c686f79767 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Columns/ColumnTuple.cpp b/dbms/src/Columns/ColumnTuple.cpp index 4c5fe54b3d6..d7aacec8d09 100644 --- a/dbms/src/Columns/ColumnTuple.cpp +++ b/dbms/src/Columns/ColumnTuple.cpp @@ -94,16 +94,17 @@ MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const Field ColumnTuple::operator[](size_t n) const { - return Tuple{ext::map(columns, [n] (const auto & column) { return (*column)[n]; })}; + return ext::map(columns, [n] (const auto & column) { return (*column)[n]; }); } void ColumnTuple::get(size_t n, Field & res) const { const size_t tuple_size = columns.size(); - res = Tuple(TupleBackend(tuple_size)); - TupleBackend & res_arr = DB::get(res).toUnderType(); + Tuple tuple(tuple_size); for (const auto i : ext::range(0, tuple_size)) - columns[i]->get(n, res_arr[i]); + columns[i]->get(n, tuple[i]); + + res = tuple; } StringRef ColumnTuple::getDataAt(size_t) const @@ -118,7 +119,7 @@ void ColumnTuple::insertData(const char *, size_t) void ColumnTuple::insert(const Field & x) { - const TupleBackend & tuple = DB::get(x).toUnderType(); + auto & tuple = DB::get(x); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -352,14 +353,14 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const { const size_t tuple_size = columns.size(); - min = Tuple(TupleBackend(tuple_size)); - max = Tuple(TupleBackend(tuple_size)); - - auto & min_backend = min.get().toUnderType(); - auto & max_backend = max.get().toUnderType(); + Tuple min_tuple(tuple_size); + Tuple max_tuple(tuple_size); for (const auto i : ext::range(0, tuple_size)) - columns[i]->getExtremes(min_backend[i], max_backend[i]); + columns[i]->getExtremes(min_tuple[i], max_tuple[i]); + + min = min_tuple; + max = max_tuple; } void ColumnTuple::forEachSubcolumn(ColumnCallback callback) diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index c97ddc2f8ac..28307cb33f0 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index d57fd228a17..2b340a84783 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -1,10 +1,11 @@ #pragma once #include -#include +#include #include #include #include +#include class SipHash; @@ -373,32 +374,7 @@ protected: /// Template is to devirtualize calls to insertFrom method. /// In derived classes (that use final keyword), implement scatter method as call to scatterImpl. template - std::vector scatterImpl(ColumnIndex num_columns, const Selector & selector) const - { - size_t num_rows = size(); - - if (num_rows != selector.size()) - throw Exception( - "Size of selector: " + std::to_string(selector.size()) + " doesn't match size of column: " + std::to_string(num_rows), - ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - std::vector columns(num_columns); - for (auto & column : columns) - column = cloneEmpty(); - - { - size_t reserve_size = num_rows * 1.1 / num_columns; /// 1.1 is just a guess. Better to use n-sigma rule. 
- - if (reserve_size > 1) - for (auto & column : columns) - column->reserve(reserve_size); - } - - for (size_t i = 0; i < num_rows; ++i) - static_cast(*columns[selector[i]]).insertFrom(*this, i); - - return columns; - } + std::vector scatterImpl(ColumnIndex num_columns, const Selector & selector) const; }; using ColumnPtr = IColumn::Ptr; diff --git a/dbms/src/Columns/IColumnDummy.h b/dbms/src/Columns/IColumnDummy.h index beb0d101122..0b56647d5a2 100644 --- a/dbms/src/Columns/IColumnDummy.h +++ b/dbms/src/Columns/IColumnDummy.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include diff --git a/dbms/src/Columns/IColumnImpl.h b/dbms/src/Columns/IColumnImpl.h new file mode 100644 index 00000000000..1690d8dd61a --- /dev/null +++ b/dbms/src/Columns/IColumnImpl.h @@ -0,0 +1,45 @@ +/** + * This file implements template methods of IColumn that depend on other types + * we don't want to include. + * Currently, this is only the scatterImpl method that depends on PODArray + * implementation. + */ + +#pragma once + +#include +#include + +namespace DB +{ + +template +std::vector IColumn::scatterImpl(ColumnIndex num_columns, + const Selector & selector) const +{ + size_t num_rows = size(); + + if (num_rows != selector.size()) + throw Exception( + "Size of selector: " + std::to_string(selector.size()) + " doesn't match size of column: " + std::to_string(num_rows), + ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + std::vector columns(num_columns); + for (auto & column : columns) + column = cloneEmpty(); + + { + size_t reserve_size = num_rows * 1.1 / num_columns; /// 1.1 is just a guess. Better to use n-sigma rule. + + if (reserve_size > 1) + for (auto & column : columns) + column->reserve(reserve_size); + } + + for (size_t i = 0; i < num_rows; ++i) + static_cast(*columns[selector[i]]).insertFrom(*this, i); + + return columns; +} + +} diff --git a/dbms/src/Common/Allocator.h b/dbms/src/Common/Allocator.h index 0a6a8fc202f..5d39d327243 100644 --- a/dbms/src/Common/Allocator.h +++ b/dbms/src/Common/Allocator.h @@ -30,6 +30,8 @@ #include #include +#include + /// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS #ifndef MAP_ANONYMOUS @@ -84,7 +86,7 @@ namespace ErrorCodes * - random hint address for mmap * - mmap_threshold for using mmap less or more */ -template +template class Allocator { public: @@ -270,7 +272,7 @@ private: /** Allocator with optimization to place small memory ranges in automatic memory. */ -template +template class AllocatorWithStackMemory : private Base { private: diff --git a/dbms/src/Common/Allocator_fwd.h b/dbms/src/Common/Allocator_fwd.h new file mode 100644 index 00000000000..8ee4d4d7028 --- /dev/null +++ b/dbms/src/Common/Allocator_fwd.h @@ -0,0 +1,10 @@ +/** + * This file provides forward declarations for Allocator. 
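+ * Presumably they exist so that headers such as PODArray_fwd.h can name Allocator
+ * in default template arguments without pulling in the full Allocator.h definition.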
+ */ +#pragma once + +template +class Allocator; + +template +class AllocatorWithStackMemory; diff --git a/dbms/src/Common/DiskSpaceMonitor.cpp b/dbms/src/Common/DiskSpaceMonitor.cpp index 967aa34ee40..00a146a809e 100644 --- a/dbms/src/Common/DiskSpaceMonitor.cpp +++ b/dbms/src/Common/DiskSpaceMonitor.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include diff --git a/dbms/src/Common/FieldVisitors.cpp b/dbms/src/Common/FieldVisitors.cpp index c5ce10c0db4..8380061209a 100644 --- a/dbms/src/Common/FieldVisitors.cpp +++ b/dbms/src/Common/FieldVisitors.cpp @@ -72,9 +72,8 @@ String FieldVisitorDump::operator() (const Array & x) const return wb.str(); } -String FieldVisitorDump::operator() (const Tuple & x_def) const +String FieldVisitorDump::operator() (const Tuple & x) const { - auto & x = x_def.toUnderType(); WriteBufferFromOwnString wb; wb << "Tuple_("; @@ -149,9 +148,8 @@ String FieldVisitorToString::operator() (const Array & x) const return wb.str(); } -String FieldVisitorToString::operator() (const Tuple & x_def) const +String FieldVisitorToString::operator() (const Tuple & x) const { - auto & x = x_def.toUnderType(); WriteBufferFromOwnString wb; wb << '('; @@ -211,6 +209,16 @@ void FieldVisitorHash::operator() (const String & x) const hash.update(x.data(), x.size()); } +void FieldVisitorHash::operator() (const Tuple & x) const +{ + UInt8 type = Field::Types::Tuple; + hash.update(type); + hash.update(x.size()); + + for (const auto & elem : x) + applyVisitor(*this, elem); +} + void FieldVisitorHash::operator() (const Array & x) const { UInt8 type = Field::Types::Array; diff --git a/dbms/src/Common/FieldVisitors.h b/dbms/src/Common/FieldVisitors.h index 56d3c84decc..a1de23d5820 100644 --- a/dbms/src/Common/FieldVisitors.h +++ b/dbms/src/Common/FieldVisitors.h @@ -231,6 +231,7 @@ public: void operator() (const Float64 & x) const; void operator() (const String & x) const; void operator() (const Array & x) const; + void operator() (const Tuple & x) const; void operator() (const DecimalField & x) const; void operator() (const DecimalField & x) const; void operator() (const DecimalField & x) const; @@ -479,6 +480,7 @@ public: bool operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } bool operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); } bool operator() (UInt128 &) const { throw Exception("Cannot sum UUIDs", ErrorCodes::LOGICAL_ERROR); } bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Common/MemorySanitizer.h b/dbms/src/Common/MemorySanitizer.h index a3ca5a44cb5..6ece85901a8 100644 --- a/dbms/src/Common/MemorySanitizer.h +++ b/dbms/src/Common/MemorySanitizer.h @@ -1,5 +1,10 @@ #pragma once +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#endif + #define __msan_unpoison(X, Y) #define __msan_test_shadow(X, Y) (false) #define __msan_print_shadow(X, Y) @@ -11,3 +16,7 @@ # include # endif #endif + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 523927ce40a..def8f675c25 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -21,6 
+21,8 @@ #include #endif +#include + namespace DB { @@ -30,11 +32,6 @@ namespace ErrorCodes extern const int CANNOT_MPROTECT; } -inline constexpr size_t integerRoundUp(size_t value, size_t dividend) -{ - return ((value + dividend - 1) / dividend) * dividend; -} - /** A dynamic array for POD types. * Designed for a small number of large arrays (rather than a lot of small ones). * To be more precise - for use in ColumnVector. @@ -258,7 +255,7 @@ public: } }; -template , size_t pad_right_ = 0, size_t pad_left_ = 0> +template class PODArray : public PODArrayBase { protected: @@ -625,17 +622,5 @@ void swap(PODArray & lhs, PODArray> -using PaddedPODArray = PODArray; - -/** A helper for declaring PODArray that uses inline memory. - * The initial size is set to use all the inline bytes, since using less would - * only add some extra allocation calls. - */ -template -using PODArrayWithStackMemory = PODArray, rounded_bytes, alignof(T)>>; } diff --git a/dbms/src/Common/PODArray_fwd.h b/dbms/src/Common/PODArray_fwd.h new file mode 100644 index 00000000000..8d3f0bc273c --- /dev/null +++ b/dbms/src/Common/PODArray_fwd.h @@ -0,0 +1,35 @@ +/** + * This file contains some using-declarations that define various kinds of + * PODArray. + */ +#pragma once + +#include + +namespace DB +{ + +inline constexpr size_t integerRoundUp(size_t value, size_t dividend) +{ + return ((value + dividend - 1) / dividend) * dividend; +} + +template , size_t pad_right_ = 0, + size_t pad_left_ = 0> +class PODArray; + +/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ +template > +using PaddedPODArray = PODArray; + +/** A helper for declaring PODArray that uses inline memory. + * The initial size is set to use all the inline bytes, since using less would + * only add some extra allocation calls. + */ +template +using PODArrayWithStackMemory = PODArray, rounded_bytes, alignof(T)>>; + +} diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index b88106fab73..c1cad465ed2 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -121,13 +121,13 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::opti } template -void ThreadPoolImpl::schedule(Job job, int priority) +void ThreadPoolImpl::scheduleOrThrowOnError(Job job, int priority) { scheduleImpl(std::move(job), priority, std::nullopt); } template -bool ThreadPoolImpl::trySchedule(Job job, int priority, uint64_t wait_microseconds) +bool ThreadPoolImpl::trySchedule(Job job, int priority, uint64_t wait_microseconds) noexcept { return scheduleImpl(std::move(job), priority, wait_microseconds); } diff --git a/dbms/src/Common/ThreadPool.h b/dbms/src/Common/ThreadPool.h index 2ced4626a1b..1b3071f732c 100644 --- a/dbms/src/Common/ThreadPool.h +++ b/dbms/src/Common/ThreadPool.h @@ -36,18 +36,23 @@ public: ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_); /// Add new job. Locks until number of scheduled jobs is less than maximum or exception in one of threads was thrown. - /// If an exception in some thread was thrown, method silently returns, and exception will be rethrown only on call to 'wait' function. + /// If any thread was throw an exception, first exception will be rethrown from this method, + /// and exception will be cleared. + /// Also throws an exception if cannot create thread. /// Priority: greater is higher. - void schedule(Job job, int priority = 0); + /// NOTE: Probably you should call wait() if exception was thrown. 
If some previously scheduled jobs are using some objects, + /// located on stack of current thread, the stack must not be unwinded until all jobs finished. However, + /// if ThreadPool is a local object, it will wait for all scheduled jobs in own destructor. + void scheduleOrThrowOnError(Job job, int priority = 0); - /// Wait for specified amount of time and schedule a job or return false. - bool trySchedule(Job job, int priority = 0, uint64_t wait_microseconds = 0); + /// Similar to scheduleOrThrowOnError(...). Wait for specified amount of time and schedule a job or return false. + bool trySchedule(Job job, int priority = 0, uint64_t wait_microseconds = 0) noexcept; - /// Wait for specified amount of time and schedule a job or throw an exception. + /// Similar to scheduleOrThrowOnError(...). Wait for specified amount of time and schedule a job or throw an exception. void scheduleOrThrow(Job job, int priority = 0, uint64_t wait_microseconds = 0); /// Wait for all currently active jobs to be done. - /// You may call schedule and wait many times in arbitary order. + /// You may call schedule and wait many times in arbitrary order. /// If any thread was throw an exception, first exception will be rethrown from this method, /// and exception will be cleared. void wait(); @@ -140,7 +145,7 @@ public: explicit ThreadFromGlobalPool(Function && func, Args &&... args) : state(std::make_shared()) { - /// NOTE: If this will throw an exception, the descructor won't be called. + /// NOTE: If this will throw an exception, the destructor won't be called. GlobalThreadPool::instance().scheduleOrThrow([ state = state, func = std::forward(func), diff --git a/dbms/src/Common/formatReadable.h b/dbms/src/Common/formatReadable.h index 0252c5b3282..87d24b75c20 100644 --- a/dbms/src/Common/formatReadable.h +++ b/dbms/src/Common/formatReadable.h @@ -1,8 +1,13 @@ #pragma once #include -#include +namespace DB +{ + +class WriteBuffer; + +} /// Displays the passed size in bytes as 123.45 GiB. void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2); diff --git a/dbms/src/Common/getMultipleKeysFromConfig.cpp b/dbms/src/Common/getMultipleKeysFromConfig.cpp index fa50d82adab..ecad5cc45eb 100644 --- a/dbms/src/Common/getMultipleKeysFromConfig.cpp +++ b/dbms/src/Common/getMultipleKeysFromConfig.cpp @@ -24,7 +24,7 @@ std::vector getMultipleValuesFromConfig(const Poco::Util::AbstractC { std::vector values; for (const auto & key : DB::getMultipleKeysFromConfig(config, root, name)) - values.emplace_back(config.getString(key)); + values.emplace_back(config.getString(root.empty() ? key : root + "." 
+ key)); return values; } diff --git a/dbms/src/Common/quoteString.cpp b/dbms/src/Common/quoteString.cpp new file mode 100644 index 00000000000..bcc6906ddfa --- /dev/null +++ b/dbms/src/Common/quoteString.cpp @@ -0,0 +1,37 @@ +#include +#include +#include + + +namespace DB +{ +String quoteString(const StringRef & x) +{ + String res(x.size, '\0'); + WriteBufferFromString wb(res); + writeQuotedString(x, wb); + return res; +} + + +String backQuote(const StringRef & x) +{ + String res(x.size, '\0'); + { + WriteBufferFromString wb(res); + writeBackQuotedString(x, wb); + } + return res; +} + + +String backQuoteIfNeed(const StringRef & x) +{ + String res(x.size, '\0'); + { + WriteBufferFromString wb(res); + writeProbablyBackQuotedString(x, wb); + } + return res; +} +} diff --git a/dbms/src/Common/quoteString.h b/dbms/src/Common/quoteString.h new file mode 100644 index 00000000000..f17f6c7015d --- /dev/null +++ b/dbms/src/Common/quoteString.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + + +namespace DB +{ +/// Quote the string. +String quoteString(const StringRef & x); + +/// Quote the identifier with backquotes. +String backQuote(const StringRef & x); + +/// Quote the identifier with backquotes, if required. +String backQuoteIfNeed(const StringRef & x); +} diff --git a/dbms/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp b/dbms/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp new file mode 100644 index 00000000000..4756043acbf --- /dev/null +++ b/dbms/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp @@ -0,0 +1,26 @@ +#include +#include +#include + +#include + + +using namespace DB; + +TEST(Common, getMultipleValuesFromConfig) +{ + std::istringstream xml_isteam(R"END( + + + 0 + 1 + 2 + 3 + +)END"); + + Poco::AutoPtr config = new Poco::Util::XMLConfiguration(xml_isteam); + std::vector answer = getMultipleValuesFromConfig(*config, "first_level", "second_level"); + std::vector right_answer = {"0", "1", "2", "3"}; + EXPECT_EQ(answer, right_answer); +} diff --git a/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp index 213e70ce3dd..f5f14739e39 100644 --- a/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp @@ -21,14 +21,14 @@ TEST(ThreadPool, ConcurrentWait) ThreadPool pool(num_threads); for (size_t i = 0; i < num_jobs; ++i) - pool.schedule(worker); + pool.scheduleOrThrowOnError(worker); constexpr size_t num_waiting_threads = 4; ThreadPool waiting_pool(num_waiting_threads); for (size_t i = 0; i < num_waiting_threads; ++i) - waiting_pool.schedule([&pool]{ pool.wait(); }); + waiting_pool.scheduleOrThrowOnError([&pool] { pool.wait(); }); waiting_pool.wait(); } diff --git a/dbms/src/Common/tests/gtest_thread_pool_global_full.cpp b/dbms/src/Common/tests/gtest_thread_pool_global_full.cpp index 597ed60baac..583d43be1bb 100644 --- a/dbms/src/Common/tests/gtest_thread_pool_global_full.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_global_full.cpp @@ -30,11 +30,11 @@ TEST(ThreadPool, GlobalFull1) ThreadPool pool(num_jobs); for (size_t i = 0; i < capacity; ++i) - pool.schedule(func); + pool.scheduleOrThrowOnError(func); for (size_t i = capacity; i < num_jobs; ++i) { - EXPECT_THROW(pool.schedule(func), DB::Exception); + EXPECT_THROW(pool.scheduleOrThrowOnError(func), DB::Exception); ++counter; } @@ -67,10 +67,10 @@ TEST(ThreadPool, GlobalFull2) ThreadPool pool(capacity, 0, capacity); for (size_t i = 0; i < capacity; ++i) - 
pool.schedule(func); + pool.scheduleOrThrowOnError(func); ThreadPool another_pool(1); - EXPECT_THROW(another_pool.schedule(func), DB::Exception); + EXPECT_THROW(another_pool.scheduleOrThrowOnError(func), DB::Exception); ++counter; @@ -79,7 +79,7 @@ TEST(ThreadPool, GlobalFull2) global_pool.wait(); for (size_t i = 0; i < capacity; ++i) - another_pool.schedule([&] { ++counter; }); + another_pool.scheduleOrThrowOnError([&] { ++counter; }); another_pool.wait(); EXPECT_EQ(counter, capacity * 2 + 1); diff --git a/dbms/src/Common/tests/gtest_thread_pool_limit.cpp b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp index c18ff2e38ee..bc67ffd0bc1 100644 --- a/dbms/src/Common/tests/gtest_thread_pool_limit.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp @@ -14,7 +14,7 @@ int test() std::atomic counter{0}; for (size_t i = 0; i < 10; ++i) - pool.schedule([&]{ ++counter; }); + pool.scheduleOrThrowOnError([&]{ ++counter; }); pool.wait(); return counter; diff --git a/dbms/src/Common/tests/gtest_thread_pool_loop.cpp b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp index 63d4114b867..15915044652 100644 --- a/dbms/src/Common/tests/gtest_thread_pool_loop.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp @@ -14,7 +14,7 @@ TEST(ThreadPool, Loop) size_t threads = 16; ThreadPool pool(threads); for (size_t j = 0; j < threads; ++j) - pool.schedule([&]{ ++res; }); + pool.scheduleOrThrowOnError([&] { ++res; }); pool.wait(); } diff --git a/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp index 52091a1ea7f..373c9421e94 100644 --- a/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp @@ -9,12 +9,12 @@ bool check() { ThreadPool pool(10); - pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + pool.scheduleOrThrowOnError([] { throw std::runtime_error("Hello, world!"); }); try { for (size_t i = 0; i < 100; ++i) - pool.schedule([]{}); /// An exception will be rethrown from this method. + pool.scheduleOrThrowOnError([] {}); /// An exception will be rethrown from this method. } catch (const std::runtime_error &) { diff --git a/dbms/src/Common/tests/multi_version.cpp b/dbms/src/Common/tests/multi_version.cpp index 0937e597e2d..a4645a16e6e 100644 --- a/dbms/src/Common/tests/multi_version.cpp +++ b/dbms/src/Common/tests/multi_version.cpp @@ -37,8 +37,8 @@ int main(int, char **) ThreadPool tp(8); for (size_t i = 0; i < n; ++i) { - tp.schedule(std::bind(thread1, std::ref(x), std::ref(results[i]))); - tp.schedule(std::bind(thread2, std::ref(x), (rand() % 2) ? s1 : s2)); + tp.scheduleOrThrowOnError(std::bind(thread1, std::ref(x), std::ref(results[i]))); + tp.scheduleOrThrowOnError(std::bind(thread2, std::ref(x), (rand() % 2) ? 
s1 : s2)); } tp.wait(); diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp index 4b3cc3006d7..7ecb054b481 100644 --- a/dbms/src/Common/tests/parallel_aggregation.cpp +++ b/dbms/src/Common/tests/parallel_aggregation.cpp @@ -284,7 +284,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate1, + pool.scheduleOrThrowOnError(std::bind(aggregate1, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); @@ -338,7 +338,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate12, + pool.scheduleOrThrowOnError(std::bind(aggregate12, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); @@ -397,7 +397,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate1, + pool.scheduleOrThrowOnError(std::bind(aggregate1, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); @@ -473,7 +473,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate2, + pool.scheduleOrThrowOnError(std::bind(aggregate2, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); @@ -499,7 +499,7 @@ int main(int argc, char ** argv) watch.restart(); for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) - pool.schedule(std::bind(merge2, + pool.scheduleOrThrowOnError(std::bind(merge2, maps.data(), num_threads, i)); pool.wait(); @@ -527,7 +527,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate22, + pool.scheduleOrThrowOnError(std::bind(aggregate22, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); @@ -553,7 +553,7 @@ int main(int argc, char ** argv) watch.restart(); for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) - pool.schedule(std::bind(merge2, maps.data(), num_threads, i)); + pool.scheduleOrThrowOnError(std::bind(merge2, maps.data(), num_threads, i)); pool.wait(); @@ -592,7 +592,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate3, + pool.scheduleOrThrowOnError(std::bind(aggregate3, std::ref(local_maps[i]), std::ref(global_map), std::ref(mutex), @@ -658,7 +658,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate33, + pool.scheduleOrThrowOnError(std::bind(aggregate33, std::ref(local_maps[i]), std::ref(global_map), std::ref(mutex), @@ -727,7 +727,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate4, + pool.scheduleOrThrowOnError(std::bind(aggregate4, std::ref(local_maps[i]), std::ref(global_map), mutexes.data(), @@ -797,7 +797,7 @@ int main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate5, + pool.scheduleOrThrowOnError(std::bind(aggregate5, std::ref(local_maps[i]), std::ref(global_map), data.begin() + (data.size() * i) / num_threads, @@ -860,7 +860,7 @@ int 
main(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < num_threads; ++i) - pool.schedule(std::bind(aggregate1, + pool.scheduleOrThrowOnError(std::bind(aggregate1, std::ref(maps[i]), data.begin() + (data.size() * i) / num_threads, data.begin() + (data.size() * (i + 1)) / num_threads)); diff --git a/dbms/src/Common/tests/parallel_aggregation2.cpp b/dbms/src/Common/tests/parallel_aggregation2.cpp index 7df230c5651..56eb34bbf0c 100644 --- a/dbms/src/Common/tests/parallel_aggregation2.cpp +++ b/dbms/src/Common/tests/parallel_aggregation2.cpp @@ -42,7 +42,7 @@ struct AggregateIndependent auto end = data.begin() + (data.size() * (i + 1)) / num_threads; auto & map = *results[i]; - pool.schedule([&, begin, end]() + pool.scheduleOrThrowOnError([&, begin, end]() { for (auto it = begin; it != end; ++it) { @@ -85,7 +85,7 @@ struct AggregateIndependentWithSequentialKeysOptimization auto end = data.begin() + (data.size() * (i + 1)) / num_threads; auto & map = *results[i]; - pool.schedule([&, begin, end]() + pool.scheduleOrThrowOnError([&, begin, end]() { typename Map::LookupResult place = nullptr; Key prev_key {}; @@ -180,7 +180,7 @@ struct MergeParallelForTwoLevelTable ThreadPool & pool) { for (size_t bucket = 0; bucket < Map::NUM_BUCKETS; ++bucket) - pool.schedule([&, bucket, num_maps] + pool.scheduleOrThrowOnError([&, bucket, num_maps] { std::vector section(num_maps); for (size_t i = 0; i < num_maps; ++i) diff --git a/dbms/src/Common/tests/thread_creation_latency.cpp b/dbms/src/Common/tests/thread_creation_latency.cpp index 120f9bca2cd..480199f211f 100644 --- a/dbms/src/Common/tests/thread_creation_latency.cpp +++ b/dbms/src/Common/tests/thread_creation_latency.cpp @@ -66,7 +66,7 @@ int main(int argc, char ** argv) test(n, "Create and destroy ThreadPool each iteration", [] { ThreadPool tp(1); - tp.schedule(f); + tp.scheduleOrThrowOnError(f); tp.wait(); }); @@ -90,7 +90,7 @@ int main(int argc, char ** argv) test(n, "Schedule job for Threadpool each iteration", [&tp] { - tp.schedule(f); + tp.scheduleOrThrowOnError(f); tp.wait(); }); } @@ -100,7 +100,7 @@ int main(int argc, char ** argv) test(n, "Schedule job for Threadpool with 128 threads each iteration", [&tp] { - tp.schedule(f); + tp.scheduleOrThrowOnError(f); tp.wait(); }); } diff --git a/dbms/src/Compression/CompressionCodecMultiple.cpp b/dbms/src/Compression/CompressionCodecMultiple.cpp index fad72c932c3..9c9ac2cac13 100644 --- a/dbms/src/Compression/CompressionCodecMultiple.cpp +++ b/dbms/src/Compression/CompressionCodecMultiple.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Compression/ICompressionCodec.h b/dbms/src/Compression/ICompressionCodec.h index bc08df03d18..5f508297721 100644 --- a/dbms/src/Compression/ICompressionCodec.h +++ b/dbms/src/Compression/ICompressionCodec.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp index a952dccc27d..9d27e33c414 100644 --- a/dbms/src/Core/Field.cpp +++ b/dbms/src/Core/Field.cpp @@ -152,9 +152,8 @@ namespace DB buf.write(res.data(), res.size()); } - void readBinary(Tuple & x_def, ReadBuffer & buf) + void readBinary(Tuple & x, ReadBuffer & buf) { - auto & x = x_def.toUnderType(); size_t size; DB::readBinary(size, buf); @@ -231,9 +230,8 @@ namespace DB } } - void writeBinary(const Tuple & x_def, WriteBuffer & buf) + void writeBinary(const Tuple & x, WriteBuffer & buf) { - auto & x = x_def.toUnderType(); const size_t size = x.size(); 
DB::writeBinary(size, buf); @@ -292,7 +290,12 @@ namespace DB void writeText(const Tuple & x, WriteBuffer & buf) { - DB::String res = applyVisitor(DB::FieldVisitorToString(), DB::Field(x)); + writeFieldText(DB::Field(x), buf); + } + + void writeFieldText(const Field & x, WriteBuffer & buf) + { + DB::String res = applyVisitor(DB::FieldVisitorToString(), x); buf.write(res.data(), res.size()); } diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index a5b8538932e..a35bf608e5c 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -34,9 +34,23 @@ template using NearestFieldType = typename NearestFieldTypeImpl::Type; class Field; -using Array = std::vector; -using TupleBackend = std::vector; -STRONG_TYPEDEF(TupleBackend, Tuple) /// Array and Tuple are different types with equal representation inside Field. +using FieldVector = std::vector; + +/// Array and Tuple use the same storage type -- FieldVector, but we declare +/// distinct types for them, so that the caller can choose whether it wants to +/// construct a Field of Array or a Tuple type. An alternative approach would be +/// to construct both of these types from FieldVector, and have the caller +/// specify the desired Field type explicitly. +#define DEFINE_FIELD_VECTOR(X) \ +struct X : public FieldVector \ +{ \ + using FieldVector::FieldVector; \ +} + +DEFINE_FIELD_VECTOR(Array); +DEFINE_FIELD_VECTOR(Tuple); + +#undef DEFINE_FIELD_VECTOR struct AggregateFunctionStateData { @@ -457,7 +471,6 @@ private: void createConcrete(T && x) { using UnqualifiedType = std::decay_t; - which = TypeToEnum::value; // In both Field and PODArray, small types may be stored as wider types, // e.g. char is stored as UInt64. Field can return this extended value @@ -466,6 +479,7 @@ private: // nominal type. using StorageType = NearestFieldType; new (&storage) StorageType(std::forward(x)); + which = TypeToEnum::value; } /// Assuming same types. 
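The DEFINE_FIELD_VECTOR comment above spells out the design choice: Array and Tuple keep the same underlying FieldVector storage but become distinct types, so the static type of the constructor argument alone decides which variant a Field holds (previously Tuple was a STRONG_TYPEDEF and had to be unwrapped with toUnderType()). A minimal sketch under that assumption; the values are invented for illustration:

    #include <Core/Field.h>

    using namespace DB;

    Array arr;                        // std::vector<Field> underneath
    arr.push_back(Field(UInt64(1)));
    arr.push_back(Field(UInt64(2)));

    Tuple tup;
    tup.push_back(Field(String("a")));
    tup.push_back(Field(UInt64(3)));

    Field f_array(arr);               // f_array.getType() == Field::Types::Array
    Field f_tuple(tup);               // f_tuple.getType() == Field::Types::Tuple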
@@ -748,5 +762,7 @@ void writeBinary(const Tuple & x, WriteBuffer & buf); void writeText(const Tuple & x, WriteBuffer & buf); +void writeFieldText(const Field & x, WriteBuffer & buf); + [[noreturn]] inline void writeQuoted(const Tuple &, WriteBuffer &) { throw Exception("Cannot write Tuple quoted.", ErrorCodes::NOT_IMPLEMENTED); } } diff --git a/dbms/src/Core/MySQLProtocol.h b/dbms/src/Core/MySQLProtocol.h index 2ac255cca34..2829e489f25 100644 --- a/dbms/src/Core/MySQLProtocol.h +++ b/dbms/src/Core/MySQLProtocol.h @@ -919,10 +919,10 @@ public: auto user = context.getUser(user_name); - if (user->password_double_sha1_hex.empty()) + if (user->authentication.getType() != DB::Authentication::DOUBLE_SHA1_PASSWORD) throw Exception("Cannot use " + getName() + " auth plugin for user " + user_name + " since its password isn't specified using double SHA1.", ErrorCodes::UNKNOWN_EXCEPTION); - Poco::SHA1Engine::Digest double_sha1_value = Poco::DigestEngine::digestFromHex(user->password_double_sha1_hex); + Poco::SHA1Engine::Digest double_sha1_value = user->authentication.getPasswordHashBinary(); assert(double_sha1_value.size() == Poco::SHA1Engine::DIGEST_SIZE); Poco::SHA1Engine engine; diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 1d2cb2e6416..30752113a6b 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -176,8 +176,9 @@ struct Settings : public SettingsCollection M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \ M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \ M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \ - M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow format).") \ - M(SettingBool, input_format_null_as_default, false, "For CSV format initialize null fields with default values if data type of this field is not nullable") \ + M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).") \ + M(SettingBool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.") \ + M(SettingBool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable") \ \ M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \ M(SettingBool, input_format_values_deduce_templates_of_expressions, false, "For Values format: if field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.") \ @@ -202,8 +203,8 @@ struct Settings : public SettingsCollection \ M(SettingBool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). 
Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.") \ \ - M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if both absolute and relative values are non-zero, and at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.") \ - M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if both absolute and relative values are non-zero, and at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.") \ + M(SettingUInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.") \ + M(SettingFloat, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.") \ \ M(SettingBool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.") \ \ @@ -306,8 +307,9 @@ struct Settings : public SettingsCollection M(SettingBool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.") \ M(SettingBool, partial_merge_join, false, "Use partial merge join instead of hash join for LEFT and INNER JOINs.") \ M(SettingBool, partial_merge_join_optimizations, false, "Enable optimizations in partial merge join") \ + M(SettingUInt64, default_max_bytes_in_join, 100000000, "Maximum size of right-side table if limit's required but max_bytes_in_join is not set.") \ M(SettingUInt64, partial_merge_join_rows_in_right_blocks, 10000, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.") \ - M(SettingFloat, partial_merge_join_rows_in_left_blocks, 10000, "Group left-hand joining data in bigger blocks. Setting it to a bigger value increase JOIN performance and memory usage.") \ + M(SettingUInt64, partial_merge_join_rows_in_left_blocks, 10000, "Group left-hand joining data in bigger blocks. Setting it to a bigger value increase JOIN performance and memory usage.") \ \ M(SettingUInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \ M(SettingUInt64, max_bytes_to_transfer, 0, "Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \ @@ -344,7 +346,6 @@ struct Settings : public SettingsCollection \ M(SettingBool, prefer_localhost_replica, 1, "1 - always send query to local replica, if it exists. 
0 - choose replica to send query between local and remote ones according to load_balancing") \ M(SettingUInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.") \ - M(SettingBool, asterisk_left_columns_only, 0, "If it is set to true, the asterisk only return left of join query.") \ M(SettingUInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, "Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in user profile. Note that content is parsed and external tables are created in memory before start of query execution. And this is the only limit that has effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).") \ M(SettingBool, calculate_text_stack_trace, 1, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.") \ M(SettingBool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.") \ diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp b/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp index 417df4c53b1..2aa70abd692 100644 --- a/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp +++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.cpp @@ -36,7 +36,7 @@ void AsynchronousBlockInputStream::next() { ready.reset(); - pool.schedule([this, thread_group = CurrentThread::getGroup()] () + pool.scheduleOrThrowOnError([this, thread_group = CurrentThread::getGroup()]() { CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; diff --git a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 82cde69ca4e..f771a5cf20c 100644 --- a/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include diff --git a/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.cpp b/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.cpp deleted file mode 100644 index 3255ba19aef..00000000000 --- a/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -CheckNonEmptySetBlockInputStream::CheckNonEmptySetBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, const NameSet sets_) - : expression(expression_), sets(sets_) -{ - children.push_back(input); - cached_header = children.back()->getHeader(); -} - - -String CheckNonEmptySetBlockInputStream::getName() const { return "CheckNonEmptySet"; } - - -Block CheckNonEmptySetBlockInputStream::getTotals() -{ - return children.back()->getTotals(); -} - - -Block CheckNonEmptySetBlockInputStream::getHeader() const -{ - return cached_header.cloneEmpty(); -} - - -Block CheckNonEmptySetBlockInputStream::readImpl() -{ - if (!initialized) - { - /// CheckNonEmptyBlockInputStream in the downstream with CreatingSetsBlockInputStream. So set has been created. 
- cached_result = inOrInnerRightJoinWithEmpty(); - initialized = true; - } - - Block res; - - if (isCancelledOrThrowIfKilled() || cached_result) - return res; - - return children.back()->read(); -} - - -bool CheckNonEmptySetBlockInputStream::inOrInnerRightJoinWithEmpty() const -{ - InOrInnerRightJoinWithEmpty checker; - - for (const auto & action : expression->getActions()) - { - if (action.type == ExpressionAction::ARRAY_JOIN) - { - return false; - } - else if (action.type == ExpressionAction::JOIN) - { - if (const auto * join = dynamic_cast(action.join.get())) - { - checker.hasJoin = true; - checker.innerRightJoinWithEmpty &= join->getTotalRowCount() == 0 && isInnerOrRight(join->getKind()); - } - } - else if (action.type == ExpressionAction::ADD_COLUMN) - { - if (!sets.count(action.result_name)) - continue; - checker.hasIn = true; - ColumnPtr column_set_ptr = action.added_column; - const ColumnSet * column_set = typeid_cast(&*column_set_ptr); - checker.inWithEmpty &= column_set && column_set->getData()->getTotalRowCount() == 0; - } - } - /// Get the final result. - return checker.result(); -} - -} diff --git a/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.h b/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.h deleted file mode 100644 index 733b970aadd..00000000000 --- a/dbms/src/DataStreams/CheckNonEmptySetBlockInputStream.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -class CheckNonEmptySetBlockInputStream : public IBlockInputStream -{ -private: - using ExpressionActionsPtr = std::shared_ptr; - -public: - CheckNonEmptySetBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, const NameSet sets_); - - String getName() const override; - Block getTotals() override; - Block getHeader() const override; - -protected: - Block readImpl() override; - -private: - Block cached_header; - ExpressionActionsPtr expression; - bool initialized = false; - bool cached_result = false; - NameSet sets; - - bool inOrInnerRightJoinWithEmpty() const; - - -/** - * Used to determine if actions are IN OR INNER/RIGHT JOIN with empty. 
- */ -struct InOrInnerRightJoinWithEmpty -{ - bool hasJoin = false; - bool hasIn = false; - bool innerRightJoinWithEmpty = true; - bool inWithEmpty = true; - - bool result() - { - if (hasJoin && !hasIn) - return innerRightJoinWithEmpty; - - else if (hasIn && !hasJoin) - return inWithEmpty; - - else if (hasJoin && hasIn) - return innerRightJoinWithEmpty && inWithEmpty; - - return false; - } - -}; - -}; - -} diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp index 320bb35f5b3..44f4989f3cc 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 51adc462ef6..9673395a21a 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -30,6 +30,14 @@ Block ExpressionBlockInputStream::getHeader() const Block ExpressionBlockInputStream::readImpl() { + if (!initialized) + { + if (expression->resultIsAlwaysEmpty()) + return {}; + + initialized = true; + } + Block res = children.back()->read(); if (res) expression->execute(res); diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index 1b9be4e2852..af0e2db589b 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -31,6 +31,7 @@ protected: private: ExpressionActionsPtr expression; Block cached_header; + bool initialized = false; }; } diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index 8613bc8cf8f..6516af786f3 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -17,9 +17,11 @@ namespace ErrorCodes } -FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, - const String & filter_column_name, bool remove_filter_) - : remove_filter(remove_filter_), expression(expression_) +FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input, ExpressionActionsPtr expression_, + String filter_column_name_, bool remove_filter_) + : remove_filter(remove_filter_) + , expression(std::move(expression_)) + , filter_column_name(std::move(filter_column_name_)) { children.push_back(input); @@ -72,6 +74,9 @@ Block FilterBlockInputStream::readImpl() if (constant_filter_description.always_false) return removeFilterIfNeed(std::move(res)); + if (expression->checkColumnIsAlwaysFalse(filter_column_name)) + return {}; + /// Until non-empty block after filtering or end of stream. 
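With CheckNonEmptySetBlockInputStream deleted above, the "provably empty result" shortcut moves into the ordinary streams: ExpressionBlockInputStream bails out once expression->resultIsAlwaysEmpty() holds, and FilterBlockInputStream returns an empty block when checkColumnIsAlwaysFalse(filter_column_name) proves the filter is constant false. A condensed sketch of the shape this diff gives ExpressionBlockInputStream::readImpl (abbreviated; members as introduced here):

    Block ExpressionBlockInputStream::readImpl()
    {
        if (!initialized)
        {
            /// If the actions can be proven to produce no rows
            /// (e.g. a join against an empty set), stop before reading input.
            if (expression->resultIsAlwaysEmpty())
                return {};
            initialized = true;
        }

        Block res = children.back()->read();
        if (res)
            expression->execute(res);
        return res;
    }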
while (1) { diff --git a/dbms/src/DataStreams/FilterBlockInputStream.h b/dbms/src/DataStreams/FilterBlockInputStream.h index e287b69460c..7e49f5bb6cd 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.h +++ b/dbms/src/DataStreams/FilterBlockInputStream.h @@ -20,8 +20,8 @@ private: using ExpressionActionsPtr = std::shared_ptr; public: - FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, - const String & filter_column_name_, bool remove_filter_ = false); + FilterBlockInputStream(const BlockInputStreamPtr & input, ExpressionActionsPtr expression_, + String filter_column_name_, bool remove_filter_ = false); String getName() const override; Block getTotals() override; @@ -35,6 +35,7 @@ protected: private: ExpressionActionsPtr expression; Block header; + String filter_column_name; ssize_t filter_column; ConstantFilterDescription constant_filter_description; diff --git a/dbms/src/DataStreams/LimitByBlockInputStream.cpp b/dbms/src/DataStreams/LimitByBlockInputStream.cpp index 55f3dae02bd..dddf3c21929 100644 --- a/dbms/src/DataStreams/LimitByBlockInputStream.cpp +++ b/dbms/src/DataStreams/LimitByBlockInputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 4a6259d4a7c..9f6f8173cde 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -84,14 +84,11 @@ Block MergeSortingBlockInputStream::readImpl() temporary_files.emplace_back(createTemporaryFile(tmp_path)); const std::string & path = temporary_files.back()->path(); - WriteBufferFromFile file_buf(path); - CompressedWriteBuffer compressed_buf(file_buf); - NativeBlockOutputStream block_out(compressed_buf, 0, header_without_constants); MergeSortingBlocksBlockInputStream block_in(blocks, description, max_merged_block_size, limit); LOG_INFO(log, "Sorting and writing part of data into temporary file " + path); ProfileEvents::increment(ProfileEvents::ExternalSortWritePart); - copyData(block_in, block_out, &is_cancelled); /// NOTE. Possibly limit disk usage. + TemporaryFileStream::write(path, header_without_constants, block_in, &is_cancelled); /// NOTE. Possibly limit disk usage. 
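The external-sort spill above now goes through TemporaryFileStream::write (added later in this diff, in TemporaryFileStream.h), which bundles the WriteBufferFromFile, CompressedWriteBuffer and NativeBlockOutputStream chain that used to be written out inline. A minimal sketch of spilling a block stream and lazily reading it back; path, header_without_constants and block_in are assumed to exist in the caller, as in MergeSortingBlockInputStream:

    /// Spill the sorted blocks into a compressed native-format temporary file.
    std::atomic<bool> is_cancelled{false};
    TemporaryFileStream::write(path, header_without_constants, block_in, &is_cancelled);

    /// Read it back later; the file is only opened on the first read() call.
    auto spilled = std::make_shared<TemporaryFileLazyInputStream>(path, header_without_constants);
    Block first_block = spilled->read();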
LOG_INFO(log, "Done writing part of data into temporary file " + path); blocks.clear(); @@ -138,7 +135,7 @@ Block MergeSortingBlockInputStream::readImpl() MergeSortingBlocksBlockInputStream::MergeSortingBlocksBlockInputStream( - Blocks & blocks_, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_) + Blocks & blocks_, const SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_) : blocks(blocks_), header(blocks.at(0).cloneEmpty()), description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_) { Blocks nonempty_blocks; diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index ffc8c471270..a8b8e8cfd3b 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -3,14 +3,13 @@ #include #include -#include +#include #include #include #include #include -#include #include #include @@ -19,6 +18,8 @@ namespace DB { +struct TemporaryFileStream; + namespace ErrorCodes { extern const int NOT_ENOUGH_SPACE; @@ -34,7 +35,7 @@ class MergeSortingBlocksBlockInputStream : public IBlockInputStream { public: /// limit - if not 0, allowed to return just first 'limit' rows in sorted order. - MergeSortingBlocksBlockInputStream(Blocks & blocks_, SortDescription & description_, + MergeSortingBlocksBlockInputStream(Blocks & blocks_, const SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_ = 0); String getName() const override { return "MergeSortingBlocks"; } diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 96ea9112e1d..cfbf5052648 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -168,21 +168,28 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() else { size_t num_children = children.size(); - for (size_t i = 0; i < num_children; ++i) + try { - auto & child = children[i]; - - auto thread_group = CurrentThread::getGroup(); - reading_pool->schedule([&child, thread_group] + for (size_t i = 0; i < num_children; ++i) { - setThreadName("MergeAggReadThr"); - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - child->readPrefix(); - }); - } + auto & child = children[i]; + auto thread_group = CurrentThread::getGroup(); + reading_pool->scheduleOrThrowOnError([&child, thread_group] + { + setThreadName("MergeAggReadThr"); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); + CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; + child->readPrefix(); + }); + } + } + catch (...) 
+ { + reading_pool->wait(); + throw; + } reading_pool->wait(); } @@ -194,7 +201,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() */ for (size_t i = 0; i < merging_threads; ++i) - pool.schedule([this, thread_group = CurrentThread::getGroup()] () { mergeThread(thread_group); }); + pool.scheduleOrThrowOnError([this, thread_group = CurrentThread::getGroup()]() { mergeThread(thread_group); }); } } @@ -475,22 +482,29 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate } else { - for (auto & input : inputs) + try { - if (need_that_input(input)) + for (auto & input : inputs) { - auto thread_group = CurrentThread::getGroup(); - reading_pool->schedule([&input, &read_from_input, thread_group] + if (need_that_input(input)) { - setThreadName("MergeAggReadThr"); - if (thread_group) - CurrentThread::attachToIfDetached(thread_group); - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - read_from_input(input); - }); + auto thread_group = CurrentThread::getGroup(); + reading_pool->scheduleOrThrowOnError([&input, &read_from_input, thread_group] + { + setThreadName("MergeAggReadThr"); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); + CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; + read_from_input(input); + }); + } } } - + catch (...) + { + reading_pool->wait(); + throw; + } reading_pool->wait(); } diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 6c3012a481e..84a5bd78293 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -129,7 +129,7 @@ void PushingToViewsBlockOutputStream::write(const Block & block) for (size_t view_num = 0; view_num < views.size(); ++view_num) { auto thread_group = CurrentThread::getGroup(); - pool.schedule([=] + pool.scheduleOrThrowOnError([=] { setThreadName("PushingToViews"); if (thread_group) diff --git a/dbms/src/DataStreams/ReverseBlockInputStream.cpp b/dbms/src/DataStreams/ReverseBlockInputStream.cpp index 83d74540ca0..2dfa147c68f 100644 --- a/dbms/src/DataStreams/ReverseBlockInputStream.cpp +++ b/dbms/src/DataStreams/ReverseBlockInputStream.cpp @@ -1,5 +1,7 @@ #include "ReverseBlockInputStream.h" +#include + namespace DB { ReverseBlockInputStream::ReverseBlockInputStream(const BlockInputStreamPtr & input) diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 63164552120..bf232fcbc41 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -9,24 +9,28 @@ namespace DB bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const { - if (max_rows && rows > max_rows) + if (overflow_mode == OverflowMode::THROW) { - if (overflow_mode == OverflowMode::THROW) + if (max_rows && rows > max_rows) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) + ", current rows: " + formatReadableQuantity(rows), exception_code); - else - return false; - } - if (max_bytes && bytes > max_bytes) - { - if (overflow_mode == OverflowMode::THROW) + if (max_bytes && bytes > max_bytes) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); - else - return false; + + return true; } + return softCheck(rows, 
bytes); +} + +bool SizeLimits::softCheck(UInt64 rows, UInt64 bytes) const +{ + if (max_rows && rows > max_rows) + return false; + if (max_bytes && bytes > max_bytes) + return false; return true; } diff --git a/dbms/src/DataStreams/SizeLimits.h b/dbms/src/DataStreams/SizeLimits.h index 41238087613..b5206f6b97d 100644 --- a/dbms/src/DataStreams/SizeLimits.h +++ b/dbms/src/DataStreams/SizeLimits.h @@ -32,6 +32,11 @@ struct SizeLimits /// Check limits. If exceeded, return false or throw an exception, depending on overflow_mode. bool check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const; + + /// Check limits. No exceptions. + bool softCheck(UInt64 rows, UInt64 bytes) const; + + bool hasLimits() const { return max_rows || max_bytes; } }; } diff --git a/dbms/src/DataStreams/TemporaryFileStream.h b/dbms/src/DataStreams/TemporaryFileStream.h index 1e55f2c4aae..25ea54a7505 100644 --- a/dbms/src/DataStreams/TemporaryFileStream.h +++ b/dbms/src/DataStreams/TemporaryFileStream.h @@ -3,8 +3,12 @@ #include #include #include +#include +#include #include +#include #include +#include namespace DB { @@ -27,6 +31,46 @@ struct TemporaryFileStream , compressed_in(file_in) , block_in(std::make_shared(compressed_in, header_, 0)) {} + + /// Flush data from input stream into file for future reading + static void write(const std::string & path, const Block & header, IBlockInputStream & input, std::atomic * is_cancelled = nullptr) + { + WriteBufferFromFile file_buf(path); + CompressedWriteBuffer compressed_buf(file_buf); + NativeBlockOutputStream output(compressed_buf, 0, header); + copyData(input, output, is_cancelled); + } +}; + +class TemporaryFileLazyInputStream : public IBlockInputStream +{ +public: + TemporaryFileLazyInputStream(const std::string & path_, const Block & header_) + : path(path_) + , header(header_) + , done(false) + {} + + String getName() const override { return "TemporaryFile"; } + Block getHeader() const override { return header; } + void readSuffix() override {} + +protected: + Block readImpl() override + { + if (!done) + { + done = true; + TemporaryFileStream stream(path, header); + return stream.block_in->read(); + } + return {}; + } + +private: + const std::string path; + Block header; + bool done; }; } diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 0eb82d6abdb..fa6eb0d6d71 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -156,11 +156,11 @@ void DataTypeNullable::serializeBinary(const IColumn & column, size_t row_num, W nested_data_type->serializeBinary(col.getNestedColumn(), row_num, ostr); } - +/// Deserialize value into ColumnNullable. /// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all. -template -static void safeDeserialize( - IColumn & column, +template , ReturnType>* = nullptr> +static ReturnType safeDeserialize( + IColumn & column, const IDataType & /*nested_data_type*/, CheckForNull && check_for_null, DeserializeNested && deserialize_nested) { ColumnNullable & col = assert_cast(column); @@ -185,10 +185,26 @@ static void safeDeserialize( } } +/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false. 
+template , ReturnType>* = nullptr> +static ReturnType safeDeserialize( + IColumn & column, const IDataType & nested_data_type, + CheckForNull && check_for_null, DeserializeNested && deserialize_nested) +{ + assert(!dynamic_cast(&column)); + assert(!dynamic_cast(&nested_data_type)); + bool insert_default = check_for_null(); + if (insert_default) + nested_data_type.insertDefaultInto(column); + else + deserialize_nested(column); + return !insert_default; +} + void DataTypeNullable::deserializeBinary(IColumn & column, ReadBuffer & istr) const { - safeDeserialize(column, + safeDeserialize(column, *nested_data_type, [&istr] { bool is_null = 0; readBinary(is_null, istr); return is_null; }, [this, &istr] (IColumn & nested) { nested_data_type->deserializeBinary(nested, istr); }); } @@ -206,6 +222,13 @@ void DataTypeNullable::serializeTextEscaped(const IColumn & column, size_t row_n void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings, nested_data_type); +} + +template +ReturnType DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, + const DataTypePtr & nested_data_type) { /// Little tricky, because we cannot discriminate null from first character. @@ -215,9 +238,9 @@ void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & ist /// This is not null, surely. if (*istr.position() != '\\') { - safeDeserialize(column, + return safeDeserialize(column, *nested_data_type, [] { return false; }, - [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextEscaped(nested, istr, settings); }); + [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextEscaped(nested, istr, settings); }); } else { @@ -227,7 +250,7 @@ void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & ist if (istr.eof()) throw Exception("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA); - safeDeserialize(column, + return safeDeserialize(column, *nested_data_type, [&istr] { if (*istr.position() == 'N') @@ -237,7 +260,7 @@ void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & ist } return false; }, - [this, &istr, &settings] (IColumn & nested) + [&nested_data_type, &istr, &settings] (IColumn & nested) { if (istr.position() != istr.buffer().begin()) { @@ -275,15 +298,22 @@ void DataTypeNullable::serializeTextQuoted(const IColumn & column, size_t row_nu void DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - safeDeserialize(column, + deserializeTextQuoted(column, istr, settings, nested_data_type); +} + +template +ReturnType DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, + const DataTypePtr & nested_data_type) +{ + return safeDeserialize(column, *nested_data_type, [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); }, - [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); }); + [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); }); } void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - safeDeserialize(column, + 
safeDeserialize(column, *nested_data_type, [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); }, [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); }); } @@ -300,6 +330,13 @@ void DataTypeNullable::serializeTextCSV(const IColumn & column, size_t row_num, } void DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextCSV(column, istr, settings, nested_data_type); +} + +template +ReturnType DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, + const DataTypePtr & nested_data_type) { constexpr char const * null_literal = "NULL"; constexpr size_t len = 4; @@ -331,7 +368,7 @@ void DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c return false; }; - auto deserialize_nested = [this, &settings, &istr, &null_prefix_len] (IColumn & nested) + auto deserialize_nested = [&nested_data_type, &settings, &istr, &null_prefix_len] (IColumn & nested) { if (likely(!null_prefix_len)) nested_data_type->deserializeAsTextCSV(nested, istr, settings); @@ -357,8 +394,8 @@ void DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c WriteBufferFromOwnString parsed_value; nested_data_type->serializeAsTextCSV(nested, nested.size() - 1, parsed_value, settings); throw DB::Exception("Error while parsing \"" + std::string(null_literal, null_prefix_len) - + std::string(istr.position(), std::min(size_t{10}, istr.available())) + "\" as " + getName() - + " at position " + std::to_string(istr.count()) + ": expected \"NULL\" or " + nested_data_type->getName() + + std::string(istr.position(), std::min(size_t{10}, istr.available())) + "\" as Nullable(" + nested_data_type->getName() + + ") at position " + std::to_string(istr.count()) + ": expected \"NULL\" or " + nested_data_type->getName() + ", got \"" + std::string(null_literal, buf.count()) + "\", which was deserialized as \"" + parsed_value.str() + "\". 
It seems that input data is ill-formatted.", ErrorCodes::CANNOT_READ_ALL_DATA); @@ -366,7 +403,7 @@ void DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c } }; - safeDeserialize(column, check_for_null, deserialize_nested); + return safeDeserialize(column, *nested_data_type, check_for_null, deserialize_nested); } void DataTypeNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -397,9 +434,16 @@ void DataTypeNullable::serializeTextJSON(const IColumn & column, size_t row_num, void DataTypeNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - safeDeserialize(column, + deserializeTextJSON(column, istr, settings, nested_data_type); +} + +template +ReturnType DataTypeNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, + const DataTypePtr & nested_data_type) +{ + return safeDeserialize(column, *nested_data_type, [&istr] { return checkStringByFirstCharacterAndAssertTheRest("null", istr); }, - [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextJSON(nested, istr, settings); }); + [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextJSON(nested, istr, settings); }); } void DataTypeNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -492,4 +536,10 @@ DataTypePtr removeNullable(const DataTypePtr & type) return type; } + +template bool DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); +template bool DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested); +template bool DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); +template bool DataTypeNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested); + } diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 49b14eefc6c..1766b399c2a 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -100,6 +100,17 @@ public: const DataTypePtr & getNestedType() const { return nested_data_type; } + /// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false) + /// If ReturnType is void, deserialize Nullable(T) + template + static ReturnType deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); + template + static ReturnType deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested); + template + static ReturnType deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); + template + static ReturnType deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested); + private: DataTypePtr nested_data_type; }; diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 051683596cf..bd0e7e6ea3a 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ 
b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -101,7 +101,7 @@ static inline const IColumn & extractElementColumn(const IColumn & column, size_ void DataTypeTuple::serializeBinary(const Field & field, WriteBuffer & ostr) const { - const auto & tuple = get(field).toUnderType(); + const auto & tuple = get(field); for (const auto idx_elem : ext::enumerate(elems)) idx_elem.second->serializeBinary(tuple[idx_elem.first], ostr); } @@ -109,10 +109,12 @@ void DataTypeTuple::serializeBinary(const Field & field, WriteBuffer & ostr) con void DataTypeTuple::deserializeBinary(Field & field, ReadBuffer & istr) const { const size_t size = elems.size(); - field = Tuple(TupleBackend(size)); - TupleBackend & tuple = get(field).toUnderType(); + + Tuple tuple(size); for (const auto i : ext::range(0, size)) elems[i]->deserializeBinary(tuple[i], istr); + + field = tuple; } void DataTypeTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const @@ -447,7 +449,7 @@ MutableColumnPtr DataTypeTuple::createColumn() const Field DataTypeTuple::getDefault() const { - return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); + return Tuple(ext::map(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); } void DataTypeTuple::insertDefaultInto(IColumn & column) const diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp index 70fab533838..fcea9d53f39 100644 --- a/dbms/src/DataTypes/FieldToDataType.cpp +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -90,9 +90,8 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const } -DataTypePtr FieldToDataType::operator() (const Tuple & x) const +DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const { - auto & tuple = static_cast(x); if (tuple.empty()) throw Exception("Cannot infer type of an empty tuple", ErrorCodes::EMPTY_DATA_PASSED); diff --git a/dbms/src/Databases/DatabaseOnDisk.h b/dbms/src/Databases/DatabaseOnDisk.h index 761d55bd90b..231db6fdccb 100644 --- a/dbms/src/Databases/DatabaseOnDisk.h +++ b/dbms/src/Databases/DatabaseOnDisk.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 8dbeab29f41..5987597e3e9 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -141,7 +141,7 @@ void DatabaseOrdinary::loadTables( for (const auto & file_name : file_names) { - pool.schedule([&]() { loadOneTable(file_name); }); + pool.scheduleOrThrowOnError([&]() { loadOneTable(file_name); }); } pool.wait(); @@ -174,11 +174,16 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool) } }; - for (const auto & table : tables) + try { - thread_pool.schedule([&]() { startupOneTable(table.second); }); + for (const auto & table : tables) + thread_pool.scheduleOrThrowOnError([&]() { startupOneTable(table.second); }); + } + catch (...) 
+ { + thread_pool.wait(); + throw; } - thread_pool.wait(); } diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 1fc7da240b2..eddd5aa92cd 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -40,7 +40,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings) format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null; format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields; - format_settings.csv.null_as_default = settings.input_format_null_as_default; + format_settings.null_as_default = settings.input_format_null_as_default; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; @@ -53,6 +53,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings) format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_format = settings.format_template_row; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; + format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; return format_settings; } diff --git a/dbms/src/Formats/FormatSettings.h b/dbms/src/Formats/FormatSettings.h index f8018dec125..dfd5d5b86f6 100644 --- a/dbms/src/Formats/FormatSettings.h +++ b/dbms/src/Formats/FormatSettings.h @@ -29,7 +29,6 @@ struct FormatSettings bool allow_double_quotes = true; bool unquoted_null_literal_as_null = false; bool empty_as_default = false; - bool null_as_default = false; }; CSV csv; @@ -61,10 +60,18 @@ struct FormatSettings Template template_settings; + struct TSV + { + bool empty_as_default = false; + }; + + TSV tsv; + bool skip_unknown_fields = false; bool with_names_use_header = false; bool write_statistics = true; bool import_nested_json = false; + bool null_as_default = false; enum class DateTimeInputFormat { diff --git a/dbms/src/Functions/FunctionFQDN.cpp b/dbms/src/Functions/FunctionFQDN.cpp new file mode 100644 index 00000000000..ae29d419fbd --- /dev/null +++ b/dbms/src/Functions/FunctionFQDN.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +class FunctionFQDN : public IFunction +{ +public: + static constexpr auto name = "FQDN"; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool isDeterministic() const override { return false; } + + size_t getNumberOfArguments() const override + { + return 0; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( + input_rows_count, getFQDNOrHostName())->convertToFullColumnIfConst(); + } +}; + + +void registerFunctionFQDN(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction("fullHostName"); +} + +} diff 
--git a/dbms/src/Functions/FunctionJoinGet.cpp b/dbms/src/Functions/FunctionJoinGet.cpp index 0aad01c62f3..0e174202745 100644 --- a/dbms/src/Functions/FunctionJoinGet.cpp +++ b/dbms/src/Functions/FunctionJoinGet.cpp @@ -82,13 +82,18 @@ DataTypePtr FunctionBuilderJoinGet::getReturnTypeImpl(const ColumnsWithTypeAndNa } -void FunctionJoinGet::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +void FunctionJoinGet::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - auto & ctn = block.getByPosition(arguments[2]); + auto ctn = block.getByPosition(arguments[2]); + if (isColumnConst(*ctn.column)) + ctn.column = ctn.column->cloneResized(1); ctn.name = ""; // make sure the key name never collide with the join columns Block key_block = {ctn}; join->joinGet(key_block, attr_name); - block.getByPosition(result) = key_block.getByPosition(1); + auto & result_ctn = key_block.getByPosition(1); + if (isColumnConst(*ctn.column)) + result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count); + block.getByPosition(result) = result_ctn; } void registerFunctionJoinGet(FunctionFactory & factory) diff --git a/dbms/src/Functions/FunctionsBitmap.cpp b/dbms/src/Functions/FunctionsBitmap.cpp index 62faf49d2b3..8ffa641fec8 100644 --- a/dbms/src/Functions/FunctionsBitmap.cpp +++ b/dbms/src/Functions/FunctionsBitmap.cpp @@ -1,4 +1,7 @@ #include + +// TODO include this last because of a broken roaring header. See the comment +// inside. #include diff --git a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h index d354faa0c65..422f78a7abe 100644 --- a/dbms/src/Functions/FunctionsBitmap.h +++ b/dbms/src/Functions/FunctionsBitmap.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -15,6 +14,9 @@ #include #include +// TODO include this last because of a broken roaring header. See the comment +// inside. 
+#include namespace DB { diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index c16f439c3fa..7f89ea2430f 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1540,6 +1540,9 @@ public: String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + bool hasInformationAboutMonotonicity() const override { return static_cast(monotonicity_for_range); diff --git a/dbms/src/Functions/FunctionsJSON.h b/dbms/src/Functions/FunctionsJSON.h index b9fddf57d39..9ac670f1f64 100644 --- a/dbms/src/Functions/FunctionsJSON.h +++ b/dbms/src/Functions/FunctionsJSON.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Functions/FunctionsMiscellaneous.h b/dbms/src/Functions/FunctionsMiscellaneous.h index 96539f9559f..de1177dd8ed 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.h +++ b/dbms/src/Functions/FunctionsMiscellaneous.h @@ -26,6 +26,9 @@ public: String getName() const override { return "FunctionExpression"; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return argument_types; } const DataTypePtr & getReturnType() const override { return return_type; } @@ -110,6 +113,9 @@ public: String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return captured_types; } const DataTypePtr & getReturnType() const override { return return_type; } diff --git a/dbms/src/Functions/FunctionsRandom.h b/dbms/src/Functions/FunctionsRandom.h index 069c0afa86b..9559f6121fd 100644 --- a/dbms/src/Functions/FunctionsRandom.h +++ b/dbms/src/Functions/FunctionsRandom.h @@ -53,9 +53,11 @@ public: return name; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - bool isDeterministicInScopeOfQuery() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { diff --git a/dbms/src/Functions/FunctionsStringRegex.cpp b/dbms/src/Functions/FunctionsStringRegex.cpp index 464260e6a33..12fb2b781f6 100644 --- a/dbms/src/Functions/FunctionsStringRegex.cpp +++ b/dbms/src/Functions/FunctionsStringRegex.cpp @@ -268,14 +268,12 @@ struct MultiMatchAnyImpl static_assert(static_cast(FindAny) + static_cast(FindAnyIndex) == 1); using ResultType = Type; static constexpr bool is_using_hyperscan = true; - - static void vector_constant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const std::vector & needles, - PaddedPODArray & res) + /// Variable for understanding, if we used offsets for the output, most + /// likely to determine whether the function returns ColumnVector of ColumnArray. 
+ static constexpr bool is_column_array = false; + static auto ReturnType() { - vector_constant(haystack_data, haystack_offsets, needles, res, std::nullopt); + return std::make_shared>(); } static void vector_constant( @@ -283,10 +281,22 @@ struct MultiMatchAnyImpl const ColumnString::Offsets & haystack_offsets, const std::vector & needles, PaddedPODArray & res, + PaddedPODArray & offsets) + { + vector_constant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt); + } + + static void vector_constant( + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const std::vector & needles, + PaddedPODArray & res, + [[maybe_unused]] PaddedPODArray & offsets, [[maybe_unused]] std::optional edit_distance) { (void)FindAny; (void)FindAnyIndex; + res.resize(haystack_offsets.size()); #if USE_HYPERSCAN const auto & hyperscan_regex = MultiRegexps::get(needles, edit_distance); hs_scratch_t * scratch = nullptr; @@ -307,15 +317,18 @@ struct MultiMatchAnyImpl *reinterpret_cast(context) = id; else if constexpr (FindAny) *reinterpret_cast(context) = 1; - return 0; + /// Once we hit the callback, there is no need to search for others. + return 1; }; const size_t haystack_offsets_size = haystack_offsets.size(); UInt64 offset = 0; for (size_t i = 0; i < haystack_offsets_size; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; + /// Hyperscan restriction. if (length > std::numeric_limits::max()) throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); + /// Zero the result, scan, check, update the offset. res[i] = 0; err = hs_scan( hyperscan_regex->getDB(), @@ -325,7 +338,7 @@ struct MultiMatchAnyImpl smart_scratch.get(), on_match, &res[i]); - if (err != HS_SUCCESS) + if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED) throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); offset = haystack_offsets[i]; } @@ -333,7 +346,7 @@ struct MultiMatchAnyImpl /// Fallback if do not use hyperscan if constexpr (MultiSearchDistance) throw Exception( - "Edit distance multi-search is not implemented when hyperscan is off (is it Intel processor?)", + "Edit distance multi-search is not implemented when hyperscan is off (is it x86 processor?)", ErrorCodes::NOT_IMPLEMENTED); PaddedPODArray accum(res.size()); memset(res.data(), 0, res.size() * sizeof(res.front())); @@ -353,6 +366,92 @@ struct MultiMatchAnyImpl } }; +template +struct MultiMatchAllIndicesImpl +{ + using ResultType = Type; + static constexpr bool is_using_hyperscan = true; + /// Variable for understanding, if we used offsets for the output, most + /// likely to determine whether the function returns ColumnVector of ColumnArray. 
+ static constexpr bool is_column_array = true; + static auto ReturnType() + { + return std::make_shared(std::make_shared()); + } + + static void vector_constant( + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const std::vector & needles, + PaddedPODArray & res, + PaddedPODArray & offsets) + { + vector_constant(haystack_data, haystack_offsets, needles, res, offsets, std::nullopt); + } + + static void vector_constant( + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const std::vector & needles, + PaddedPODArray & res, + PaddedPODArray & offsets, + [[maybe_unused]] std::optional edit_distance) + { + offsets.resize(haystack_offsets.size()); +#if USE_HYPERSCAN + const auto & hyperscan_regex = MultiRegexps::get(needles, edit_distance); + hs_scratch_t * scratch = nullptr; + hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch); + + if (err != HS_SUCCESS) + throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + MultiRegexps::ScratchPtr smart_scratch(scratch); + + auto on_match = [](unsigned int id, + unsigned long long /* from */, + unsigned long long /* to */, + unsigned int /* flags */, + void * context) -> int + { + static_cast*>(context)->push_back(id); + return 0; + }; + const size_t haystack_offsets_size = haystack_offsets.size(); + UInt64 offset = 0; + for (size_t i = 0; i < haystack_offsets_size; ++i) + { + UInt64 length = haystack_offsets[i] - offset - 1; + /// Hyperscan restriction. + if (length > std::numeric_limits::max()) + throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); + /// Scan, check, update the offsets array and the offset of haystack. + err = hs_scan( + hyperscan_regex->getDB(), + reinterpret_cast(haystack_data.data()) + offset, + length, + 0, + smart_scratch.get(), + on_match, + &res); + if (err != HS_SUCCESS) + throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); + offsets[i] = res.size(); + offset = haystack_offsets[i]; + } +#else + (void)haystack_data; + (void)haystack_offsets; + (void)needles; + (void)res; + (void)offsets; + throw Exception( + "multi-search all indices is not implemented when hyperscan is off (is it x86 processor?)", + ErrorCodes::NOT_IMPLEMENTED); +#endif // USE_HYPERSCAN + } +}; + struct ExtractImpl { @@ -866,6 +965,10 @@ struct NameMultiMatchAnyIndex { static constexpr auto name = "multiMatchAnyIndex"; }; +struct NameMultiMatchAllIndices +{ + static constexpr auto name = "multiMatchAllIndices"; +}; struct NameMultiFuzzyMatchAny { static constexpr auto name = "multiFuzzyMatchAny"; @@ -874,6 +977,10 @@ struct NameMultiFuzzyMatchAnyIndex { static constexpr auto name = "multiFuzzyMatchAnyIndex"; }; +struct NameMultiFuzzyMatchAllIndices +{ + static constexpr auto name = "multiFuzzyMatchAllIndices"; +}; struct NameExtract { static constexpr auto name = "extract"; @@ -908,6 +1015,11 @@ using FunctionMultiMatchAnyIndex = FunctionsMultiStringSearch< NameMultiMatchAnyIndex, std::numeric_limits::max()>; +using FunctionMultiMatchAllIndices = FunctionsMultiStringSearch< + MultiMatchAllIndicesImpl, + NameMultiMatchAllIndices, + std::numeric_limits::max()>; + using FunctionMultiFuzzyMatchAny = FunctionsMultiStringFuzzySearch< MultiMatchAnyImpl, NameMultiFuzzyMatchAny, @@ -918,6 +1030,11 @@ using FunctionMultiFuzzyMatchAnyIndex = FunctionsMultiStringFuzzySearch< NameMultiFuzzyMatchAnyIndex, std::numeric_limits::max()>; +using 
FunctionMultiFuzzyMatchAllIndices = FunctionsMultiStringFuzzySearch< + MultiMatchAllIndicesImpl, + NameMultiFuzzyMatchAllIndices, + std::numeric_limits::max()>; + using FunctionLike = FunctionsStringSearch, NameLike>; using FunctionNotLike = FunctionsStringSearch, NameNotLike>; using FunctionExtract = FunctionsStringSearchToString; @@ -940,8 +1057,10 @@ void registerFunctionsStringRegex(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Functions/FunctionsStringRegex.h b/dbms/src/Functions/FunctionsStringRegex.h index a3f508b74d9..072f813c02b 100644 --- a/dbms/src/Functions/FunctionsStringRegex.h +++ b/dbms/src/Functions/FunctionsStringRegex.h @@ -63,9 +63,7 @@ public: if (!array_type || !checkAndGetDataType(array_type->getNestedType().get())) throw Exception( "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - - return std::make_shared>(); + return Impl::ReturnType(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override @@ -115,20 +113,23 @@ public: for (const auto & el : src_arr) refs.emplace_back(el.get()); - const size_t column_haystack_size = column_haystack->size(); - auto col_res = ColumnVector::create(); + auto col_offsets = ColumnArray::ColumnOffsets::create(); auto & vec_res = col_res->getData(); + auto & offsets_res = col_offsets->getData(); - vec_res.resize(column_haystack_size); - + /// The blame for resizing output is for the callee. if (col_haystack_vector) - Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, edit_distance); + Impl::vector_constant( + col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res, edit_distance); else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN); - block.getByPosition(result).column = std::move(col_res); + if constexpr (Impl::is_column_array) + block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets)); + else + block.getByPosition(result).column = std::move(col_res); } }; diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index e44138bb482..c39d536927c 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -47,7 +47,7 @@ struct PositionCaseSensitiveASCII /// Convert string to lowercase. Only for case-insensitive search. /// Implementation is permitted to be inefficient because it is called for single string. - static void toLowerIfNeed(std::string &) {} + static void toLowerIfNeed(std::string &) { } }; struct PositionCaseInsensitiveASCII @@ -107,7 +107,7 @@ struct PositionCaseSensitiveUTF8 return res; } - static void toLowerIfNeed(std::string &) {} + static void toLowerIfNeed(std::string &) { } }; struct PositionCaseInsensitiveUTF8 @@ -335,15 +335,21 @@ struct MultiSearchImpl { using ResultType = UInt8; static constexpr bool is_using_hyperscan = false; + /// Variable for understanding, if we used offsets for the output, most + /// likely to determine whether the function returns ColumnVector of ColumnArray. 
+ static constexpr bool is_column_array = false; + static auto ReturnType() { return std::make_shared>(); } static void vector_constant( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const std::vector & needles, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] PaddedPODArray & offsets) { auto searcher = Impl::createMultiSearcherInBigHaystack(needles); const size_t haystack_string_size = haystack_offsets.size(); + res.resize(haystack_string_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { @@ -366,12 +372,17 @@ struct MultiSearchFirstPositionImpl { using ResultType = UInt64; static constexpr bool is_using_hyperscan = false; + /// Tells whether the implementation fills an offsets column for the output, i.e. whether the function returns a ColumnVector or a ColumnArray. + static constexpr bool is_column_array = false; + static auto ReturnType() { return std::make_shared>(); } static void vector_constant( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const std::vector & needles, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] PaddedPODArray & offsets) { auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { @@ -379,6 +390,7 @@ struct MultiSearchFirstPositionImpl }; auto searcher = Impl::createMultiSearcherInBigHaystack(needles); const size_t haystack_string_size = haystack_offsets.size(); + res.resize(haystack_string_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { @@ -407,15 +419,21 @@ struct MultiSearchFirstIndexImpl { using ResultType = UInt64; static constexpr bool is_using_hyperscan = false; + /// Tells whether the implementation fills an offsets column for the output, i.e. whether the function returns a ColumnVector or a ColumnArray.
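
To clarify the trait pattern used by the Impl structs above and below: each implementation advertises is_column_array and ReturnType(), and the caller either returns the flat result column as-is or wraps it together with the offsets column into an Array column. A minimal self-contained sketch of that dispatch, outside the patch itself; FlatResult, ArrayResult, AnyImpl and AllIndicesImpl are hypothetical stand-ins for ColumnVector, ColumnArray and the real Impl structs:

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-ins for ColumnVector / ColumnArray: flat values plus
// per-row end offsets, the same layout the offsets_res column uses in this patch.
struct FlatResult  { std::vector<uint64_t> values; };
struct ArrayResult { std::vector<uint64_t> values; std::vector<uint64_t> offsets; };

struct AnyImpl        { static constexpr bool is_column_array = false; }; // one value per row
struct AllIndicesImpl { static constexpr bool is_column_array = true;  }; // an array per row

template <typename Impl>
auto makeResult(std::vector<uint64_t> values, std::vector<uint64_t> offsets)
{
    if constexpr (Impl::is_column_array)
        return ArrayResult{std::move(values), std::move(offsets)};   // ColumnArray::create(col_res, col_offsets)
    else
        return FlatResult{std::move(values)};                        // plain ColumnVector result
}

int main()
{
    // Two rows: row 0 matched patterns 1 and 3, row 1 matched pattern 2.
    auto arr = makeResult<AllIndicesImpl>({1, 3, 2}, {2, 3});
    std::cout << arr.offsets.size() << " rows\n";                    // 2 rows
    auto flat = makeResult<AnyImpl>({1, 0}, {});
    std::cout << flat.values.size() << " rows\n";                    // 2 rows
}

The same if constexpr branch appears in the executeImpl hunks of FunctionsStringRegex.h and FunctionsStringSearch.h below.
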
+ static constexpr bool is_column_array = false; + static auto ReturnType() { return std::make_shared>(); } static void vector_constant( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const std::vector & needles, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] PaddedPODArray & offsets) { auto searcher = Impl::createMultiSearcherInBigHaystack(needles); const size_t haystack_string_size = haystack_offsets.size(); + res.resize(haystack_string_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { @@ -598,30 +616,48 @@ struct NameHasTokenCaseInsensitive using FunctionPosition = FunctionsStringSearch, NamePosition>; using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; using FunctionPositionCaseInsensitive = FunctionsStringSearch, NamePositionCaseInsensitive>; -using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; +using FunctionPositionCaseInsensitiveUTF8 + = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; -using FunctionMultiSearchAllPositions = FunctionsMultiStringPosition, NameMultiSearchAllPositions>; -using FunctionMultiSearchAllPositionsUTF8 = FunctionsMultiStringPosition, NameMultiSearchAllPositionsUTF8>; -using FunctionMultiSearchAllPositionsCaseInsensitive = FunctionsMultiStringPosition, NameMultiSearchAllPositionsCaseInsensitive>; -using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition, NameMultiSearchAllPositionsCaseInsensitiveUTF8>; +using FunctionMultiSearchAllPositions + = FunctionsMultiStringPosition, NameMultiSearchAllPositions>; +using FunctionMultiSearchAllPositionsUTF8 + = FunctionsMultiStringPosition, NameMultiSearchAllPositionsUTF8>; +using FunctionMultiSearchAllPositionsCaseInsensitive + = FunctionsMultiStringPosition, NameMultiSearchAllPositionsCaseInsensitive>; +using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition< + MultiSearchAllPositionsImpl, + NameMultiSearchAllPositionsCaseInsensitiveUTF8>; using FunctionMultiSearch = FunctionsMultiStringSearch, NameMultiSearchAny>; using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyUTF8>; -using FunctionMultiSearchCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitive>; -using FunctionMultiSearchCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitiveUTF8>; +using FunctionMultiSearchCaseInsensitive + = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitive>; +using FunctionMultiSearchCaseInsensitiveUTF8 + = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitiveUTF8>; -using FunctionMultiSearchFirstIndex = FunctionsMultiStringSearch, NameMultiSearchFirstIndex>; -using FunctionMultiSearchFirstIndexUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstIndexUTF8>; -using FunctionMultiSearchFirstIndexCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitive>; -using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitiveUTF8>; +using FunctionMultiSearchFirstIndex + = FunctionsMultiStringSearch, NameMultiSearchFirstIndex>; +using FunctionMultiSearchFirstIndexUTF8 + = FunctionsMultiStringSearch, NameMultiSearchFirstIndexUTF8>; +using FunctionMultiSearchFirstIndexCaseInsensitive + = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitive>; +using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 + = FunctionsMultiStringSearch, 
NameMultiSearchFirstIndexCaseInsensitiveUTF8>; -using FunctionMultiSearchFirstPosition = FunctionsMultiStringSearch, NameMultiSearchFirstPosition>; -using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionUTF8>; -using FunctionMultiSearchFirstPositionCaseInsensitive = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; -using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitiveUTF8>; +using FunctionMultiSearchFirstPosition + = FunctionsMultiStringSearch, NameMultiSearchFirstPosition>; +using FunctionMultiSearchFirstPositionUTF8 + = FunctionsMultiStringSearch, NameMultiSearchFirstPositionUTF8>; +using FunctionMultiSearchFirstPositionCaseInsensitive + = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; +using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch< + MultiSearchFirstPositionImpl, + NameMultiSearchFirstPositionCaseInsensitiveUTF8>; using FunctionHasToken = FunctionsStringSearch, NameHasToken>; -using FunctionHasTokenCaseInsensitive = FunctionsStringSearch, NameHasTokenCaseInsensitive>; +using FunctionHasTokenCaseInsensitive + = FunctionsStringSearch, NameHasTokenCaseInsensitive>; void registerFunctionsStringSearch(FunctionFactory & factory) { diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 053240570d1..1f7963fca5f 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -28,6 +28,7 @@ namespace DB * match(haystack, pattern) - search by regular expression re2; Returns 0 or 1. * multiMatchAny(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns 0 or 1 if any pattern_i matches. * multiMatchAnyIndex(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns index of any match or zero if none; + * multiMatchAllIndices(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns an array of matched indices in any order; * * Applies regexp re2 and pulls: * - the first subpattern, if the regexp has a subpattern; @@ -312,9 +313,7 @@ public: if (!array_type || !checkAndGetDataType(array_type->getNestedType().get())) throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - - return std::make_shared>(); + return Impl::ReturnType(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override @@ -347,20 +346,22 @@ public: for (const auto & el : src_arr) refs.emplace_back(el.get()); - const size_t column_haystack_size = column_haystack->size(); - auto col_res = ColumnVector::create(); + auto col_offsets = ColumnArray::ColumnOffsets::create(); auto & vec_res = col_res->getData(); + auto & offsets_res = col_offsets->getData(); - vec_res.resize(column_haystack_size); - + /// The blame for resizing output is for the callee. 
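
The vector_constant implementations above all walk the standard ClickHouse string-column layout: all row values are concatenated into one chars buffer, each terminated by a zero byte, and haystack_offsets[i] points one past the terminator of row i, which is why the hyperscan hunk computes length = haystack_offsets[i] - offset - 1. A small sketch of that traversal, with made-up data standing in for the real columns:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    // Layout used by ColumnString: concatenated bytes with a trailing '\0' per row,
    // plus an offsets array that points one past each terminator.
    std::vector<char>     chars   = {'f','o','o','\0','b','a','r','b','a','z','\0'};
    std::vector<uint64_t> offsets = {4, 11};

    uint64_t prev = 0;
    for (size_t row = 0; row < offsets.size(); ++row)
    {
        // Same arithmetic as in the hunks above: skip the terminating zero byte.
        uint64_t length = offsets[row] - prev - 1;
        std::string value(chars.data() + prev, length);
        std::cout << "row " << row << ": \"" << value << "\" (" << length << " bytes)\n";
        prev = offsets[row];
    }
}
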
if (col_haystack_vector) - Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res); + Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res, offsets_res); else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN); - block.getByPosition(result).column = std::move(col_res); + if constexpr (Impl::is_column_array) + block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets)); + else + block.getByPosition(result).column = std::move(col_res); } }; diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index a3b4c790926..04e9783a519 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -198,9 +198,9 @@ public: * Example: now(). Another example: functions that work with periodically updated dictionaries. */ - virtual bool isDeterministic() const { return true; } + virtual bool isDeterministic() const = 0; - virtual bool isDeterministicInScopeOfQuery() const { return true; } + virtual bool isDeterministicInScopeOfQuery() const = 0; /** Lets you know if the function is monotonic in a range of values. * This is used to work with the index in a sorted chunk of data. @@ -240,11 +240,16 @@ public: /// Get the main function name. virtual String getName() const = 0; + /// See the comment for the same method in IFunctionBase + virtual bool isDeterministic() const = 0; + + virtual bool isDeterministicInScopeOfQuery() const = 0; + /// Override and return true if function needs to depend on the state of the data. - virtual bool isStateful() const { return false; } + virtual bool isStateful() const = 0; /// Override and return true if function could take different number of arguments. - virtual bool isVariadic() const { return false; } + virtual bool isVariadic() const = 0; /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). virtual size_t getNumberOfArguments() const = 0; @@ -277,6 +282,11 @@ public: return buildImpl(arguments, getReturnType(arguments)); } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + bool isStateful() const override { return false; } + bool isVariadic() const override { return false; } + /// Default implementation. Will check only in non-variadic case. 
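
The IFunction.h hunk above turns isDeterministic and isDeterministicInScopeOfQuery into pure virtual methods and moves the old default-true behaviour into the default-implementation helpers, so every function has to state its behaviour explicitly. The distinction matters for functions such as now(): its value changes between queries but is fixed for the duration of one query, which is exactly what the now.cpp, today.cpp and yesterday.cpp hunks further down declare. A toy sketch of the contract, with hypothetical class names:

#include <ctime>
#include <iostream>

// Hypothetical, simplified mirror of the IFunctionBase contract after this patch:
// both determinism methods are pure virtual, so each function must decide.
struct IFunctionBaseSketch
{
    virtual ~IFunctionBaseSketch() = default;
    virtual bool isDeterministic() const = 0;
    virtual bool isDeterministicInScopeOfQuery() const = 0;
};

// now()-like function: the value is captured once per query, so it is stable
// within a query but differs between queries.
struct NowLikeFunction : IFunctionBaseSketch
{
    std::time_t captured_at_query_start = std::time(nullptr);
    bool isDeterministic() const override { return false; }
    bool isDeterministicInScopeOfQuery() const override { return true; }
};

// runningDifference()-like function: depends on block and row order, so it is
// not deterministic even inside one query.
struct OrderDependentFunction : IFunctionBaseSketch
{
    bool isDeterministic() const override { return false; }
    bool isDeterministicInScopeOfQuery() const override { return false; }
};

int main()
{
    NowLikeFunction now;
    OrderDependentFunction diff;
    std::cout << now.isDeterministicInScopeOfQuery() << ' '
              << diff.isDeterministicInScopeOfQuery() << '\n';   // 1 0
}
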
void checkNumberOfArguments(size_t number_of_arguments) const override; @@ -357,6 +367,8 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool canBeExecutedOnDefaultArguments() const override { return true; } bool canBeExecutedOnLowCardinalityDictionary() const override { return isDeterministicInScopeOfQuery(); } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } using PreparedFunctionImpl::execute; using PreparedFunctionImpl::executeImplDryRun; @@ -506,6 +518,9 @@ public: return function->checkNumberOfArguments(number_of_arguments); } + bool isDeterministic() const override { return function->isDeterministic(); } + bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + String getName() const override { return function->getName(); } bool isStateful() const override { return function->isStateful(); } bool isVariadic() const override { return function->isVariadic(); } diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index ce81e62ac69..e7fec8027fb 100644 --- a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -8,10 +8,10 @@ #include #include #include +#include #include #include #include -#include #include @@ -87,18 +87,20 @@ namespace MultiRegexps } }; + /// Helper unique pointers to correctly delete the allocated space when hyperscan cannot compile something and we throw an exception. using CompilerError = std::unique_ptr>; using ScratchPtr = std::unique_ptr>; using DataBasePtr = std::unique_ptr>; - /// Database is thread safe across multiple threads and Scratch is not but we can copy it whenever we use it in the searcher + /// Database is thread safe across multiple threads and Scratch is not but we can copy it whenever we use it in the searcher. class Regexps { public: - Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} {} + Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} { } hs_database_t * getDB() const { return db.get(); } hs_scratch_t * getScratch() const { return scratch.get(); } + private: DataBasePtr db; ScratchPtr scratch; @@ -106,25 +108,25 @@ namespace MultiRegexps struct Pool { - /// Mutex for finding in map + /// Mutex for finding in map. std::mutex mutex; - /// Patterns + possible edit_distance to database and scratch + /// Patterns + possible edit_distance to database and scratch. std::map, std::optional>, Regexps> storage; }; - template + template inline Regexps constructRegexps(const std::vector & str_patterns, std::optional edit_distance) { (void)edit_distance; /// Common pointers - std::vector ptrns; + std::vector patterns; std::vector flags; /// Pointer for external edit distance compilation std::vector ext_exprs; std::vector ext_exprs_ptrs; - ptrns.reserve(str_patterns.size()); + patterns.reserve(str_patterns.size()); flags.reserve(str_patterns.size()); if constexpr (CompileForEditDistance) @@ -135,12 +137,22 @@ namespace MultiRegexps for (const StringRef ref : str_patterns) { - ptrns.push_back(ref.data); - flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8); + patterns.push_back(ref.data); + /* Flags below are the pattern matching flags. + * HS_FLAG_DOTALL is a compile flag where matching a . will not exclude newlines. This is a good + * performance practice accrording to Hyperscan API. 
https://intel.github.io/hyperscan/dev-reference/performance.html#dot-all-mode + * HS_FLAG_ALLOWEMPTY is a compile flag where empty strings are allowed to match. + * HS_FLAG_UTF8 is a flag where UTF8 literals are matched. + * HS_FLAG_SINGLEMATCH is a compile flag where each pattern match will be returned only once. it is a good performance practice + * as it is said in the Hyperscan documentation. https://intel.github.io/hyperscan/dev-reference/performance.html#single-match-flag + */ + flags.push_back(HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8); if constexpr (CompileForEditDistance) { + /// Hyperscan currently does not support UTF8 matching with edit distance. flags.back() &= ~HS_FLAG_UTF8; ext_exprs.emplace_back(); + /// HS_EXT_FLAG_EDIT_DISTANCE is a compile flag responsible for Levenstein distance. ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE; ext_exprs.back().edit_distance = edit_distance.value(); ext_exprs_ptrs.push_back(&ext_exprs.back()); @@ -152,31 +164,32 @@ namespace MultiRegexps std::unique_ptr ids; - if constexpr (FindAnyIndex) + /// We mark the patterns to provide the callback results. + if constexpr (SaveIndices) { - ids.reset(new unsigned int[ptrns.size()]); - for (size_t i = 0; i < ptrns.size(); ++i) + ids.reset(new unsigned int[patterns.size()]); + for (size_t i = 0; i < patterns.size(); ++i) ids[i] = i + 1; } hs_error_t err; if constexpr (!CompileForEditDistance) err = hs_compile_multi( - ptrns.data(), + patterns.data(), flags.data(), ids.get(), - ptrns.size(), + patterns.size(), HS_MODE_BLOCK, nullptr, &db, &compile_error); else err = hs_compile_ext_multi( - ptrns.data(), + patterns.data(), flags.data(), ids.get(), ext_exprs_ptrs.data(), - ptrns.size(), + patterns.size(), HS_MODE_BLOCK, nullptr, &db, @@ -184,6 +197,7 @@ namespace MultiRegexps if (err != HS_SUCCESS) { + /// CompilerError is a unique_ptr, so correct memory free after the exception is thrown. CompilerError error(compile_error); if (error->expression < 0) @@ -196,9 +210,12 @@ namespace MultiRegexps ProfileEvents::increment(ProfileEvents::RegexpCreated); + /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch + /// function which is faster than allocating scratch space each time in each thread. hs_scratch_t * scratch = nullptr; err = hs_alloc_scratch(db, &scratch); + /// If not HS_SUCCESS, it is guaranteed that the memory would not be allocated for scratch. if (err != HS_SUCCESS) throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); @@ -206,7 +223,10 @@ namespace MultiRegexps } /// If CompileForEditDistance is False, edit_distance must be nullopt - template + /// Also, we use templates here because each instantiation of function + /// template has its own copy of local static variables which must not be the same + /// for different hyperscan compilations. + template inline Regexps * get(const std::vector & patterns, std::optional edit_distance) { /// C++11 has thread-safe function-local statics on most modern compilers. @@ -217,15 +237,19 @@ namespace MultiRegexps for (const StringRef & ref : patterns) str_patterns.push_back(ref.toString()); + /// Get the lock for finding database. std::unique_lock lock(known_regexps.mutex); auto it = known_regexps.storage.find({str_patterns, edit_distance}); + /// If not found, compile and let other threads wait. 
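
The comments above describe the scratch handling: the compiled hyperscan database is thread-safe and built once per pattern set, while scratch space is not shared, so each scan clones the prototype scratch with hs_clone_scratch instead of calling hs_alloc_scratch again. A stripped-down sketch of that scheme against the public hyperscan C API, assuming the hs/hs.h header location; error handling is abbreviated and the edit-distance and id-marking details from the hunk above are omitted:

#include <hs/hs.h>
#include <iostream>
#include <vector>

int main()
{
    const char * patterns[] = {"foo", "ba+r"};
    const unsigned flags[]  = {HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
                               HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8};
    const unsigned ids[]    = {1, 2};

    hs_database_t * db = nullptr;
    hs_compile_error_t * compile_error = nullptr;
    if (hs_compile_multi(patterns, flags, ids, 2, HS_MODE_BLOCK, nullptr, &db, &compile_error) != HS_SUCCESS)
    {
        std::cerr << compile_error->message << '\n';
        hs_free_compile_error(compile_error);
        return 1;
    }

    hs_scratch_t * prototype = nullptr;
    hs_alloc_scratch(db, &prototype);        // done once per compiled database

    hs_scratch_t * scratch = nullptr;
    hs_clone_scratch(prototype, &scratch);   // done per scan / per thread, cheaper than a fresh hs_alloc_scratch

    std::vector<unsigned> found;
    auto on_match = [](unsigned id, unsigned long long, unsigned long long, unsigned, void * ctx) -> int
    {
        static_cast<std::vector<unsigned> *>(ctx)->push_back(id);
        return 0;                            // 0 = keep scanning
    };

    const char haystack[] = "foobar";
    hs_scan(db, haystack, sizeof(haystack) - 1, 0, scratch, on_match, &found);
    std::cout << "matched ids: " << found.size() << '\n';   // 2

    hs_free_scratch(scratch);
    hs_free_scratch(prototype);
    hs_free_database(db);
}
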
if (known_regexps.storage.end() == it) - it = known_regexps.storage.emplace( - std::pair{str_patterns, edit_distance}, - constructRegexps(str_patterns, edit_distance)).first; - + it = known_regexps.storage + .emplace( + std::pair{str_patterns, edit_distance}, + constructRegexps(str_patterns, edit_distance)) + .first; + /// If found, unlock and return the database. lock.unlock(); return &it->second; diff --git a/dbms/src/Functions/array/arraySplit.cpp b/dbms/src/Functions/array/arraySplit.cpp new file mode 100644 index 00000000000..dcb0c73e8a4 --- /dev/null +++ b/dbms/src/Functions/array/arraySplit.cpp @@ -0,0 +1,108 @@ +#include +#include +#include "FunctionArrayMapped.h" +#include + + +namespace DB +{ + +template +struct ArraySplitImpl +{ + static bool needBoolean() { return true; } + static bool needExpression() { return true; } + static bool needOneArray() { return false; } + + static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element) + { + return std::make_shared( + std::make_shared(array_element) + ); + } + + static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) + { + const ColumnUInt8 * column_cut = typeid_cast(&*mapped); + + const IColumn::Offsets & in_offsets = array.getOffsets(); + auto column_offsets_2 = ColumnArray::ColumnOffsets::create(); + auto column_offsets_1 = ColumnArray::ColumnOffsets::create(); + IColumn::Offsets & out_offsets_2 = column_offsets_2->getData(); + IColumn::Offsets & out_offsets_1 = column_offsets_1->getData(); + + if (column_cut) + { + const IColumn::Filter & cut = column_cut->getData(); + + size_t pos = 0; + + out_offsets_2.reserve(in_offsets.size()); // the actual size would be equal or larger + out_offsets_1.reserve(in_offsets.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + pos += !Reverse; + for (; pos < in_offsets[i] - Reverse; ++pos) + { + if (cut[pos]) + out_offsets_2.push_back(pos + Reverse); + } + pos += Reverse; + + out_offsets_2.push_back(pos); + out_offsets_1.push_back(out_offsets_2.size()); + } + } + else + { + auto column_cut_const = checkAndGetColumnConst(&*mapped); + + if (!column_cut_const) + throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_cut_const->getValue()) + { + out_offsets_2.reserve(in_offsets.back()); + out_offsets_1.reserve(in_offsets.size()); + + for (size_t i = 0; i < in_offsets.back(); ++i) + out_offsets_2.push_back(i + 1); + for (size_t i = 0; i < in_offsets.size(); ++i) + out_offsets_1.push_back(in_offsets[i]); + } + else + { + out_offsets_2.reserve(in_offsets.size()); + out_offsets_1.reserve(in_offsets.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + out_offsets_2.push_back(in_offsets[i]); + out_offsets_1.push_back(i + 1); + } + } + } + + return ColumnArray::create( + ColumnArray::create( + array.getDataPtr(), + std::move(column_offsets_2) + ), + std::move(column_offsets_1) + ); + } +}; + +struct NameArraySplit { static constexpr auto name = "arraySplit"; }; +struct NameArrayReverseSplit { static constexpr auto name = "arrayReverseSplit"; }; +using FunctionArraySplit = FunctionArrayMapped, NameArraySplit>; +using FunctionArrayReverseSplit = FunctionArrayMapped, NameArrayReverseSplit>; + +void registerFunctionArraySplit(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/now.cpp b/dbms/src/Functions/now.cpp index b5b4d9d7918..35ecf75e420 100644 --- a/dbms/src/Functions/now.cpp +++ 
b/dbms/src/Functions/now.cpp @@ -53,6 +53,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: time_t time_value; @@ -65,8 +66,10 @@ public: static constexpr auto name = "now"; String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 0; } + bool isDeterministic() const override { return false; } + + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } protected: diff --git a/dbms/src/Functions/randConstant.cpp b/dbms/src/Functions/randConstant.cpp index e854484154c..3bbb3f56b0e 100644 --- a/dbms/src/Functions/randConstant.cpp +++ b/dbms/src/Functions/randConstant.cpp @@ -50,6 +50,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: ToType value; @@ -64,6 +65,8 @@ public: static constexpr auto name = Name::name; String getName() const override { return name; } + bool isDeterministic() const override { return false; } + bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp index e0948ebc913..2e8b678240b 100644 --- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp +++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp @@ -11,6 +11,7 @@ void registerFunctionArrayAll(FunctionFactory &); void registerFunctionArraySum(FunctionFactory &); void registerFunctionArrayFirst(FunctionFactory &); void registerFunctionArrayFirstIndex(FunctionFactory &); +void registerFunctionArraySplit(FunctionFactory &); void registerFunctionsArraySort(FunctionFactory &); void registerFunctionArrayReverseSort(FunctionFactory &); void registerFunctionArrayCumSum(FunctionFactory &); @@ -27,6 +28,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArraySum(factory); registerFunctionArrayFirst(factory); registerFunctionArrayFirstIndex(factory); + registerFunctionArraySplit(factory); registerFunctionsArraySort(factory); registerFunctionArrayCumSum(factory); registerFunctionArrayCumSumNonNegative(factory); diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index 418bd093e32..3c0e03e46c3 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -8,6 +8,7 @@ class FunctionFactory; void registerFunctionCurrentDatabase(FunctionFactory &); void registerFunctionCurrentUser(FunctionFactory &); void registerFunctionHostName(FunctionFactory &); +void registerFunctionFQDN(FunctionFactory &); void registerFunctionVisibleWidth(FunctionFactory &); void registerFunctionToTypeName(FunctionFactory &); void registerFunctionGetSizeOfEnumType(FunctionFactory &); @@ -61,6 +62,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionCurrentDatabase(factory); registerFunctionCurrentUser(factory); registerFunctionHostName(factory); + registerFunctionFQDN(factory); registerFunctionVisibleWidth(factory); registerFunctionToTypeName(factory); registerFunctionGetSizeOfEnumType(factory); diff --git a/dbms/src/Functions/runningDifference.h b/dbms/src/Functions/runningDifference.h index 374142311e9..7cda3b0be94 100644 --- 
a/dbms/src/Functions/runningDifference.h +++ b/dbms/src/Functions/runningDifference.h @@ -141,6 +141,7 @@ public: return 1; } + bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; diff --git a/dbms/src/Functions/toTypeName.cpp b/dbms/src/Functions/toTypeName.cpp index 55d602167a5..202274787bc 100644 --- a/dbms/src/Functions/toTypeName.cpp +++ b/dbms/src/Functions/toTypeName.cpp @@ -38,6 +38,9 @@ public: static constexpr auto name = "toTypeName"; String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return argument_types; } const DataTypePtr & getReturnType() const override { return return_type; } diff --git a/dbms/src/Functions/today.cpp b/dbms/src/Functions/today.cpp index 72e4867a950..8f692333170 100644 --- a/dbms/src/Functions/today.cpp +++ b/dbms/src/Functions/today.cpp @@ -52,6 +52,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: DayNum day_value; @@ -64,6 +65,9 @@ public: static constexpr auto name = "today"; String getName() const override { return name; } + + bool isDeterministic() const override { return false; } + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Functions/yesterday.cpp b/dbms/src/Functions/yesterday.cpp index 565a2c40913..88aa6896f5a 100644 --- a/dbms/src/Functions/yesterday.cpp +++ b/dbms/src/Functions/yesterday.cpp @@ -52,6 +52,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: DayNum day_value; @@ -64,6 +65,9 @@ public: static constexpr auto name = "yesterday"; String getName() const override { return name; } + + bool isDeterministic() const override { return false; } + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/IO/AsynchronousWriteBuffer.h b/dbms/src/IO/AsynchronousWriteBuffer.h index c87777450e7..74b5804691b 100644 --- a/dbms/src/IO/AsynchronousWriteBuffer.h +++ b/dbms/src/IO/AsynchronousWriteBuffer.h @@ -41,7 +41,7 @@ private: swapBuffers(); /// The data will be written in separate stream. 
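
Returning to the new dbms/src/Functions/array/arraySplit.cpp above: the function produces Array(Array(T)) by reusing the input element data and building only two offset columns, cutting on the left-hand side of every element (after the first) for which the lambda returned a non-zero value. A single-row sketch of that bookkeeping with made-up data:

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int>     data = {1, 2, 3, 4, 5};    // one input array
    std::vector<uint8_t> cut  = {0, 0, 1, 0, 1};    // lambda result per element

    std::vector<uint64_t> inner_offsets;            // out_offsets_2 in the hunk above
    std::vector<uint64_t> outer_offsets;            // out_offsets_1 in the hunk above

    // Cut before every element (except the first) where cut[pos] != 0.
    for (size_t pos = 1; pos < data.size(); ++pos)
        if (cut[pos])
            inner_offsets.push_back(pos);
    inner_offsets.push_back(data.size());           // close the last subarray
    outer_offsets.push_back(inner_offsets.size());  // this row contains that many subarrays

    // Decode the offsets back into subarrays: [1, 2] [3, 4] [5]
    uint64_t begin = 0;
    for (uint64_t end : inner_offsets)
    {
        std::cout << '[';
        for (uint64_t i = begin; i < end; ++i)
            std::cout << data[i] << (i + 1 < end ? ", " : "");
        std::cout << "] ";
        begin = end;
    }
    std::cout << '\n';
}
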
- pool.schedule([this] { thread(); }); + pool.scheduleOrThrowOnError([this] { thread(); }); } public: diff --git a/dbms/src/IO/ConnectionTimeouts.h b/dbms/src/IO/ConnectionTimeouts.h index 0b7810e25a4..e9f9643943a 100644 --- a/dbms/src/IO/ConnectionTimeouts.h +++ b/dbms/src/IO/ConnectionTimeouts.h @@ -89,7 +89,7 @@ struct ConnectionTimeouts const auto & settings = context.getSettingsRef(); const auto & config = context.getConfigRef(); Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 10), 0}; - return ConnectionTimeouts(settings.http_connection_timeout, settings.http_send_timeout, settings.http_receive_timeout, http_keep_alive_timeout); + return ConnectionTimeouts(settings.http_connection_timeout, settings.http_send_timeout, settings.http_receive_timeout, settings.tcp_keep_alive_timeout, http_keep_alive_timeout); } }; diff --git a/dbms/src/IO/WriteHelpers.cpp b/dbms/src/IO/WriteHelpers.cpp index 0b5bce27b46..fe64983c18a 100644 --- a/dbms/src/IO/WriteHelpers.cpp +++ b/dbms/src/IO/WriteHelpers.cpp @@ -66,26 +66,4 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac if (has_nested) writeException(Exception(Exception::CreateFromPoco, *e.nested()), buf, with_stack_trace); } - - -String backQuoteIfNeed(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeProbablyBackQuotedString(x, wb); - } - return res; -} - -String backQuote(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeBackQuotedString(x, wb); - } - return res; -} - } diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index ab3fad08860..49f34595fe1 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -410,36 +410,36 @@ inline void writeQuotedString(const StringRef & ref, WriteBuffer & buf) writeAnyQuotedString<'\''>(ref, buf); } -inline void writeDoubleQuotedString(const String & s, WriteBuffer & buf) +inline void writeDoubleQuotedString(const StringRef & s, WriteBuffer & buf) { writeAnyQuotedString<'"'>(s, buf); } /// Outputs a string in backquotes. -inline void writeBackQuotedString(const String & s, WriteBuffer & buf) +inline void writeBackQuotedString(const StringRef & s, WriteBuffer & buf) { writeAnyQuotedString<'`'>(s, buf); } /// Outputs a string in backquotes for MySQL. -inline void writeBackQuotedStringMySQL(const String & s, WriteBuffer & buf) +inline void writeBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf) { writeChar('`', buf); - writeAnyEscapedString<'`', true>(s.data(), s.data() + s.size(), buf); + writeAnyEscapedString<'`', true>(s.data, s.data + s.size, buf); writeChar('`', buf); } /// The same, but quotes apply only if there are characters that do not match the identifier without quotes. 
template -inline void writeProbablyQuotedStringImpl(const String & s, WriteBuffer & buf, F && write_quoted_string) +inline void writeProbablyQuotedStringImpl(const StringRef & s, WriteBuffer & buf, F && write_quoted_string) { - if (s.empty() || !isValidIdentifierBegin(s[0])) + if (!s.size || !isValidIdentifierBegin(s.data[0])) write_quoted_string(s, buf); else { - const char * pos = s.data() + 1; - const char * end = s.data() + s.size(); + const char * pos = s.data + 1; + const char * end = s.data + s.size; for (; pos < end; ++pos) if (!isWordCharASCII(*pos)) break; @@ -450,19 +450,19 @@ inline void writeProbablyQuotedStringImpl(const String & s, WriteBuffer & buf, F } } -inline void writeProbablyBackQuotedString(const String & s, WriteBuffer & buf) +inline void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](const String & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); }); } -inline void writeProbablyDoubleQuotedString(const String & s, WriteBuffer & buf) +inline void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](const String & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); }); } -inline void writeProbablyBackQuotedStringMySQL(const String & s, WriteBuffer & buf) +inline void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf) { - writeProbablyQuotedStringImpl(s, buf, [](const String & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); }); + writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); }); } @@ -905,11 +905,4 @@ inline String toString(const T & x) writeText(x, buf); return buf.str(); } - - -/// Quote the identifier with backquotes, if required. -String backQuoteIfNeed(const String & x); -/// Quote the identifier with backquotes. 
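
The quoting helpers above now take StringRef, while the back-quoting rule itself is unchanged: an identifier is written bare only if it begins like an identifier and contains nothing but word characters, otherwise it is wrapped in backquotes. A self-contained sketch of that rule; isValidIdentifierBegin and isWordCharASCII are re-implemented here for the sketch, and the escaping is simplified compared to writeAnyEscapedString:

#include <iostream>
#include <string>

static bool isValidIdentifierBegin(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

static bool isWordCharASCII(char c)
{
    return isValidIdentifierBegin(c) || (c >= '0' && c <= '9');
}

static std::string backQuoteIfNeed(const std::string & s)
{
    bool need_quotes = s.empty() || !isValidIdentifierBegin(s[0]);
    for (size_t i = 1; !need_quotes && i < s.size(); ++i)
        need_quotes = !isWordCharASCII(s[i]);

    if (!need_quotes)
        return s;

    std::string res = "`";
    for (char c : s)
    {
        if (c == '`')
            res += '\\';    // simplified escaping of the quote character
        res += c;
    }
    res += '`';
    return res;
}

int main()
{
    std::cout << backQuoteIfNeed("hits_v1") << '\n';       // hits_v1
    std::cout << backQuoteIfNeed("1table") << '\n';        // `1table`
    std::cout << backQuoteIfNeed("weird name") << '\n';    // `weird name`
}
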
-String backQuote(const String & x); - } diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index c519d75b812..c587d1826e1 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 53c706bf85d..fc358c22189 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1158,7 +1158,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( tasks[bucket] = std::packaged_task(std::bind(converter, bucket, CurrentThread::getGroup())); if (thread_pool) - thread_pool->schedule([bucket, &tasks] { tasks[bucket](); }); + thread_pool->scheduleOrThrowOnError([bucket, &tasks] { tasks[bucket](); }); else tasks[bucket](); } @@ -1614,7 +1614,7 @@ private: if (max_scheduled_bucket_num >= NUM_BUCKETS) return; - parallel_merge_data->pool.schedule(std::bind(&MergingAndConvertingBlockInputStream::thread, this, + parallel_merge_data->pool.scheduleOrThrowOnError(std::bind(&MergingAndConvertingBlockInputStream::thread, this, max_scheduled_bucket_num, CurrentThread::getGroup())); } @@ -1968,7 +1968,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari auto task = std::bind(merge_bucket, bucket, aggregates_pool, CurrentThread::getGroup()); if (thread_pool) - thread_pool->schedule(task); + thread_pool->scheduleOrThrowOnError(task); else task(); } diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 5c1fe4bcae2..b2e967b213c 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -24,12 +24,14 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; } -AnalyzedJoin::AnalyzedJoin(const Settings & settings) +AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_) : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode}) + , default_max_bytes(settings.default_max_bytes_in_join) , join_use_nulls(settings.join_use_nulls) , partial_merge_join(settings.partial_merge_join) , partial_merge_join_optimizations(settings.partial_merge_join_optimizations) , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks) + , tmp_path(tmp_path_) {} void AnalyzedJoin::addUsingKey(const ASTPtr & ast) diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 9629547328d..c979b50d3a3 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -38,6 +39,7 @@ class AnalyzedJoin friend class SyntaxAnalyzer; const SizeLimits size_limits; + const size_t default_max_bytes; const bool join_use_nulls; const bool partial_merge_join = false; const bool partial_merge_join_optimizations = false; @@ -48,6 +50,7 @@ class AnalyzedJoin ASTs key_asts_left; ASTs key_asts_right; ASTTableJoin table_join; + ASOF::Inequality asof_inequality = ASOF::Inequality::GreaterOrEquals; /// All columns which can be read from joined table. Duplicating names are qualified. NamesAndTypesList columns_from_joined_table; @@ -59,13 +62,16 @@ class AnalyzedJoin /// Original name -> name. Only ranamed columns. 
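
Several hunks in this patch (AsynchronousWriteBuffer and Aggregator above, InterpreterSystemQuery below) replace ThreadPool::schedule with scheduleOrThrowOnError, so a failure to enqueue a task surfaces as an exception rather than a status the caller could ignore. A minimal usage sketch, assuming the ThreadPool from Common/ThreadPool.h that those hunks use:

#include <Common/ThreadPool.h>
#include <atomic>
#include <iostream>

int main()
{
    ThreadPool pool(4);
    std::atomic<int> sum{0};

    // scheduleOrThrowOnError throws if the job cannot be scheduled (e.g. the pool is shutting down).
    for (int i = 1; i <= 10; ++i)
        pool.scheduleOrThrowOnError([i, &sum] { sum += i; });

    pool.wait();                  // all scheduled jobs have finished here
    std::cout << sum << '\n';     // 55
}
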
std::unordered_map renames; + String tmp_path; + public: - AnalyzedJoin(const Settings &); + AnalyzedJoin(const Settings &, const String & tmp_path); /// for StorageJoin AnalyzedJoin(SizeLimits limits, bool use_nulls, ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness, const Names & key_names_right_) : size_limits(limits) + , default_max_bytes(0) , join_use_nulls(use_nulls) , key_names_right(key_names_right_) { @@ -76,9 +82,11 @@ public: ASTTableJoin::Kind kind() const { return table_join.kind; } ASTTableJoin::Strictness strictness() const { return table_join.strictness; } const SizeLimits & sizeLimits() const { return size_limits; } + const String & getTemporaryPath() const { return tmp_path; } bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); } bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(table_join.kind); } + size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; } bool enablePartialMergeJoinOptimizations() const { return partial_merge_join_optimizations; } @@ -100,6 +108,9 @@ public: void addJoinedColumn(const NameAndTypePair & joined_column); void addJoinedColumnsAndCorrectNullability(Block & sample_block) const; + void setAsofInequality(ASOF::Inequality inequality) { asof_inequality = inequality; } + ASOF::Inequality getAsofInequality() { return asof_inequality; } + ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only diff --git a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 68e04b45d99..f8938f2a7d3 100644 --- a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -32,16 +32,20 @@ void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const } void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, - const std::pair & table_no) + const std::pair & table_no, const ASOF::Inequality & inequality) { if (table_no.first == 1 || table_no.second == 2) { asof_left_key = left_ast->clone(); asof_right_key = right_ast->clone(); - return; + analyzed_join.setAsofInequality(inequality); + } + else if (table_no.first == 2 || table_no.second == 1) + { + asof_left_key = right_ast->clone(); + asof_right_key = left_ast->clone(); + analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality)); } - - throw Exception("ASOF JOIN for (left_table.x <= right_table.x) is not implemented", ErrorCodes::NOT_IMPLEMENTED); } void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() @@ -66,10 +70,9 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as return; } - bool less_or_equals = (func.name == "lessOrEquals"); - bool greater_or_equals = (func.name == "greaterOrEquals"); + ASOF::Inequality inequality = ASOF::getInequality(func.name); - if (data.is_asof && (less_or_equals || greater_or_equals)) + if (data.is_asof && (inequality != ASOF::Inequality::None)) { if (data.asof_left_key || data.asof_right_key) throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + "."); @@ -78,11 +81,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(ast, left, right, data); - if (greater_or_equals) - data.addAsofJoinKeys(left, right, table_numbers); - 
else - data.addAsofJoinKeys(right, left, std::make_pair(table_numbers.second, table_numbers.first)); - + data.addAsofJoinKeys(left, right, table_numbers, inequality); return; } diff --git a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h index 4d085dfcc31..0b4cb1fe857 100644 --- a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -12,6 +12,11 @@ namespace DB class ASTIdentifier; class AnalyzedJoin; +namespace ASOF +{ + enum class Inequality; +} + class CollectJoinOnKeysMatcher { public: @@ -29,7 +34,8 @@ public: bool has_some{false}; void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); - void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); + void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no, + const ASOF::Inequality & asof_inequality); void asofToJoinKeys(); }; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index a37a8cbbe26..e1e3fa3cedc 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1,4 +1,5 @@ #include "config_core.h" +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include namespace ProfileEvents @@ -1167,6 +1169,58 @@ JoinPtr ExpressionActions::getTableJoinAlgo() const } +bool ExpressionActions::resultIsAlwaysEmpty() const +{ + /// Check that has join which returns empty result. + + for (auto & action : actions) + { + if (action.type == action.JOIN && action.join && action.join->alwaysReturnsEmptySet()) + return true; + } + + return false; +} + + +bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) const +{ + /// Check has column in (empty set). + String set_to_check; + + for (auto & action : actions) + { + if (action.type == action.APPLY_FUNCTION && action.function_base) + { + auto name = action.function_base->getName(); + if ((name == "in" || name == "globalIn") + && action.result_name == column_name + && action.argument_names.size() > 1) + { + set_to_check = action.argument_names[1]; + } + } + } + + if (!set_to_check.empty()) + { + for (auto & action : actions) + { + if (action.type == action.ADD_COLUMN && action.result_name == set_to_check) + { + if (auto * column_set = typeid_cast(action.added_column.get())) + { + if (column_set->getData()->getTotalRowCount() == 0) + return true; + } + } + } + } + + return false; +} + + /// It is not important to calculate the hash of individual strings or their concatenation UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action) const { diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 133e70d1fdb..ed416df5fe6 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -239,6 +239,13 @@ public: const Settings & getSettings() const { return settings; } + /// Check if result block has no rows. True if it's definite, false if we can't say for sure. + /// Call it only after subqueries for join were executed. + bool resultIsAlwaysEmpty() const; + + /// Check if column is always zero. True if it's definite, false if we can't say for sure. + /// Call it only after subqueries for sets were executed. 
+ bool checkColumnIsAlwaysFalse(const String & column_name) const; struct ActionsHash { diff --git a/dbms/src/Interpreters/IJoin.h b/dbms/src/Interpreters/IJoin.h index af6d643cc2b..5c005dc1b1c 100644 --- a/dbms/src/Interpreters/IJoin.h +++ b/dbms/src/Interpreters/IJoin.h @@ -30,6 +30,7 @@ public: virtual void joinTotals(Block & block) const = 0; virtual size_t getTotalRowCount() const = 0; + virtual bool alwaysReturnsEmptySet() const { return false; } virtual BlockInputStreamPtr createStreamWithNonJoinedRows(const Block &, UInt64) const { return {}; } }; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index d187f68eda3..8913f6a4b12 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -203,6 +203,10 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) for (const auto & column : columns) { + /// Do not include virtual columns + if (column.is_virtual) + continue; + const auto column_declaration = std::make_shared(); ASTPtr column_declaration_ptr{column_declaration}; diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 7887ebc8892..565863d139a 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index dbdf3696b9c..905fe6e3f04 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -48,7 +47,6 @@ #include #include #include -#include #include #include @@ -714,35 +712,6 @@ InterpreterSelectQuery::analyzeExpressions( return res; } - -BlockInputStreamPtr InterpreterSelectQuery::createCheckNonEmptySetIfNeed(BlockInputStreamPtr stream, const ExpressionActionsPtr & expression) const -{ - for (const auto & action : expression->getActions()) - { - if (action.type == ExpressionAction::JOIN) - { - const auto * join = dynamic_cast(action.join.get()); - if (!join) - continue; - if (isInnerOrRight(join->getKind())) - { - stream = std::make_shared(stream, expression, syntax_analyzer_result->need_check_empty_sets); - break; - } - } - else if (action.type == ExpressionAction::ADD_COLUMN) - { - if (syntax_analyzer_result->need_check_empty_sets.count(action.result_name)) - { - stream = std::make_shared(stream, expression, syntax_analyzer_result->need_check_empty_sets); - break; - } - } - } - return stream; -} - - static Field getWithFillFieldValue(const ASTPtr & node, const Context & context) { const auto & [field, type] = evaluateConstantExpression(node, context); @@ -998,7 +967,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS }); else pipeline.streams.back() = std::make_shared( - createCheckNonEmptySetIfNeed(pipeline.streams.back(), expressions.prewhere_info->prewhere_actions), expressions.prewhere_info->prewhere_actions, + pipeline.streams.back(), expressions.prewhere_info->prewhere_actions, expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column); // To remove additional columns in dry run @@ -1114,7 +1083,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS header_before_join = 
pipeline.firstStream()->getHeader(); /// Applies to all sources except stream_with_non_joined_data. for (auto & stream : pipeline.streams) - stream = std::make_shared(createCheckNonEmptySetIfNeed(stream, expressions.before_join), expressions.before_join); + stream = std::make_shared(stream, expressions.before_join); if (isMergeJoin(expressions.before_join->getTableJoinAlgo()) && settings.partial_merge_join_optimizations) { @@ -1695,7 +1664,7 @@ void InterpreterSelectQuery::executeWhere(Pipeline & pipeline, const ExpressionA { pipeline.transform([&](auto & stream) { - stream = std::make_shared(createCheckNonEmptySetIfNeed(stream, expression), expression, getSelectQuery().where()->getColumnName(), remove_fiter); + stream = std::make_shared(stream, expression, getSelectQuery().where()->getColumnName(), remove_fiter); }); } @@ -1711,7 +1680,7 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre { pipeline.transform([&](auto & stream) { - stream = std::make_shared(createCheckNonEmptySetIfNeed(stream, expression), expression); + stream = std::make_shared(stream, expression); }); Names key_names; @@ -2077,7 +2046,7 @@ void InterpreterSelectQuery::executeExpression(Pipeline & pipeline, const Expres { pipeline.transform([&](auto & stream) { - stream = std::make_shared(createCheckNonEmptySetIfNeed(stream, expression), expression); + stream = std::make_shared(stream, expression); }); } @@ -2114,19 +2083,8 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr so }); } - if (pipeline.hasMoreThanOneStream()) - { - pipeline.transform([&](auto & stream) - { - stream = std::make_shared(stream); - }); - - UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit); - pipeline.firstStream() = std::make_shared( - pipeline.streams, sorting_info->prefix_order_descr, - settings.max_block_size, limit_for_merging); - pipeline.streams.resize(1); - } + UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit); + executeMergeSorted(pipeline, sorting_info->prefix_order_descr, limit_for_merging); if (need_finish_sorting) { @@ -2248,12 +2206,20 @@ void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline) SortDescription order_descr = getSortDescription(query, context); UInt64 limit = getLimitForSorting(query, context); - const Settings & settings = context.getSettingsRef(); - /// If there are several streams, then we merge them into one if (pipeline.hasMoreThanOneStream()) { unifyStreams(pipeline, pipeline.firstStream()->getHeader()); + executeMergeSorted(pipeline, order_descr, limit); + } +} + + +void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline, const SortDescription & sort_description, UInt64 limit) +{ + if (pipeline.hasMoreThanOneStream()) + { + const Settings & settings = context.getSettingsRef(); /** MergingSortedBlockInputStream reads the sources sequentially. * To make the data on the remote servers prepared in parallel, we wrap it in AsynchronousBlockInputStream. @@ -2263,8 +2229,8 @@ void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline) stream = std::make_shared(stream); }); - /// Merge the sorted sources into one sorted source. 
- pipeline.firstStream() = std::make_shared(pipeline.streams, order_descr, settings.max_block_size, limit); + pipeline.firstStream() = std::make_shared( + pipeline.streams, sort_description, settings.max_block_size, limit); pipeline.streams.resize(1); } } @@ -2275,15 +2241,20 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPipeline & pipeline) SortDescription order_descr = getSortDescription(query, context); UInt64 limit = getLimitForSorting(query, context); - const Settings & settings = context.getSettingsRef(); + executeMergeSorted(pipeline, order_descr, limit); +} +void InterpreterSelectQuery::executeMergeSorted(QueryPipeline & pipeline, const SortDescription & sort_description, UInt64 limit) +{ /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) { + const Settings & settings = context.getSettingsRef(); + auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), - order_descr, + sort_description, settings.max_block_size, limit); pipeline.addPipe({ std::move(transform) }); @@ -2646,13 +2617,29 @@ void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets) { - executeUnion(pipeline, {}); + /// Merge streams to one. Use MergeSorting if data was read in sorted order, Union otherwise. + if (query_info.sorting_info) + { + if (pipeline.stream_with_non_joined_data) + throw Exception("Using read in order optimization, but has stream with non-joined data in pipeline", ErrorCodes::LOGICAL_ERROR); + executeMergeSorted(pipeline, query_info.sorting_info->prefix_order_descr, 0); + } + else + executeUnion(pipeline, {}); + pipeline.firstStream() = std::make_shared( pipeline.firstStream(), subqueries_for_sets, context); } void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, SubqueriesForSets & subqueries_for_sets) { + if (query_info.sorting_info) + { + if (pipeline.hasDelayedStream()) + throw Exception("Using read in order optimization, but has delayed stream in pipeline", ErrorCodes::LOGICAL_ERROR); + executeMergeSorted(pipeline, query_info.sorting_info->prefix_order_descr, 0); + } + const Settings & settings = context.getSettingsRef(); auto creating_sets = std::make_shared( diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index a3dae3cad33..ce49ce90c62 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -214,6 +214,7 @@ private: void executeDistinct(Pipeline & pipeline, bool before_order, Names columns); void executeExtremes(Pipeline & pipeline); void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map & subqueries_for_sets); + void executeMergeSorted(Pipeline & pipeline, const SortDescription & sort_description, UInt64 limit); void executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_fiter); void executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); @@ -231,6 +232,7 @@ private: void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns); void executeExtremes(QueryPipeline & pipeline); void executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, std::unordered_map & subqueries_for_sets); + void executeMergeSorted(QueryPipeline & pipeline, const SortDescription & sort_description, UInt64 limit); 
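
The InterpreterSelectQuery changes above funnel every "merge several sorted streams into one" case through the new executeMergeSorted(pipeline, sort_description, limit) overloads, which delegate to MergingSortedBlockInputStream or MergingSortedTransform. As a plain illustration of the k-way merge those classes perform, independent of any ClickHouse types:

#include <functional>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

// Each input is already sorted; a min-heap of per-input cursors yields a globally
// sorted output, optionally stopping after `limit` rows.
std::vector<int> mergeSorted(const std::vector<std::vector<int>> & inputs, size_t limit = 0)
{
    using Cursor = std::pair<int, size_t>;   // (current value, input index)
    std::priority_queue<Cursor, std::vector<Cursor>, std::greater<>> heap;
    std::vector<size_t> pos(inputs.size(), 0);

    for (size_t i = 0; i < inputs.size(); ++i)
        if (!inputs[i].empty())
            heap.push({inputs[i][0], i});

    std::vector<int> out;
    while (!heap.empty() && (limit == 0 || out.size() < limit))
    {
        auto [value, source] = heap.top();
        heap.pop();
        out.push_back(value);
        if (++pos[source] < inputs[source].size())
            heap.push({inputs[source][pos[source]], source});
    }
    return out;
}

int main()
{
    auto merged = mergeSorted({{1, 4, 9}, {2, 3, 10}, {5}}, /*limit=*/5);
    for (int v : merged)
        std::cout << v << ' ';   // 1 2 3 4 5
    std::cout << '\n';
}
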
/// Add ConvertingBlockInputStream to specified header. void unifyStreams(Pipeline & pipeline, Block header); @@ -253,9 +255,6 @@ private: */ void initSettings(); - /// Whether you need to check if the set is empty before ExpressionActions is executed. Create a CheckNonEmptySetBlockInputStream if needed. - BlockInputStreamPtr createCheckNonEmptySetIfNeed(BlockInputStreamPtr stream, const ExpressionActionsPtr & expression) const; - const SelectQueryOptions options; ASTPtr query_ptr; Context context; diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index d4cdf10fd63..6da0b9333ac 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -327,7 +327,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size())); for (auto & table : replica_names) - pool.schedule([&] () { tryRestartReplica(table.first, table.second, system_context); }); + pool.scheduleOrThrowOnError([&]() { tryRestartReplica(table.first, table.second, system_context); }); pool.wait(); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index d5381e1dc6d..3267e6a779b 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -70,6 +70,7 @@ Join::Join(std::shared_ptr table_join_, const Block & right_sample , nullable_right_side(table_join->forceNullableRight()) , nullable_left_side(table_join->forceNullableLeft()) , any_take_last_row(any_take_last_row_) + , asof_inequality(table_join->getAsofInequality()) , log(&Logger::get("Join")) { setSampleBlock(right_sample_block); @@ -466,6 +467,9 @@ bool Join::addJoinedBlock(const Block & block) size_t rows = block.rows(); + if (rows) + has_no_rows_in_maps = false; + blocks.push_back(block); Block * stored_block = &blocks.back(); @@ -635,7 +639,7 @@ std::unique_ptr NO_INLINE joinRightIndexedColumns( if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) { - if (const RowRef * found = mapped.findAsof(join.getAsofType(), asof_column, i)) + if (const RowRef * found = mapped.findAsof(join.getAsofType(), join.getAsofInequality(), asof_column, i)) { filter[i] = 1; mapped.setUsed(); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 424512266fb..5cc104d0dac 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -159,13 +159,16 @@ public: BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, UInt64 max_block_size) const override; /// Number of keys in all built JOIN maps. - size_t getTotalRowCount() const override; + size_t getTotalRowCount() const final; /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools. size_t getTotalByteCount() const; + bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && has_no_rows_in_maps; } + ASTTableJoin::Kind getKind() const { return kind; } ASTTableJoin::Strictness getStrictness() const { return strictness; } AsofRowRefs::Type getAsofType() const { return *asof_type; } + ASOF::Inequality getAsofInequality() const { return asof_inequality; } bool anyTakeLastRow() const { return any_take_last_row; } /// Different types of keys for maps. 
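
The CollectJoinOnKeysVisitor and Join hunks above generalize ASOF JOIN from the previous >= / <= forms to any of <, <=, >, >= written with the keys in either order: when the ON condition puts the right table's key on the left, the keys are swapped and the stored inequality is reversed. A small sketch of that normalization; the Inequality enum here is a hypothetical stand-in for ASOF::Inequality, whose definition lives outside this diff:

#include <cassert>
#include <iostream>

enum class Inequality { None, Less, Greater, LessOrEquals, GreaterOrEquals };

// Mirror image of the condition: "left <= right" is the same constraint as "right >= left".
Inequality reverseInequality(Inequality inequality)
{
    switch (inequality)
    {
        case Inequality::Less:            return Inequality::Greater;
        case Inequality::Greater:         return Inequality::Less;
        case Inequality::LessOrEquals:    return Inequality::GreaterOrEquals;
        case Inequality::GreaterOrEquals: return Inequality::LessOrEquals;
        default:                          return Inequality::None;
    }
}

int main()
{
    // ON ... AND t1.time <= t2.time  -> keys already in (left, right) order, keep LessOrEquals.
    // ON ... AND t2.time <= t1.time  -> keys arrive swapped, so store the reversed inequality.
    Inequality as_written = Inequality::LessOrEquals;
    Inequality stored = reverseInequality(as_written);
    assert(stored == Inequality::GreaterOrEquals);
    std::cout << "stored reversed inequality\n";
}
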
@@ -299,12 +302,14 @@ private: BlockNullmapList blocks_nullmaps; MapsVariant maps; + bool has_no_rows_in_maps = true; /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. Arena pool; Type type = Type::EMPTY; std::optional asof_type; + ASOF::Inequality asof_inequality; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/dbms/src/Interpreters/MergeJoin.cpp b/dbms/src/Interpreters/MergeJoin.cpp index 430ff6693ab..45b9ac86bf6 100644 --- a/dbms/src/Interpreters/MergeJoin.cpp +++ b/dbms/src/Interpreters/MergeJoin.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -7,6 +9,10 @@ #include #include #include +#include +#include +#include +#include namespace DB { @@ -16,6 +22,7 @@ namespace ErrorCodes extern const int SET_SIZE_LIMIT_EXCEEDED; extern const int NOT_IMPLEMENTED; extern const int PARAMETER_OUT_OF_BOUND; + extern const int NOT_ENOUGH_SPACE; extern const int LOGICAL_ERROR; } @@ -133,9 +140,8 @@ public: return getNextEqualRangeImpl(rhs); } - int intersect(const Block & right_block, const Block & right_table_keys, const Names & key_names) + int intersect(const Block & min_max, const Names & key_names) { - const Block min_max = extractMinMax(right_block, right_table_keys); if (end() == 0 || min_max.rows() != 2) throw Exception("Unexpected block size", ErrorCodes::LOGICAL_ERROR); @@ -320,21 +326,127 @@ void joinInequalsLeft(const Block & left_block, MutableColumns & left_columns, M appendNulls(right_columns, rows_to_add); } +Blocks blocksListToBlocks(const BlocksList & in_blocks) +{ + Blocks out_blocks; + out_blocks.reserve(in_blocks.size()); + for (const auto & block : in_blocks) + out_blocks.push_back(block); + return out_blocks; +} + +std::unique_ptr flushBlockToFile(const String & tmp_path, const Block & header, Block && block) +{ + auto tmp_file = createTemporaryFile(tmp_path); + + OneBlockInputStream stream(block); + std::atomic is_cancelled{false}; + TemporaryFileStream::write(tmp_file->path(), header, stream, &is_cancelled); + if (is_cancelled) + throw Exception("Cannot flush MergeJoin data on disk. 
No space at " + tmp_path, ErrorCodes::NOT_ENOUGH_SPACE); + + return tmp_file; +} + +void flushStreamToFiles(const String & tmp_path, const Block & header, IBlockInputStream & stream, + std::vector> & files, + std::function callback = [](const Block &){}) +{ + while (Block block = stream.read()) + { + if (!block.rows()) + continue; + + callback(block); + auto tmp_file = flushBlockToFile(tmp_path, header, std::move(block)); + files.emplace_back(std::move(tmp_file)); + } +} + +BlockInputStreams makeSortedInputStreams(std::vector & sorted_files, const Block & header) +{ + BlockInputStreams inputs; + + for (const auto & track : sorted_files) + { + BlockInputStreams sequence; + for (const auto & file : track) + sequence.emplace_back(std::make_shared(file->path(), header)); + inputs.emplace_back(std::make_shared(sequence)); + } + + return inputs; +} + } +void MiniLSM::insert(const BlocksList & blocks) +{ + if (blocks.empty()) + return; -MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block) + SortedFiles sorted_blocks; + if (blocks.size() > 1) + { + BlockInputStreams inputs; + inputs.reserve(blocks.size()); + for (auto & block : blocks) + inputs.push_back(std::make_shared(block)); + + MergingSortedBlockInputStream sorted_input(inputs, sort_description, rows_in_block); + flushStreamToFiles(path, sample_block, sorted_input, sorted_blocks); + } + else + { + OneBlockInputStream sorted_input(blocks.front()); + flushStreamToFiles(path, sample_block, sorted_input, sorted_blocks); + } + + sorted_files.emplace_back(std::move(sorted_blocks)); + if (sorted_files.size() >= max_size) + merge(); +} + +/// TODO: better merge strategy +void MiniLSM::merge(std::function callback) +{ + BlockInputStreams inputs = makeSortedInputStreams(sorted_files, sample_block); + MergingSortedBlockInputStream sorted_stream(inputs, sort_description, rows_in_block); + + SortedFiles out; + flushStreamToFiles(path, sample_block, sorted_stream, out, callback); + + sorted_files.clear(); + sorted_files.emplace_back(std::move(out)); +} + + +MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block_) : table_join(table_join_) + , size_limits(table_join->sizeLimits()) + , right_sample_block(right_sample_block_) , nullable_right_side(table_join->forceNullableRight()) , is_all(table_join->strictness() == ASTTableJoin::Strictness::All) , is_inner(isInner(table_join->kind())) , is_left(isLeft(table_join->kind())) , skip_not_intersected(table_join->enablePartialMergeJoinOptimizations()) + , max_rows_in_right_block(table_join->maxRowsInRightBlock()) { if (!isLeft(table_join->kind()) && !isInner(table_join->kind())) throw Exception("Partial merge supported for LEFT and INNER JOINs only", ErrorCodes::NOT_IMPLEMENTED); + if (!max_rows_in_right_block) + throw Exception("partial_merge_join_rows_in_right_blocks cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + + if (!size_limits.hasLimits()) + { + size_limits.max_bytes = table_join->defaultMaxBytes(); + if (!size_limits.max_bytes) + throw Exception("No limit for MergeJoin (max_rows_in_join, max_bytes_in_join or default_max_bytes_in_join have to be set)", + ErrorCodes::PARAMETER_OUT_OF_BOUND); + } + JoinCommon::extractKeysForJoin(table_join->keyNamesRight(), right_sample_block, right_table_keys, right_columns_to_add); const NameSet required_right_keys = table_join->requiredRightKeys(); @@ -350,6 +462,8 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & ri makeSortAndMerge(table_join->keyNamesLeft(), left_sort_description, 
left_merge_description); makeSortAndMerge(table_join->keyNamesRight(), right_sort_description, right_merge_description); + + lsm = std::make_unique(table_join->getTemporaryPath(), right_sample_block, right_sort_description, max_rows_in_right_block); } void MergeJoin::setTotals(const Block & totals_block) @@ -365,24 +479,83 @@ void MergeJoin::joinTotals(Block & block) const void MergeJoin::mergeRightBlocks() { + if (is_in_memory) + mergeInMemoryRightBlocks(); + else + mergeFlushedRightBlocks(); +} + +void MergeJoin::mergeInMemoryRightBlocks() +{ + std::unique_lock lock(rwlock); + if (right_blocks.empty()) return; - Blocks unsorted_blocks; - unsorted_blocks.reserve(right_blocks.size()); - for (const auto & block : right_blocks) - unsorted_blocks.push_back(block); - - size_t max_rows_in_block = table_join->maxRowsInRightBlock(); - if (!max_rows_in_block) - throw Exception("partial_merge_join_rows_in_right_blocks cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + Blocks blocks_to_merge = blocksListToBlocks(right_blocks); + clearRightBlocksList(); /// TODO: there should be no splitted keys by blocks for RIGHT|FULL JOIN - MergeSortingBlocksBlockInputStream stream(unsorted_blocks, right_sort_description, max_rows_in_block); + MergeSortingBlocksBlockInputStream sorted_input(blocks_to_merge, right_sort_description, max_rows_in_right_block); - right_blocks.clear(); - while (Block block = stream.read()) - right_blocks.emplace_back(std::move(block)); + while (Block block = sorted_input.read()) + { + if (!block.rows()) + continue; + + if (skip_not_intersected) + min_max_right_blocks.emplace_back(extractMinMax(block, right_table_keys)); + countBlockSize(block); + loaded_right_blocks.emplace_back(std::make_shared(std::move(block))); + } +} + +void MergeJoin::mergeFlushedRightBlocks() +{ + std::unique_lock lock(rwlock); + + lsm->insert(right_blocks); + clearRightBlocksList(); + + auto callback = [&](const Block & block) + { + if (skip_not_intersected) + min_max_right_blocks.emplace_back(extractMinMax(block, right_table_keys)); + countBlockSize(block); + }; + + lsm->merge(callback); + flushed_right_blocks.swap(lsm->sorted_files.front()); + + /// Get memory limit or aproximate it from row limit and bytes per row factor + UInt64 memory_limit = size_limits.max_bytes; + UInt64 rows_limit = size_limits.max_rows; + if (!memory_limit && rows_limit) + memory_limit = right_blocks_bytes * rows_limit / right_blocks_row_count; + + cached_right_blocks = std::make_unique(memory_limit); +} + +void MergeJoin::flushRightBlocks() +{ + /// it's under unique_lock(rwlock) + + is_in_memory = false; + lsm->insert(right_blocks); + clearRightBlocksList(); +} + +bool MergeJoin::saveRightBlock(Block && block) +{ + std::unique_lock lock(rwlock); + + countBlockSize(block); + right_blocks.emplace_back(std::move(block)); + + bool has_memory = size_limits.softCheck(right_blocks_row_count, right_blocks_bytes); + if (!has_memory) + flushRightBlocks(); + return true; } bool MergeJoin::addJoinedBlock(const Block & src_block) @@ -391,14 +564,7 @@ bool MergeJoin::addJoinedBlock(const Block & src_block) JoinCommon::removeLowCardinalityInplace(block); sortBlock(block, right_sort_description); - - std::unique_lock lock(rwlock); - - right_blocks.push_back(block); - right_blocks_row_count += block.rows(); - right_blocks_bytes += block.bytes(); - - return table_join->sizeLimits().check(right_blocks_row_count, right_blocks_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); + return saveRightBlock(std::move(block)); } void 
MergeJoin::joinBlock(Block & block) @@ -408,7 +574,15 @@ void MergeJoin::joinBlock(Block & block) JoinCommon::removeLowCardinalityInplace(block); sortBlock(block, left_sort_description); + if (is_in_memory) + joinSortedBlock(block); + else + joinSortedBlock(block); +} +template +void MergeJoin::joinSortedBlock(Block & block) +{ std::shared_lock lock(rwlock); size_t rows_to_reserve = is_left ? block.rows() : 0; @@ -416,24 +590,27 @@ void MergeJoin::joinBlock(Block & block) MutableColumns right_columns = makeMutableColumns(right_columns_to_add, rows_to_reserve); MergeJoinCursor left_cursor(block, left_merge_description); size_t left_key_tail = 0; + size_t right_blocks_count = rightBlocksCount(); if (is_left) { - for (auto it = right_blocks.begin(); it != right_blocks.end(); ++it) + for (size_t i = 0; i < right_blocks_count; ++i) { if (left_cursor.atEnd()) break; if (skip_not_intersected) { - int intersection = left_cursor.intersect(*it, right_table_keys, table_join->keyNamesRight()); + int intersection = left_cursor.intersect(min_max_right_blocks[i], table_join->keyNamesRight()); if (intersection < 0) break; /// (left) ... (right) if (intersection > 0) continue; /// (right) ... (left) } - leftJoin(left_cursor, block, *it, left_columns, right_columns, left_key_tail); + std::shared_ptr right_block = loadRightBlock(i); + + leftJoin(left_cursor, block, *right_block, left_columns, right_columns, left_key_tail); } left_cursor.nextN(left_key_tail); @@ -445,21 +622,23 @@ void MergeJoin::joinBlock(Block & block) } else if (is_inner) { - for (auto it = right_blocks.begin(); it != right_blocks.end(); ++it) + for (size_t i = 0; i < right_blocks_count; ++i) { if (left_cursor.atEnd()) break; if (skip_not_intersected) { - int intersection = left_cursor.intersect(*it, right_table_keys, table_join->keyNamesRight()); + int intersection = left_cursor.intersect(min_max_right_blocks[i], table_join->keyNamesRight()); if (intersection < 0) break; /// (left) ... (right) if (intersection > 0) continue; /// (right) ... 
(left) } - innerJoin(left_cursor, block, *it, left_columns, right_columns, left_key_tail); + std::shared_ptr right_block = loadRightBlock(i); + + innerJoin(left_cursor, block, *right_block, left_columns, right_columns, left_key_tail); } left_cursor.nextN(left_key_tail); @@ -546,4 +725,30 @@ void MergeJoin::addRightColumns(Block & block, MutableColumns && right_columns) } } +template +size_t MergeJoin::rightBlocksCount() +{ + if constexpr (!in_memory) + return flushed_right_blocks.size(); + else + return loaded_right_blocks.size(); +} + +template +std::shared_ptr MergeJoin::loadRightBlock(size_t pos) +{ + if constexpr (!in_memory) + { + auto load_func = [&]() -> std::shared_ptr + { + TemporaryFileStream input(flushed_right_blocks[pos]->path(), right_sample_block); + return std::make_shared(input.block_in->read()); + }; + + return cached_right_blocks->getOrSet(pos, load_func).first; + } + else + return loaded_right_blocks[pos]; +} + } diff --git a/dbms/src/Interpreters/MergeJoin.h b/dbms/src/Interpreters/MergeJoin.h index 6d7d467fc8f..9c844dcfd66 100644 --- a/dbms/src/Interpreters/MergeJoin.h +++ b/dbms/src/Interpreters/MergeJoin.h @@ -3,10 +3,12 @@ #include #include +#include +#include #include #include #include - +#include namespace DB { @@ -15,6 +17,31 @@ class AnalyzedJoin; class MergeJoinCursor; struct MergeJoinEqualRange; +struct MiniLSM +{ + using SortedFiles = std::vector>; + + const String & path; + const Block & sample_block; + const SortDescription & sort_description; + const size_t rows_in_block; + const size_t max_size; + std::vector sorted_files; + + MiniLSM(const String & path_, const Block & sample_block_, const SortDescription & description, + size_t rows_in_block_, size_t max_size_ = 16) + : path(path_) + , sample_block(sample_block_) + , sort_description(description) + , rows_in_block(rows_in_block_) + , max_size(max_size_) + {} + + void insert(const BlocksList & blocks); + void merge(std::function callback = [](const Block &){}); +}; + + class MergeJoin : public IJoin { public: @@ -28,32 +55,77 @@ public: size_t getTotalRowCount() const override { return right_blocks_row_count; } private: + /// There're two size limits for right-hand table: max_rows_in_join, max_bytes_in_join. + /// max_bytes is prefered. If it isn't set we aproximate it as (max_rows * bytes/row). 
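The comment just above describes the size-limit policy for the right-hand table: max_bytes_in_join is preferred, and when only max_rows_in_join is set the byte budget is approximated from the bytes-per-row observed while flushing (mergeFlushedRightBlocks() computes right_blocks_bytes * rows_limit / right_blocks_row_count). A small sketch of that approximation, with hypothetical helper and variable names:

#include <cstdint>
#include <iostream>

// Hypothetical standalone helper; the real logic lives inline in
// MergeJoin::mergeFlushedRightBlocks().
uint64_t approximateMemoryLimit(uint64_t max_bytes, uint64_t max_rows,
                                uint64_t observed_bytes, uint64_t observed_rows)
{
    // max_bytes_in_join takes precedence when it is set.
    if (max_bytes)
        return max_bytes;

    // Otherwise approximate: row limit * (bytes per row seen so far).
    if (max_rows && observed_rows)
        return observed_bytes * max_rows / observed_rows;

    return 0; // no usable limit
}

int main()
{
    // 10 MiB of right-side data over 100k rows is ~105 bytes/row, so a
    // 1M-row limit is treated as roughly a 100 MiB cache budget.
    std::cout << approximateMemoryLimit(0, 1'000'000, 10u << 20, 100'000) << '\n';
}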
+ struct BlockByteWeight + { + size_t operator()(const Block & block) const { return block.bytes(); } + }; + + using Cache = LRUCache, BlockByteWeight>; + mutable std::shared_mutex rwlock; std::shared_ptr table_join; + SizeLimits size_limits; SortDescription left_sort_description; SortDescription right_sort_description; SortDescription left_merge_description; SortDescription right_merge_description; + Block right_sample_block; Block right_table_keys; Block right_columns_to_add; BlocksList right_blocks; + Blocks min_max_right_blocks; + std::unique_ptr cached_right_blocks; + std::vector> loaded_right_blocks; + std::unique_ptr lsm; + MiniLSM::SortedFiles flushed_right_blocks; Block totals; size_t right_blocks_row_count = 0; size_t right_blocks_bytes = 0; + bool is_in_memory = true; const bool nullable_right_side; const bool is_all; const bool is_inner; const bool is_left; const bool skip_not_intersected; + const size_t max_rows_in_right_block; void changeLeftColumns(Block & block, MutableColumns && columns); void addRightColumns(Block & block, MutableColumns && columns); void mergeRightBlocks(); + + template + size_t rightBlocksCount(); + template + void joinSortedBlock(Block & block); + template + std::shared_ptr loadRightBlock(size_t pos); + void leftJoin(MergeJoinCursor & left_cursor, const Block & left_block, const Block & right_block, MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail); void innerJoin(MergeJoinCursor & left_cursor, const Block & left_block, const Block & right_block, MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail); + + bool saveRightBlock(Block && block); + void flushRightBlocks(); + + void mergeInMemoryRightBlocks(); + void mergeFlushedRightBlocks(); + + void clearRightBlocksList() + { + right_blocks.clear(); + right_blocks_row_count = 0; + right_blocks_bytes = 0; + } + + void countBlockSize(const Block & block) + { + right_blocks_row_count += block.rows(); + right_blocks_bytes += block.bytes(); + } }; } diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index a025ba87c1a..2641ab2a5c4 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -1,6 +1,11 @@ +#include "MutationsInterpreter.h" + +#include +#include +#include +#include #include #include -#include #include #include #include @@ -14,7 +19,6 @@ #include #include #include -#include "MutationsInterpreter.h" namespace DB @@ -27,6 +31,67 @@ namespace ErrorCodes extern const int CANNOT_UPDATE_COLUMN; } +namespace +{ +struct FirstNonDeterministicFuncData +{ + using TypeToVisit = ASTFunction; + + explicit FirstNonDeterministicFuncData(const Context & context_) + : context{context_} + {} + + const Context & context; + std::optional nondeterministic_function_name; + + void visit(ASTFunction & function, ASTPtr &) + { + if (nondeterministic_function_name) + return; + + const auto func = FunctionFactory::instance().get(function.name, context); + if (!func->isDeterministic()) + nondeterministic_function_name = func->getName(); + } +}; + +using FirstNonDeterministicFuncFinder = + InDepthNodeVisitor, true>; + +std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) +{ + FirstNonDeterministicFuncData finder_data(context); + + switch (command.type) + { + case MutationCommand::UPDATE: + { + auto update_assignments_ast = command.ast->as().update_assignments->clone(); + 
FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast); + + if (finder_data.nondeterministic_function_name) + return finder_data.nondeterministic_function_name; + + [[fallthrough]]; + } + + case MutationCommand::DELETE: + { + auto predicate_ast = command.predicate->clone(); + FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast); + + return finder_data.nondeterministic_function_name; + } + + default: + break; + } + + return {}; +} +}; + + bool MutationsInterpreter::isStorageTouchedByMutations() const { if (commands.empty()) @@ -440,6 +505,21 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve void MutationsInterpreter::validate(TableStructureReadLockHolder &) { + /// For Replicated* storages mutations cannot employ non-deterministic functions + /// because that produces inconsistencies between replicas + if (startsWith(storage->getName(), "Replicated")) + { + for (const auto & command : commands) + { + const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context); + if (nondeterministic_func_name) + throw Exception( + "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! " + "Function '" + *nondeterministic_func_name + "' is non-deterministic", + ErrorCodes::BAD_ARGUMENTS); + } + } + const auto & select_query = prepare(/* dry_run = */ true); InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ true).ignoreLimits()}; /// Do not use getSampleBlock in order to check the whole pipeline. diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index a97233d0798..2a307c6ed7f 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -406,7 +406,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast /// TODO: get tables from evaluateAsterisk instead of tablesOnly() to extract asterisks in general way std::vector tables_with_columns = TranslateQualifiedNamesVisitor::Data::tablesOnly(tables); - TranslateQualifiedNamesVisitor::Data qn_visitor_data({}, tables_with_columns, false); + TranslateQualifiedNamesVisitor::Data qn_visitor_data({}, std::move(tables_with_columns), false); TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 100ecc00dc1..a2d5ee7c07a 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -90,6 +90,8 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as const auto queue_max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) { + if (queue_max_wait_ms) + LOG_WARNING(&Logger::get("ProcessList"), "Too many simultaneous queries, will wait " << queue_max_wait_ms << " ms."); if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), [&]{ return processes.size() < max_size; })) throw Exception("Too many simultaneous queries. 
Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); } diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 98069396d81..6de0ece8b59 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index c2991885cf3..e109e4a63fd 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 325499d59d2..1cbcb758bf3 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index 2ac61af7d9f..949bdd33096 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -58,26 +58,27 @@ void AsofRowRefs::insert(Type type, const IColumn * asof_column, const Block * b callWithType(type, call); } -const RowRef * AsofRowRefs::findAsof(Type type, const IColumn * asof_column, size_t row_num) const +const RowRef * AsofRowRefs::findAsof(Type type, ASOF::Inequality inequality, const IColumn * asof_column, size_t row_num) const { const RowRef * out = nullptr; + bool ascending = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::LessOrEquals); + bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater); + auto call = [&](const auto & t) { using T = std::decay_t; - using LookupPtr = typename Entry::LookupPtr; + using EntryType = Entry; + using LookupPtr = typename EntryType::LookupPtr; auto * column = typeid_cast *>(asof_column); T key = column->getElement(row_num); auto & typed_lookup = std::get(lookups); - // The first thread that calls upper_bound ensures that the data is sorted - auto it = typed_lookup->upper_bound(Entry(key)); - - // cbegin() is safe to call now because the array is immutable after sorting - // hence the pointer to a entry can be returned - if (it != typed_lookup->cbegin()) - out = &((--it)->row_ref); + if (is_strict) + out = typed_lookup->upperBound(EntryType(key), ascending); + else + out = typed_lookup->lowerBound(EntryType(key), ascending); }; callWithType(type, call); diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index 03309831322..604d5458620 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include #include #include @@ -144,34 +144,45 @@ public: array.push_back(std::forward(x), std::forward(allocator_params)...); } - // Transition into second stage, ensures that the vector is sorted - typename Base::const_iterator upper_bound(const TEntry & k) + const RowRef * upperBound(const TEntry & k, bool ascending) { - sort(); - return std::upper_bound(array.cbegin(), array.cend(), k); + sort(ascending); + auto it = std::upper_bound(array.cbegin(), array.cend(), k, (ascending ? 
less : greater)); + if (it != array.cend()) + return &(it->row_ref); + return nullptr; } - // After ensuring that the vector is sorted by calling a lookup these are safe to call - typename Base::const_iterator cbegin() const { return array.cbegin(); } - typename Base::const_iterator cend() const { return array.cend(); } + const RowRef * lowerBound(const TEntry & k, bool ascending) + { + sort(ascending); + auto it = std::lower_bound(array.cbegin(), array.cend(), k, (ascending ? less : greater)); + if (it != array.cend()) + return &(it->row_ref); + return nullptr; + } private: std::atomic sorted = false; Base array; mutable std::mutex lock; - struct RadixSortTraits : RadixSortNumTraits + static bool less(const TEntry & a, const TEntry & b) { - using Element = TEntry; - static TKey & extractKey(Element & elem) { return elem.asof_value; } - }; + return a.asof_value < b.asof_value; + } + + static bool greater(const TEntry & a, const TEntry & b) + { + return a.asof_value > b.asof_value; + } // Double checked locking with SC atomics works in C++ // https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ // The first thread that calls one of the lookup methods sorts the data // After calling the first lookup method it is no longer allowed to insert any data // the array becomes immutable - void sort() + void sort(bool ascending) { if (!sorted.load(std::memory_order_acquire)) { @@ -179,13 +190,7 @@ private: if (!sorted.load(std::memory_order_relaxed)) { if (!array.empty()) - { - /// TODO: It has been tested only for UInt32 yet. It needs to check UInt64, Float32/64. - if constexpr (std::is_same_v) - RadixSort::executeLSD(&array[0], array.size()); - else - std::sort(array.begin(), array.end()); - } + std::sort(array.begin(), array.end(), (ascending ? less : greater)); sorted.store(true, std::memory_order_release); } @@ -206,11 +211,6 @@ public: Entry(T v) : asof_value(v) {} Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} - - bool operator < (const Entry & o) const - { - return asof_value < o.asof_value; - } }; using Lookups = std::variant< @@ -236,7 +236,7 @@ public: void insert(Type type, const IColumn * asof_column, const Block * block, size_t row_num); // This will internally synchronize - const RowRef * findAsof(Type type, const IColumn * asof_column, size_t row_num) const; + const RowRef * findAsof(Type type, ASOF::Inequality inequality, const IColumn * asof_column, size_t row_num) const; private: // Lookups can be stored in a HashTable because it is memmovable diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 68c219c3a91..188d0a84b49 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -246,7 +246,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co else if (const auto * func = elem->as()) { Field function_result; - const TupleBackend * tuple = nullptr; + const Tuple * tuple = nullptr; if (func->name != "tuple") { if (!tuple_type) @@ -257,7 +257,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co throw Exception("Invalid type of set. Expected tuple, got " + String(function_result.getTypeName()), ErrorCodes::INCORRECT_ELEMENT_OF_SET); - tuple = &function_result.get().toUnderType(); + tuple = &function_result.get(); } size_t tuple_size = tuple ? 
tuple->size() : func->arguments->children.size(); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index b16df1a7eb5..228aea0b2f2 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -74,36 +74,28 @@ using LogAST = DebugASTLog; /// set to true to enable logs /// Add columns from storage to source_columns list. -void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storage, NamesAndTypesList & source_columns) +void collectSourceColumns(const ColumnsDescription & columns, NamesAndTypesList & source_columns, bool add_virtuals) { - if (storage) - { - auto physical_columns = storage->getColumns().getAllPhysical(); - if (source_columns.empty()) - source_columns.swap(physical_columns); - else - source_columns.insert(source_columns.end(), physical_columns.begin(), physical_columns.end()); + auto physical_columns = columns.getAllPhysical(); + if (source_columns.empty()) + source_columns.swap(physical_columns); + else + source_columns.insert(source_columns.end(), physical_columns.begin(), physical_columns.end()); - if (select_query) - { - const auto & storage_aliases = storage->getColumns().getAliases(); - const auto & storage_virtuals = storage->getColumns().getVirtuals(); - source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); - source_columns.insert(source_columns.end(), storage_virtuals.begin(), storage_virtuals.end()); - } + if (add_virtuals) + { + const auto & storage_aliases = columns.getAliases(); + const auto & storage_virtuals = columns.getVirtuals(); + source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); + source_columns.insert(source_columns.end(), storage_virtuals.begin(), storage_virtuals.end()); } } -/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. -/// Expand asterisks and qualified asterisks with column names. -/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer. -void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query, const Context & context, - const Names & source_columns_list, const NameSet & source_columns_set, - const NameSet & columns_from_joined_table) +std::vector getTablesWithColumns(const ASTSelectQuery & select_query, const Context & context) { - auto & settings = context.getSettingsRef(); - std::vector tables_with_columns = getDatabaseAndTablesWithColumnNames(select_query, context); + + auto & settings = context.getSettingsRef(); if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) { for (auto & pr : tables_with_columns) @@ -112,21 +104,18 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query ErrorCodes::ALIAS_REQUIRED); } - if (tables_with_columns.empty()) - { - Names all_columns_name = source_columns_list; + return tables_with_columns; +} - if (!settings.asterisk_left_columns_only) - { - for (auto & column : columns_from_joined_table) - all_columns_name.emplace_back(column); - } - - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - } +/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. +/// Expand asterisks and qualified asterisks with column names. +/// There would be columns in normal form & column aliases after translation. 
Column & column alias would be normalized in QueryNormalizer. +void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query, const NameSet & source_columns_set, + std::vector && tables_with_columns) +{ LogAST log; - TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns); + TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, std::move(tables_with_columns)); TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); @@ -456,7 +445,7 @@ void optimizeUsing(const ASTSelectQuery * select_query) } void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, - const Names & source_columns, const NameSet & source_columns_set) + const NamesAndTypesList & source_columns, const NameSet & source_columns_set) { if (ASTPtr array_join_expression_list = select_query->array_join_expression_list()) { @@ -482,12 +471,12 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const else /// This is a nested table. { bool found = false; - for (const auto & column_name : source_columns) + for (const auto & column : source_columns) { - auto splitted = Nested::splitName(column_name); + auto splitted = Nested::splitName(column.name); if (splitted.first == source_name && !splitted.second.empty()) { - result.array_join_result_to_source[Nested::concatenateName(result_name, splitted.second)] = column_name; + result.array_join_result_to_source[Nested::concatenateName(result_name, splitted.second)] = column.name; found = true; break; } @@ -615,39 +604,6 @@ std::vector getAggregates(const ASTPtr & query) return {}; } -void collectCanShortCircuitSet(const ASTPtr & ast, NameSet & need_check_empty_sets) -{ - if (const auto * function = ast->as()) - { - if (function->name == "in" || function->name == "globalIn") - { - for (size_t i = 0; i < function->arguments->children.size(); ++i) - { - ASTPtr child = function->arguments->children[i]; - if (const auto * subquery = child->as()) - need_check_empty_sets.insert(subquery->getColumnName()); - } - } - else if (function->name != "or") - { - for (size_t i = 0; i < function->arguments->children.size(); ++i) - { - ASTPtr child = function->arguments->children[i]; - collectCanShortCircuitSet(child, need_check_empty_sets); - } - } - } - else - { - for (auto & child : ast->children) - { - /// Do not go to FROM, JOIN, UNION. - if (!child->as() && !child->as()) - collectCanShortCircuitSet(child, need_check_empty_sets); - } - } -} - } /// Calculate which columns are required to execute the expression. 
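The getArrayJoinedColumns() change above iterates the typed source_columns list but matches nested members the same way as before: each column name is split at its first dot, and the tail is re-attached to the ARRAY JOIN alias to build array_join_result_to_source. A self-contained sketch of that matching, with simplified stand-ins for Nested::splitName and Nested::concatenateName (illustrative only, not the real helpers):

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Simplified re-implementations for illustration: split a nested column name
// at its first dot, and glue an alias back onto the tail.
std::pair<std::string, std::string> splitName(const std::string & name)
{
    auto pos = name.find('.');
    if (pos == std::string::npos)
        return {name, {}};
    return {name.substr(0, pos), name.substr(pos + 1)};
}

std::string concatenateName(const std::string & first, const std::string & second)
{
    return second.empty() ? first : first + "." + second;
}

int main()
{
    // Source table columns (names only; types dropped for brevity).
    std::vector<std::string> source_columns = {"id", "nested.key", "nested.value"};

    // "... ARRAY JOIN nested AS n": map each result column n.* to its source.
    std::string source_name = "nested";
    std::string result_name = "n";

    std::map<std::string, std::string> array_join_result_to_source;
    for (const auto & column : source_columns)
    {
        auto splitted = splitName(column);
        if (splitted.first == source_name && !splitted.second.empty())
            array_join_result_to_source[concatenateName(result_name, splitted.second)] = column;
    }

    assert(array_join_result_to_source.at("n.key") == "nested.key");
    assert(array_join_result_to_source.at("n.value") == "nested.value");
}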
@@ -838,19 +794,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( SyntaxAnalyzerResult result; result.storage = storage; result.source_columns = source_columns_; - result.analyzed_join = std::make_shared(settings); /// TODO: move to select_query logic + result.analyzed_join = std::make_shared(settings, context.getTemporaryPath()); /// TODO: move to select_query logic - collectSourceColumns(select_query, result.storage, result.source_columns); + if (storage) + collectSourceColumns(storage->getColumns(), result.source_columns, (select_query != nullptr)); NameSet source_columns_set = removeDuplicateColumns(result.source_columns); - Names source_columns_list; - source_columns_list.reserve(result.source_columns.size()); - for (const auto & type_name : result.source_columns) - source_columns_list.emplace_back(type_name.name); - - if (source_columns_set.size() != source_columns_list.size()) - throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); - if (select_query) { if (remove_duplicates) @@ -871,9 +820,28 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( result.analyzed_join->deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, context, - (storage ? storage->getColumns().getOrdinary().getNames() : source_columns_list), source_columns_set, - result.analyzed_join->getQualifiedColumnsSet()); + auto tables_with_columns = getTablesWithColumns(*select_query, context); + + /// If empty make fake table with list of source and joined columns + if (tables_with_columns.empty()) + { + Names columns_list; + if (storage) + columns_list = storage->getColumns().getOrdinary().getNames(); + else + { + columns_list.reserve(result.source_columns.size()); + for (const auto & column : result.source_columns) + columns_list.emplace_back(column.name); + } + + for (auto & column : result.analyzed_join->getQualifiedColumnsSet()) + columns_list.emplace_back(column); + + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(columns_list)); + } + + translateQualifiedNames(query, *select_query, source_columns_set, std::move(tables_with_columns)); /// Rewrite IN and/or JOIN for distributed tables according to distributed_product_mode setting. InJoinSubqueriesPreprocessor(context).visit(query); @@ -923,14 +891,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( optimizeUsing(select_query); /// array_join_alias_to_name, array_join_result_to_source. - getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); + getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); /// Push the predicate expression down to the subqueries. result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); setJoinStrictness(*select_query, settings.join_default_strictness, result.analyzed_join->table_join); collectJoinedColumns(*result.analyzed_join, *select_query, source_columns_set, result.aliases); - collectCanShortCircuitSet(query, result.need_check_empty_sets); } result.aggregates = getAggregates(query); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 03b1c96ba0a..44fdc61ded3 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -40,9 +40,6 @@ struct SyntaxAnalyzerResult /// Note: not used further. 
NameToNameMap array_join_name_to_alias; - /// For sets created during query execution, check if they are empty after creation. - NameSet need_check_empty_sets; - /// Predicate optimizer overrides the sub queries bool rewrite_subqueries = false; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 413651ea843..4bf18b59cb9 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -25,11 +25,11 @@ public: struct Data { const NameSet source_columns; - const std::vector & tables; + const std::vector tables; std::unordered_set join_using_columns; bool has_columns; - Data(const NameSet & source_columns_, const std::vector & tables_, bool has_columns_ = true) + Data(const NameSet & source_columns_, std::vector && tables_, bool has_columns_ = true) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) diff --git a/dbms/src/Interpreters/Users.cpp b/dbms/src/Interpreters/Users.cpp index 86e37f0b729..8d8704165f4 100644 --- a/dbms/src/Interpreters/Users.cpp +++ b/dbms/src/Interpreters/Users.cpp @@ -1,22 +1,10 @@ #include #include -#include -#include -#include -#include #include -#include #include #include -#include -#include -#include -#include -#include #include #include -#include -#include namespace DB @@ -24,255 +12,12 @@ namespace DB namespace ErrorCodes { - extern const int DNS_ERROR; extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; extern const int UNKNOWN_USER; - extern const int REQUIRED_PASSWORD; - extern const int WRONG_PASSWORD; - extern const int IP_ADDRESS_NOT_ALLOWED; extern const int BAD_ARGUMENTS; } -static Poco::Net::IPAddress toIPv6(const Poco::Net::IPAddress addr) -{ - if (addr.family() == Poco::Net::IPAddress::IPv6) - return addr; - - return Poco::Net::IPAddress("::FFFF:" + addr.toString()); -} - - -/// IP-address or subnet mask. Example: 213.180.204.3 or 10.0.0.1/8 or 312.234.1.1/255.255.255.0 -/// 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/FFFF:FFFF:FFFF:FFFF:: -class IPAddressPattern : public IAddressPattern -{ -private: - /// Address of mask. Always transformed to IPv6. - Poco::Net::IPAddress mask_address; - /// Mask of net (ip form). Always transformed to IPv6. - Poco::Net::IPAddress subnet_mask; - -public: - explicit IPAddressPattern(const String & str) - { - const char * pos = strchr(str.c_str(), '/'); - - if (nullptr == pos) - { - construct(Poco::Net::IPAddress(str)); - } - else - { - String addr(str, 0, pos - str.c_str()); - auto real_address = Poco::Net::IPAddress(addr); - - String str_mask(str, addr.length() + 1, str.length() - addr.length() - 1); - if (isDigits(str_mask)) - { - UInt8 prefix_bits = parse(pos + 1); - construct(prefix_bits, real_address.family() == Poco::Net::AddressFamily::IPv4); - } - else - { - subnet_mask = netmaskToIPv6(Poco::Net::IPAddress(str_mask)); - } - - mask_address = toIPv6(real_address); - } - } - - bool contains(const Poco::Net::IPAddress & addr) const override - { - return prefixBitsEquals(addr, mask_address, subnet_mask); - } - -private: - void construct(const Poco::Net::IPAddress & mask_address_) - { - mask_address = toIPv6(mask_address_); - subnet_mask = Poco::Net::IPAddress(128, Poco::Net::IPAddress::IPv6); - } - - void construct(UInt8 prefix_bits, bool is_ipv4) - { - prefix_bits = is_ipv4 ? 
prefix_bits + 96 : prefix_bits; - subnet_mask = Poco::Net::IPAddress(prefix_bits, Poco::Net::IPAddress::IPv6); - } - - static bool prefixBitsEquals(const Poco::Net::IPAddress & ip_address, const Poco::Net::IPAddress & net_address, const Poco::Net::IPAddress & mask) - { - return ((toIPv6(ip_address) & mask) == (toIPv6(net_address) & mask)); - } - - static bool isDigits(const std::string & str) - { - return std::all_of(str.begin(), str.end(), isNumericASCII); - } - - static Poco::Net::IPAddress netmaskToIPv6(Poco::Net::IPAddress mask) - { - if (mask.family() == Poco::Net::IPAddress::IPv6) - return mask; - - return Poco::Net::IPAddress(96, Poco::Net::IPAddress::IPv6) | toIPv6(mask); - } -}; - -/// Check that address equals to one of hostname addresses. -class HostExactPattern : public IAddressPattern -{ -private: - String host; - - static bool containsImpl(const String & host, const Poco::Net::IPAddress & addr) - { - Poco::Net::IPAddress addr_v6 = toIPv6(addr); - - /// Resolve by hand, because Poco don't use AI_ALL flag but we need it. - addrinfo * ai = nullptr; - - addrinfo hints; - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_flags |= AI_V4MAPPED | AI_ALL; - - int ret = getaddrinfo(host.c_str(), nullptr, &hints, &ai); - if (0 != ret) - throw Exception("Cannot getaddrinfo: " + std::string(gai_strerror(ret)), ErrorCodes::DNS_ERROR); - - SCOPE_EXIT( - { - freeaddrinfo(ai); - }); - - for (; ai != nullptr; ai = ai->ai_next) - { - if (ai->ai_addrlen && ai->ai_addr) - { - if (ai->ai_family == AF_INET6) - { - if (addr_v6 == Poco::Net::IPAddress( - &reinterpret_cast(ai->ai_addr)->sin6_addr, sizeof(in6_addr), - reinterpret_cast(ai->ai_addr)->sin6_scope_id)) - { - return true; - } - } - else if (ai->ai_family == AF_INET) - { - if (addr_v6 == toIPv6(Poco::Net::IPAddress( - &reinterpret_cast(ai->ai_addr)->sin_addr, sizeof(in_addr)))) - { - return true; - } - } - } - } - - return false; - } - -public: - explicit HostExactPattern(const String & host_) : host(host_) {} - - bool contains(const Poco::Net::IPAddress & addr) const override - { - static SimpleCache cache; - return cache(host, addr); - } -}; - - -/// Check that PTR record for address match the regexp (and in addition, check that PTR record is resolved back to client address). -class HostRegexpPattern : public IAddressPattern -{ -private: - Poco::RegularExpression host_regexp; - - static String getDomain(const Poco::Net::IPAddress & addr) - { - Poco::Net::SocketAddress sock_addr(addr, 0); - - /// Resolve by hand, because Poco library doesn't have such functionality. - char domain[1024]; - int gai_errno = getnameinfo(sock_addr.addr(), sock_addr.length(), domain, sizeof(domain), nullptr, 0, NI_NAMEREQD); - if (0 != gai_errno) - throw Exception("Cannot getnameinfo: " + std::string(gai_strerror(gai_errno)), ErrorCodes::DNS_ERROR); - - return domain; - } - -public: - explicit HostRegexpPattern(const String & host_regexp_) : host_regexp(host_regexp_) {} - - bool contains(const Poco::Net::IPAddress & addr) const override - { - static SimpleCache cache; - - String domain = cache(addr); - Poco::RegularExpression::Match match; - - if (host_regexp.match(domain, match) && HostExactPattern(domain).contains(addr)) - return true; - - return false; - } -}; - - - -bool AddressPatterns::contains(const Poco::Net::IPAddress & addr) const -{ - for (size_t i = 0, size = patterns.size(); i < size; ++i) - { - /// If host cannot be resolved, skip it and try next. 
- try - { - if (patterns[i]->contains(addr)) - return true; - } - catch (const DB::Exception & e) - { - LOG_WARNING(&Logger::get("AddressPatterns"), - "Failed to check if pattern contains address " << addr.toString() << ". " << e.displayText() << ", code = " << e.code()); - - if (e.code() == ErrorCodes::DNS_ERROR) - { - continue; - } - else - throw; - } - } - - return false; -} - -void AddressPatterns::addFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys(config_elem, config_keys); - - for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = config_keys.begin(); it != config_keys.end(); ++it) - { - Container::value_type pattern; - String value = config.getString(config_elem + "." + *it); - - if (startsWith(*it, "ip")) - pattern = std::make_unique(value); - else if (startsWith(*it, "host_regexp")) - pattern = std::make_unique(value); - else if (startsWith(*it, "host")) - pattern = std::make_unique(value); - else - throw Exception("Unknown address pattern type: " + *it, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); - - patterns.emplace_back(std::move(pattern)); - } -} - - User::User(const String & name_, const String & config_elem, const Poco::Util::AbstractConfiguration & config) : name(name_) { @@ -288,28 +33,43 @@ User::User(const String & name_, const String & config_elem, const Poco::Util::A throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' must be specified for user " + name + ".", ErrorCodes::BAD_ARGUMENTS); if (has_password) - password = config.getString(config_elem + ".password"); - - if (has_password_sha256_hex) { - password_sha256_hex = Poco::toLower(config.getString(config_elem + ".password_sha256_hex")); - - if (password_sha256_hex.size() != 64) - throw Exception("password_sha256_hex for user " + name + " has length " + toString(password_sha256_hex.size()) + " but must be exactly 64 symbols.", ErrorCodes::BAD_ARGUMENTS); + authentication = Authentication{Authentication::PLAINTEXT_PASSWORD}; + authentication.setPassword(config.getString(config_elem + ".password")); } - - if (has_password_double_sha1_hex) + else if (has_password_sha256_hex) { - password_double_sha1_hex = Poco::toLower(config.getString(config_elem + ".password_double_sha1_hex")); - - if (password_double_sha1_hex.size() != 40) - throw Exception("password_double_sha1_hex for user " + name + " has length " + toString(password_double_sha1_hex.size()) + " but must be exactly 40 symbols.", ErrorCodes::BAD_ARGUMENTS); + authentication = Authentication{Authentication::SHA256_PASSWORD}; + authentication.setPasswordHashHex(config.getString(config_elem + ".password_sha256_hex")); + } + else if (has_password_double_sha1_hex) + { + authentication = Authentication{Authentication::DOUBLE_SHA1_PASSWORD}; + authentication.setPasswordHashHex(config.getString(config_elem + ".password_double_sha1_hex")); } profile = config.getString(config_elem + ".profile"); quota = config.getString(config_elem + ".quota"); - addresses.addFromConfig(config_elem + ".networks", config); + /// Fill list of allowed hosts. + const auto config_networks = config_elem + ".networks"; + if (config.has(config_networks)) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_networks, config_keys); + for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = config_keys.begin(); it != config_keys.end(); ++it) + { + String value = config.getString(config_networks + "." 
+ *it); + if (startsWith(*it, "ip")) + allowed_client_hosts.addSubnet(value); + else if (startsWith(*it, "host_regexp")) + allowed_client_hosts.addHostRegexp(value); + else if (startsWith(*it, "host")) + allowed_client_hosts.addHostName(value); + else + throw Exception("Unknown address pattern type: " + *it, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + } + } /// Fill list of allowed databases. const auto config_sub_elem = config_elem + ".allow_databases"; diff --git a/dbms/src/Interpreters/Users.h b/dbms/src/Interpreters/Users.h index 090bc693e9a..a2d4ccece45 100644 --- a/dbms/src/Interpreters/Users.h +++ b/dbms/src/Interpreters/Users.h @@ -1,20 +1,16 @@ #pragma once #include +#include +#include #include #include #include -#include namespace Poco { - namespace Net - { - class IPAddress; - } - namespace Util { class AbstractConfiguration; @@ -24,44 +20,19 @@ namespace Poco namespace DB { - - -/// Allow to check that address matches a pattern. -class IAddressPattern -{ -public: - virtual bool contains(const Poco::Net::IPAddress & addr) const = 0; - virtual ~IAddressPattern() {} -}; - - -class AddressPatterns -{ -private: - using Container = std::vector>; - Container patterns; - -public: - bool contains(const Poco::Net::IPAddress & addr) const; - void addFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); -}; - - /** User and ACL. */ struct User { String name; - /// Required password. Could be stored in plaintext or in SHA256. - String password; - String password_sha256_hex; - String password_double_sha1_hex; + /// Required password. + Authentication authentication; String profile; String quota; - AddressPatterns addresses; + AllowedClientHosts allowed_client_hosts; /// List of allowed databases. using DatabaseSet = std::unordered_set; diff --git a/dbms/src/Interpreters/UsersManager.cpp b/dbms/src/Interpreters/UsersManager.cpp index ee6293c3ee2..50b5d6653a3 100644 --- a/dbms/src/Interpreters/UsersManager.cpp +++ b/dbms/src/Interpreters/UsersManager.cpp @@ -1,18 +1,7 @@ #include -#include "config_core.h" #include -#include -#include -#include -#include -#include -#include -#include #include -#if USE_SSL -# include -#endif namespace DB @@ -20,14 +9,7 @@ namespace DB namespace ErrorCodes { - extern const int DNS_ERROR; - extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; extern const int UNKNOWN_USER; - extern const int REQUIRED_PASSWORD; - extern const int WRONG_PASSWORD; - extern const int IP_ADDRESS_NOT_ALLOWED; - extern const int BAD_ARGUMENTS; - extern const int SUPPORT_IS_DISABLED; } using UserPtr = UsersManager::UserPtr; @@ -58,62 +40,8 @@ UserPtr UsersManager::authorizeAndGetUser( if (users.end() == it) throw Exception("Unknown user " + user_name, ErrorCodes::UNKNOWN_USER); - if (!it->second->addresses.contains(address)) - throw Exception("User " + user_name + " is not allowed to connect from address " + address.toString(), ErrorCodes::IP_ADDRESS_NOT_ALLOWED); - - auto on_wrong_password = [&]() - { - if (password.empty()) - throw Exception("Password required for user " + user_name, ErrorCodes::REQUIRED_PASSWORD); - else - throw Exception("Wrong password for user " + user_name, ErrorCodes::WRONG_PASSWORD); - }; - - if (!it->second->password_sha256_hex.empty()) - { -#if USE_SSL - unsigned char hash[32]; - - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, reinterpret_cast(password.data()), password.size()); - SHA256_Final(hash, &ctx); - - String hash_hex; - { - WriteBufferFromString buf(hash_hex); - HexWriteBuffer hex_buf(buf); - 
hex_buf.write(reinterpret_cast(hash), sizeof(hash)); - } - - Poco::toLowerInPlace(hash_hex); - - if (hash_hex != it->second->password_sha256_hex) - on_wrong_password(); -#else - throw DB::Exception("SHA256 passwords support is disabled, because ClickHouse was built without SSL library", DB::ErrorCodes::SUPPORT_IS_DISABLED); -#endif - } - else if (!it->second->password_double_sha1_hex.empty()) - { - Poco::SHA1Engine engine; - engine.update(password); - const auto & first_sha1 = engine.digest(); - - /// If it was MySQL compatibility server, then first_sha1 already contains double SHA1. - if (Poco::SHA1Engine::digestToHex(first_sha1) == it->second->password_double_sha1_hex) - return it->second; - - engine.update(first_sha1.data(), first_sha1.size()); - - if (Poco::SHA1Engine::digestToHex(engine.digest()) != it->second->password_double_sha1_hex) - on_wrong_password(); - } - else if (password != it->second->password) - { - on_wrong_password(); - } - + it->second->allowed_client_hosts.checkContains(address, user_name); + it->second->authentication.checkPassword(password, user_name); return it->second; } diff --git a/dbms/src/Interpreters/asof.h b/dbms/src/Interpreters/asof.h new file mode 100644 index 00000000000..439bf4cc58c --- /dev/null +++ b/dbms/src/Interpreters/asof.h @@ -0,0 +1,46 @@ +#pragma once +#include + +namespace DB +{ +namespace ASOF +{ + +enum class Inequality +{ + None = 0, + Less, + Greater, + LessOrEquals, + GreaterOrEquals, +}; + +inline Inequality getInequality(const std::string & func_name) +{ + Inequality inequality{Inequality::None}; + if (func_name == "less") + inequality = Inequality::Less; + else if (func_name == "greater") + inequality = Inequality::Greater; + else if (func_name == "lessOrEquals") + inequality = Inequality::LessOrEquals; + else if (func_name == "greaterOrEquals") + inequality = Inequality::GreaterOrEquals; + return inequality; +} + +inline Inequality reverseInequality(Inequality inequality) +{ + if (inequality == Inequality::Less) + return Inequality::Greater; + else if (inequality == Inequality::Greater) + return Inequality::Less; + else if (inequality == Inequality::LessOrEquals) + return Inequality::GreaterOrEquals; + else if (inequality == Inequality::GreaterOrEquals) + return Inequality::LessOrEquals; + return Inequality::None; +} + +} +} diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 8bc6ee5b9d5..372aad048e5 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -225,28 +225,30 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Array) { - const DataTypePtr nested_type = removeNullable(type_array->getNestedType()); - const Array & src_arr = src.get(); size_t src_arr_size = src_arr.size(); + auto & element_type = *(type_array->getNestedType()); + bool have_unconvertible_element = false; Array res(src_arr_size); for (size_t i = 0; i < src_arr_size; ++i) { - res[i] = convertFieldToType(src_arr[i], *nested_type); - if (res[i].isNull() && !type_array->getNestedType()->isNullable()) - throw Exception("Type mismatch of array elements in IN or VALUES section. Expected: " + type_array->getNestedType()->getName() - + ". Got NULL in position " + toString(i + 1), ErrorCodes::TYPE_MISMATCH); + res[i] = convertFieldToType(src_arr[i], element_type); + if (res[i].isNull() && !element_type.isNullable()) + { + // See the comment for Tuples below. 
+ have_unconvertible_element = true; + } } - return res; + return have_unconvertible_element ? Field(Null()) : Field(res); } } else if (const DataTypeTuple * type_tuple = typeid_cast(&type)) { if (src.getType() == Field::Types::Tuple) { - const TupleBackend & src_tuple = src.get(); + const auto & src_tuple = src.get(); size_t src_tuple_size = src_tuple.size(); size_t dst_tuple_size = type_tuple->getElements().size(); @@ -254,11 +256,34 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID throw Exception("Bad size of tuple in IN or VALUES section. Expected size: " + toString(dst_tuple_size) + ", actual size: " + toString(src_tuple_size), ErrorCodes::TYPE_MISMATCH); - TupleBackend res(dst_tuple_size); + Tuple res(dst_tuple_size); + bool have_unconvertible_element = false; for (size_t i = 0; i < dst_tuple_size; ++i) - res[i] = convertFieldToType(src_tuple[i], *type_tuple->getElements()[i]); + { + auto & element_type = *(type_tuple->getElements()[i]); + res[i] = convertFieldToType(src_tuple[i], element_type); + if (!res[i].isNull() || element_type.isNullable()) + continue; - return res; + /* + * Either the source element was Null, or the conversion did not + * succeed, because the source and the requested types of the + * element are compatible, but the value is not convertible + * (e.g. trying to convert -1 from Int8 to UInt8). In these + * cases, consider the whole tuple also compatible but not + * convertible. According to the specification of this function, + * we must return Null in this case. + * + * The following elements might be not even compatible, so it + * makes sense to check them to detect user errors. Remember + * that there is an unconvertible element, and try to process + * the remaining ones. The convertFieldToType for each element + * will throw if it detects incompatibility. + */ + have_unconvertible_element = true; + } + + return have_unconvertible_element ? 
Field(Null()) : Field(res); } } else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast(&type)) diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 07a657fb7dd..2e46ff294cc 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB @@ -31,6 +32,9 @@ std::pair> evaluateConstantExpression(co { NamesAndTypesList source_columns = {{ "_dummy", std::make_shared() }}; auto ast = node->clone(); + ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters()); + param_visitor.visit(ast); + String name = ast->getColumnName(); auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions(); @@ -42,8 +46,6 @@ std::pair> evaluateConstantExpression(co if (!block_with_constants || block_with_constants.rows() == 0) throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT", ErrorCodes::LOGICAL_ERROR); - String name = node->getColumnName(); - if (!block_with_constants.has(name)) throw Exception("Element of set in IN, VALUES or LIMIT is not a constant expression (result column not found): " + name, ErrorCodes::BAD_ARGUMENTS); @@ -60,11 +62,11 @@ std::pair> evaluateConstantExpression(co ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & context) { - /// Branch with string in query. + /// If it's already a literal. if (node->as()) return node; - /// Branch with TableFunction in query. + /// Skip table functions. if (const auto * table_func_ptr = node->as()) if (TableFunctionFactory::instance().isTableFunctionName(table_func_ptr->name)) return node; diff --git a/dbms/src/Interpreters/evaluateConstantExpression.h b/dbms/src/Interpreters/evaluateConstantExpression.h index a901612040b..a84104c53f4 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.h +++ b/dbms/src/Interpreters/evaluateConstantExpression.h @@ -20,6 +20,7 @@ using ExpressionActionsPtr = std::shared_ptr; /** Evaluate constant expression and its type. * Used in rare cases - for elements of set for IN, for data to INSERT. + * Throws exception if it's not a constant expression. * Quite suboptimal. */ std::pair> evaluateConstantExpression(const ASTPtr & node, const Context & context); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index ea27ab35968..edd052469d8 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -120,6 +121,10 @@ static void logQuery(const String & query, const Context & context, bool interna /// Call this inside catch block. static void setExceptionStackTrace(QueryLogElement & elem) { + /// Disable memory tracker for stack trace. + /// Because if exception is "Memory limit (for query) exceed", then we probably can't allocate another one string. 
+ auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); + try { throw; diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index 3a171c0c845..75086796c42 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -140,7 +140,7 @@ try size_t num_threads = 2; ThreadPool pool(num_threads); for (size_t i = 0; i < num_threads; ++i) - pool.schedule([i]() { do_io(i); }); + pool.scheduleOrThrowOnError([i]() { do_io(i); }); pool.wait(); test_perf(); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 69ef80d4a02..93f21ae5c5e 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB @@ -183,9 +183,7 @@ void ASTAlterCommand::formatImpl( settings.ostr << "VOLUME "; break; } - WriteBufferFromOwnString move_destination_name_buf; - writeQuoted(move_destination_name, move_destination_name_buf); - settings.ostr << move_destination_name_buf.str(); + settings.ostr << quoteString(move_destination_name); } else if (type == ASTAlterCommand::REPLACE_PARTITION) { diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index 40665f6f2b6..e453a82cdb4 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index e718d5c292d..b281315f555 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTColumnsMatcher.cpp b/dbms/src/Parsers/ASTColumnsMatcher.cpp index e9cdb822c6e..1dde9507149 100644 --- a/dbms/src/Parsers/ASTColumnsMatcher.cpp +++ b/dbms/src/Parsers/ASTColumnsMatcher.cpp @@ -1,9 +1,6 @@ #include "ASTColumnsMatcher.h" - -#include #include -#include - +#include #include @@ -22,10 +19,8 @@ void ASTColumnsMatcher::appendColumnName(WriteBuffer & ostr) const { writeString void ASTColumnsMatcher::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - WriteBufferFromOwnString pattern_quoted; - writeQuotedString(original_pattern, pattern_quoted); - - settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(" << pattern_quoted.str() << ")"; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? 
hilite_none : "") << "(" + << quoteString(original_pattern) << ")"; } void ASTColumnsMatcher::setPattern(String pattern) diff --git a/dbms/src/Parsers/ASTConstraintDeclaration.cpp b/dbms/src/Parsers/ASTConstraintDeclaration.cpp index a1b063fc44a..f268141f619 100644 --- a/dbms/src/Parsers/ASTConstraintDeclaration.cpp +++ b/dbms/src/Parsers/ASTConstraintDeclaration.cpp @@ -1,4 +1,6 @@ #include +#include + namespace DB { diff --git a/dbms/src/Parsers/ASTCreateQuery.cpp b/dbms/src/Parsers/ASTCreateQuery.cpp index bdade881b2c..bc4a8290d8d 100644 --- a/dbms/src/Parsers/ASTCreateQuery.cpp +++ b/dbms/src/Parsers/ASTCreateQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTDictionaryAttributeDeclaration.cpp b/dbms/src/Parsers/ASTDictionaryAttributeDeclaration.cpp index ebe0b900ec5..2b056cb3743 100644 --- a/dbms/src/Parsers/ASTDictionaryAttributeDeclaration.cpp +++ b/dbms/src/Parsers/ASTDictionaryAttributeDeclaration.cpp @@ -1,4 +1,6 @@ #include +#include + namespace DB { diff --git a/dbms/src/Parsers/ASTDropQuery.cpp b/dbms/src/Parsers/ASTDropQuery.cpp index b4586bf372c..56d0878ceed 100644 --- a/dbms/src/Parsers/ASTDropQuery.cpp +++ b/dbms/src/Parsers/ASTDropQuery.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTIndexDeclaration.h b/dbms/src/Parsers/ASTIndexDeclaration.h index 61e966b3d1b..c71ab21cf57 100644 --- a/dbms/src/Parsers/ASTIndexDeclaration.h +++ b/dbms/src/Parsers/ASTIndexDeclaration.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include #include #include #include @@ -52,7 +53,7 @@ public: s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? 
hilite_none : ""); - s.ostr << toString(granularity); + s.ostr << granularity; } }; diff --git a/dbms/src/Parsers/ASTInsertQuery.cpp b/dbms/src/Parsers/ASTInsertQuery.cpp index 1ac92f49735..89158fa0649 100644 --- a/dbms/src/Parsers/ASTInsertQuery.cpp +++ b/dbms/src/Parsers/ASTInsertQuery.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTNameTypePair.h b/dbms/src/Parsers/ASTNameTypePair.h index ac72448e2e9..48dd7ae1ac9 100644 --- a/dbms/src/Parsers/ASTNameTypePair.h +++ b/dbms/src/Parsers/ASTNameTypePair.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTOptimizeQuery.cpp b/dbms/src/Parsers/ASTOptimizeQuery.cpp index 5e95dc41795..92968f2b277 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.cpp +++ b/dbms/src/Parsers/ASTOptimizeQuery.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 462a08b0447..915ecd5e7e4 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTQueryWithOnCluster.cpp b/dbms/src/Parsers/ASTQueryWithOnCluster.cpp index 9519a33c1e5..b0ccaf8b1fa 100644 --- a/dbms/src/Parsers/ASTQueryWithOnCluster.cpp +++ b/dbms/src/Parsers/ASTQueryWithOnCluster.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp index 1e16fb6f0ee..3a776590f80 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h index 1666873ed9c..4cf007d3b36 100644 --- a/dbms/src/Parsers/ASTRenameQuery.h +++ b/dbms/src/Parsers/ASTRenameQuery.h @@ -3,11 +3,12 @@ #include #include #include +#include + namespace DB { - /** RENAME query */ class ASTRenameQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster diff --git a/dbms/src/Parsers/ASTShowTablesQuery.cpp b/dbms/src/Parsers/ASTShowTablesQuery.cpp index 4a33aeba99c..34a8c9fb76a 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.cpp +++ b/dbms/src/Parsers/ASTShowTablesQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp index b0046b0179b..4e7525bb176 100644 --- a/dbms/src/Parsers/ASTSystemQuery.cpp +++ b/dbms/src/Parsers/ASTSystemQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTUseQuery.h b/dbms/src/Parsers/ASTUseQuery.h index f1ef1b3b408..2127bf9f2c0 100644 --- a/dbms/src/Parsers/ASTUseQuery.h +++ b/dbms/src/Parsers/ASTUseQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTWatchQuery.h b/dbms/src/Parsers/ASTWatchQuery.h index 06d1460f038..c4046a8771f 100644 --- a/dbms/src/Parsers/ASTWatchQuery.h +++ b/dbms/src/Parsers/ASTWatchQuery.h @@ -12,6 +12,7 @@ limitations under the License. 
*/ #pragma once #include +#include namespace DB diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index c896ed2ce3f..d7c56d80a21 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -5,7 +5,6 @@ #include #include #include -#include /// backQuote, backQuoteIfNeed #include #include @@ -223,5 +222,4 @@ private: size_t checkDepthImpl(size_t max_depth, size_t level) const; }; - } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index 1d787d855fc..6a8e3b2ce83 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB diff --git a/dbms/src/Processors/Executors/ParallelPipelineExecutor.cpp b/dbms/src/Processors/Executors/ParallelPipelineExecutor.cpp index 7f0969e6451..cf963e45a4a 100644 --- a/dbms/src/Processors/Executors/ParallelPipelineExecutor.cpp +++ b/dbms/src/Processors/Executors/ParallelPipelineExecutor.cpp @@ -85,7 +85,7 @@ namespace DB // active_processors.insert(current_processor); // } // -// pool.schedule([processor = current_processor, &watch, this] +// pool.scheduleOrThrowOnError([processor = current_processor, &watch, this] // { // processor->work(); // { diff --git a/dbms/src/Processors/Formats/IRowInputFormat.cpp b/dbms/src/Processors/Formats/IRowInputFormat.cpp index 5aec8e94cf1..39422bf20b5 100644 --- a/dbms/src/Processors/Formats/IRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/IRowInputFormat.cpp @@ -46,6 +46,7 @@ Chunk IRowInputFormat::generate() size_t prev_rows = total_rows; ///auto chunk_missing_values = std::make_unique(); + block_missing_values.clear(); try { diff --git a/dbms/src/Processors/Formats/IRowInputFormat.h b/dbms/src/Processors/Formats/IRowInputFormat.h index 9443d2b633b..38dcb6acc9e 100644 --- a/dbms/src/Processors/Formats/IRowInputFormat.h +++ b/dbms/src/Processors/Formats/IRowInputFormat.h @@ -77,7 +77,6 @@ protected: private: Params params; - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; size_t total_rows = 0; size_t num_errors = 0; diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index b5ee30fb7f8..a13bb365192 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -24,12 +24,17 @@ CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, co , with_names(with_names_) , format_settings(format_settings_) { + + const String bad_delimiters = " \t\"'.UL"; + if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) + throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter + + "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS); + auto & sample = getPort().getHeader(); size_t num_columns = sample.columns(); data_types.resize(num_columns); column_indexes_by_names.reserve(num_columns); - column_idx_to_nullable_column_idx.resize(num_columns); for (size_t i = 0; i < num_columns; ++i) { @@ -37,16 +42,6 @@ CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, co data_types[i] = column_info.type; column_indexes_by_names.emplace(column_info.name, i); - - /// If input_format_null_as_default=1 we need ColumnNullable of type DataTypeNullable(nested_type) - /// to parse value as nullable before inserting it in corresponding column of not-nullable type. 
- /// Constructing temporary column for each row is slow, so we prepare it here - if (format_settings_.csv.null_as_default && !column_info.type->isNullable() && column_info.type->canBeInsideNullable()) - { - column_idx_to_nullable_column_idx[i] = nullable_columns.size(); - nullable_types.emplace_back(std::make_shared(column_info.type)); - nullable_columns.emplace_back(nullable_types.back()->createColumn()); - } } } @@ -220,6 +215,7 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext /// it doesn't have to check it. bool have_default_columns = have_always_default_columns; + ext.read_columns.assign(read_columns.size(), true); const auto delimiter = format_settings.csv.delimiter; for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) { @@ -229,9 +225,8 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext if (table_column) { skipWhitespacesAndTabs(in); - read_columns[*table_column] = readField(*columns[*table_column], data_types[*table_column], - is_last_file_column, *table_column); - if (!read_columns[*table_column]) + ext.read_columns[*table_column] = readField(*columns[*table_column], data_types[*table_column], is_last_file_column); + if (!ext.read_columns[*table_column]) have_default_columns = true; skipWhitespacesAndTabs(in); } @@ -258,9 +253,9 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext /// value, we do not have to use the default value specified by /// the data type, and can just use IColumn::insertDefault(). columns[i]->insertDefault(); + ext.read_columns[i] = false; } } - ext.read_columns = read_columns; } return true; @@ -365,8 +360,7 @@ void CSVRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn & if (column_indexes_for_input_fields[file_column]) { const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size(); - if (!readField(column, type, is_last_file_column, *column_indexes_for_input_fields[file_column])) - column.insertDefault(); + readField(column, type, is_last_file_column); } else { @@ -378,12 +372,14 @@ void CSVRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColumn & skipWhitespacesAndTabs(in); } -bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column, size_t column_idx) +bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column) { const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n' || *in.position() == '\r'); + /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default + /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -393,20 +389,13 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bo /// commas, which might be also used as delimiters. However, /// they do not contain empty unquoted fields, so this check /// works for tuples as well. + column.insertDefault(); return false; } - else if (column_idx_to_nullable_column_idx[column_idx]) + else if (format_settings.null_as_default && !type->isNullable()) { /// If value is null but type is not nullable then use default value instead. 
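The branch continued below replaces the removed per-column nullable buffers with a single deserialize helper that parses the field as Nullable and inserts the column default when it reads NULL. A hedged standalone sketch of that contract, with std::stoi and a plain int column standing in for the real type machinery:

#include <optional>
#include <string>
#include <vector>

/// Returns false when "\N" was read and the default value was inserted instead,
/// mirroring what the patch expects the deserialize helper to report.
bool readFieldOrDefault(const std::string & text, std::vector<int> & column, int default_value = 0)
{
    std::optional<int> parsed;
    if (text != "\\N")                    // "\N" is the CSV spelling of NULL
        parsed = std::stoi(text);

    column.push_back(parsed.value_or(default_value));
    return parsed.has_value();
}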
- const size_t nullable_idx = *column_idx_to_nullable_column_idx[column_idx]; - auto & tmp_col = *nullable_columns[nullable_idx]; - nullable_types[nullable_idx]->deserializeAsTextCSV(tmp_col, in, format_settings); - Field value = tmp_col[0]; - tmp_col.popBack(1); /// do not store copy of values in memory - if (value.isNull()) - return false; - column.insert(value); - return true; + return DataTypeNullable::deserializeTextCSV(column, in, format_settings, type); } else { diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h index b8a3a956e1e..cebc8f6d6a1 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -61,12 +61,7 @@ private: return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter; } - /// For setting input_format_null_as_default - DataTypes nullable_types; - MutableColumns nullable_columns; - OptionalIndexes column_idx_to_nullable_column_idx; - - bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column, size_t column_idx); + bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column); }; } diff --git a/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 1d6cd4a839e..be4e6eaaf3f 100644 --- a/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -81,12 +81,11 @@ static Field convertNodeToField(const capnp::DynamicValue::Reader & value) auto structValue = value.as(); const auto & fields = structValue.getSchema().getFields(); - Field field = Tuple(TupleBackend(fields.size())); - TupleBackend & tuple = get(field).toUnderType(); + Tuple tuple(fields.size()); for (auto i : kj::indices(fields)) tuple[i] = convertNodeToField(structValue.get(fields[i])); - return field; + return tuple; } case capnp::DynamicValue::CAPABILITY: throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -271,7 +270,7 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension // Populate array with a single tuple elements for (size_t off = 0; off < size; ++off) { - const TupleBackend & tuple = DB::get(collected[off]).toUnderType(); + const auto & tuple = DB::get(collected[off]); flattened[off] = tuple[column_index]; } auto & col = columns[action.columns[column_index]]; diff --git a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index c6c88b8505f..8c834c44932 100644 --- a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -151,7 +151,7 @@ private: { info.special_parser.is_array = true; info.type = applyVisitor(FieldToDataType(), info.literal->value); - auto nested_type = dynamic_cast(*info.type).getNestedType(); + auto nested_type = assert_cast(*info.type).getNestedType(); /// It can be Array(Nullable(nested_type)) bool array_of_nullable = false; @@ -212,9 +212,9 @@ private: /// E.g. 
template of "position('some string', 'other string') != 0" is /// ["position", "(", DataTypeString, ",", DataTypeString, ")", "!=", DataTypeUInt64] ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & replaced_literals, TokenIterator expression_begin, TokenIterator expression_end, - ASTPtr & expression, const IDataType & result_type, const Context & context) + ASTPtr & expression, const IDataType & result_type, bool null_as_default_, const Context & context) { - + null_as_default = null_as_default_; std::sort(replaced_literals.begin(), replaced_literals.end(), [](const LiteralInfo & a, const LiteralInfo & b) { @@ -252,16 +252,17 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & ++prev_end; } - addNodesToCastResult(result_type, expression); - result_column_name = expression->getColumnName(); + addNodesToCastResult(result_type, expression, null_as_default); auto syntax_result = SyntaxAnalyzer(context).analyze(expression, literals.getNamesAndTypesList()); + result_column_name = expression->getColumnName(); actions_on_literals = ExpressionAnalyzer(expression, syntax_result, context).getActions(false); } size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, + bool null_as_default, const String & salt) { /// TODO distinguish expressions with the same AST and different tokens (e.g. "CAST(expr, 'Type')" and "CAST(expr AS Type)") @@ -272,6 +273,7 @@ size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTP for (const auto & info : replaced_literals) hash_state.update(info.type->getName()); + hash_state.update(null_as_default); /// Allows distinguish expression in the last column in Values format hash_state.update(salt); @@ -288,6 +290,7 @@ size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTP ConstantExpressionTemplate::TemplateStructurePtr ConstantExpressionTemplate::Cache::getFromCacheOrConstruct(const DataTypePtr & result_column_type, + bool null_as_default, TokenIterator expression_begin, TokenIterator expression_end, const ASTPtr & expression_, @@ -298,17 +301,18 @@ ConstantExpressionTemplate::Cache::getFromCacheOrConstruct(const DataTypePtr & r TemplateStructurePtr res; ASTPtr expression = expression_->clone(); ReplaceLiteralsVisitor visitor(context); - visitor.visit(expression, result_column_type->isNullable()); + visitor.visit(expression, result_column_type->isNullable() || null_as_default); ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters()); param_visitor.visit(expression); - size_t template_hash = TemplateStructure::getTemplateHash(expression, visitor.replaced_literals, result_column_type, salt); + size_t template_hash = TemplateStructure::getTemplateHash(expression, visitor.replaced_literals, result_column_type, null_as_default, salt); auto iter = cache.find(template_hash); if (iter == cache.end()) { if (max_size <= cache.size()) cache.clear(); - res = std::make_shared(visitor.replaced_literals, expression_begin, expression_end, expression, *result_column_type, context); + res = std::make_shared(visitor.replaced_literals, expression_begin, expression_end, + expression, *result_column_type, null_as_default, context); cache.insert({template_hash, res}); if (found_in_cache) *found_in_cache = false; @@ -416,7 +420,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co const Field & array = ast->as().value; auto 
array_type = applyVisitor(FieldToDataType(), array); - auto nested_type = dynamic_cast(*array_type).getNestedType(); + auto nested_type = assert_cast(*array_type).getNestedType(); if (type_info.is_nullable) if (auto nullable = dynamic_cast(nested_type.get())) nested_type = nullable->getNestedType(); @@ -488,7 +492,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co } } -ColumnPtr ConstantExpressionTemplate::evaluateAll() +ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, size_t column_idx, size_t offset) { Block evaluated = structure->literals.cloneWithColumns(std::move(columns)); columns = structure->literals.cloneEmptyColumns(); @@ -506,23 +510,40 @@ ColumnPtr ConstantExpressionTemplate::evaluateAll() ErrorCodes::LOGICAL_ERROR); rows_count = 0; - return evaluated.getByName(structure->result_column_name).column->convertToFullColumnIfConst(); + ColumnPtr res = evaluated.getByName(structure->result_column_name).column->convertToFullColumnIfConst(); + if (!structure->null_as_default) + return res; + + /// Extract column with evaluated expression and mask for NULLs + auto & tuple = assert_cast(*res); + if (tuple.tupleSize() != 2) + throw Exception("Invalid tuple size, it'a a bug", ErrorCodes::LOGICAL_ERROR); + auto & is_null = assert_cast(tuple.getColumn(1)); + + for (size_t i = 0; i < is_null.size(); ++i) + if (is_null.getUInt(i)) + nulls.setBit(column_idx, offset + i); + + return tuple.getColumnPtr(0); } -void ConstantExpressionTemplate::TemplateStructure::addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr) +void ConstantExpressionTemplate::TemplateStructure::addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr, bool null_as_default) { - auto result_type = std::make_shared(result_column_type.getName()); + /// Replace "expr" with "CAST(expr, 'TypeName')" + /// or with "(CAST(assumeNotNull(expr as _expression), 'TypeName'), isNull(_expression))" if null_as_default is true + if (null_as_default) + { + expr->setAlias("_expression"); + expr = makeASTFunction("assumeNotNull", std::move(expr)); + } - auto arguments = std::make_shared(); - arguments->children.push_back(std::move(expr)); - arguments->children.push_back(std::move(result_type)); + expr = makeASTFunction("CAST", std::move(expr), std::make_shared(result_column_type.getName())); - auto cast = std::make_shared(); - cast->name = "CAST"; - cast->arguments = std::move(arguments); - cast->children.push_back(cast->arguments); - - expr = std::move(cast); + if (null_as_default) + { + auto is_null = makeASTFunction("isNull", std::make_shared("_expression")); + expr = makeASTFunction("tuple", std::move(expr), std::move(is_null)); + } } } diff --git a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index 4f4dac849b2..96e1c653c03 100644 --- a/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/dbms/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -21,10 +21,11 @@ class ConstantExpressionTemplate : boost::noncopyable struct TemplateStructure : boost::noncopyable { TemplateStructure(LiteralsInfo & replaced_literals, TokenIterator expression_begin, TokenIterator expression_end, - ASTPtr & expr, const IDataType & result_type, const Context & context); + ASTPtr & expr, const IDataType & result_type, bool null_as_default_, const Context & context); - static void addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr); - static 
size_t getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, const String & salt); + static void addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr, bool null_as_default); + static size_t getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, + const DataTypePtr & result_column_type, bool null_as_default, const String & salt); String result_column_name; @@ -35,6 +36,7 @@ class ConstantExpressionTemplate : boost::noncopyable ExpressionActionsPtr actions_on_literals; std::vector special_parser; + bool null_as_default; }; public: @@ -50,6 +52,7 @@ public: /// Deduce template of expression of type result_column_type and add it to cache (or use template from cache) TemplateStructurePtr getFromCacheOrConstruct(const DataTypePtr & result_column_type, + bool null_as_default, TokenIterator expression_begin, TokenIterator expression_end, const ASTPtr & expression_, @@ -65,8 +68,9 @@ public: /// and parse literals into temporary columns bool parseExpression(ReadBuffer & istr, const FormatSettings & settings); - /// Evaluate batch of expressions were parsed using template - ColumnPtr evaluateAll(); + /// Evaluate batch of expressions were parsed using template. + /// If template was deduced with null_as_default == true, set bits in nulls for NULL values in column_idx, starting from offset. + ColumnPtr evaluateAll(BlockMissingValues & nulls, size_t column_idx, size_t offset = 0); size_t rowsCount() const { return rows_count; } diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 20830d2eccf..e49f9315887 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -129,21 +130,23 @@ void JSONEachRowRowInputFormat::skipUnknownField(const StringRef & name_ref) void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) { - if (read_columns[index]) + if (seen_columns[index]) throw Exception("Duplicate field found while parsing JSONEachRow format: " + columnName(index), ErrorCodes::INCORRECT_DATA); try { - auto & header = getPort().getHeader(); - header.getByPosition(index).type->deserializeAsTextJSON(*columns[index], in, format_settings); + seen_columns[index] = read_columns[index] = true; + const auto & type = getPort().getHeader().getByPosition(index).type; + if (format_settings.null_as_default && !type->isNullable()) + read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + else + type->deserializeAsTextJSON(*columns[index], in, format_settings); } catch (Exception & e) { e.addMessage("(while read the value of key " + columnName(index) + ")"); throw; } - - read_columns[index] = true; } inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) @@ -230,8 +233,8 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi size_t num_columns = columns.size(); - /// Set of columns for which the values were read. The rest will be filled with default values. 
read_columns.assign(num_columns, false); + seen_columns.assign(num_columns, false); nested_prefix_length = 0; readJSONObject(columns); @@ -239,7 +242,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi auto & header = getPort().getHeader(); /// Fill non-visited columns with the default values. for (size_t i = 0; i < num_columns; ++i) - if (!read_columns[i]) + if (!seen_columns[i]) header.getByPosition(i).type->insertDefaultInto(*columns[i]); /// return info about defaults set diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index afa3c9f2ba1..3e7a38e593a 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -55,7 +55,12 @@ private: /// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.' size_t nested_prefix_length = 0; + /// Set of columns for which the values were read. The rest will be filled with default values. std::vector read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector seen_columns; + /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true + /// for row like {..., "non-nullable column name" : null, ...} /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map. using NameMap = HashMap; diff --git a/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index f6a8222d854..3adbedff2a7 100644 --- a/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 15652dec08d..10d475f599a 100644 --- a/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -1,3 +1,4 @@ +#include #include #include ///#include diff --git a/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index a420f438b19..b8162fa32cd 100644 --- a/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 8cf3702d3bf..60df642836c 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -98,6 +99,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// Set of columns for which the values were read. The rest will be filled with default values. 
read_columns.assign(num_columns, false); + seen_columns.assign(num_columns, false); if (unlikely(*in.position() == '\n')) { @@ -131,12 +133,15 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex { index = *lookupResultGetMapped(it); - if (read_columns[index]) + if (seen_columns[index]) throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); - read_columns[index] = true; - - header.getByPosition(index).type->deserializeAsTextEscaped(*columns[index], in, format_settings); + seen_columns[index] = read_columns[index] = true; + const auto & type = getPort().getHeader().getByPosition(index).type; + if (format_settings.null_as_default && !type->isNullable()) + read_columns[index] = DataTypeNullable::deserializeTextEscaped(*columns[index], in, format_settings, type); + else + header.getByPosition(index).type->deserializeAsTextEscaped(*columns[index], in, format_settings); } } else @@ -166,7 +171,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex if (index >= 0) { columns[index]->popBack(1); - read_columns[index] = false; + seen_columns[index] = read_columns[index] = false; } throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); @@ -176,7 +181,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// Fill in the not met columns with default values. for (size_t i = 0; i < num_columns; ++i) - if (!read_columns[i]) + if (!seen_columns[i]) header.getByPosition(i).type->insertDefaultInto(*columns[i]); /// return info about defaults set diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h index 52330665395..cd7ad69aed6 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h @@ -41,7 +41,12 @@ private: using NameMap = HashMap; NameMap name_map; + /// Set of columns for which the values were read. The rest will be filled with default values. std::vector read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector seen_columns; + /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true + /// for row like ..., non-nullable column name=\N, ... 
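Since the same read_columns/seen_columns split is introduced for both JSONEachRow and TSKV, here is a small self-contained sketch of the intended semantics (plain std::vector<bool> masks, not the actual format classes):

#include <cstddef>
#include <vector>

struct RowMasks
{
    std::vector<bool> seen_columns;   /// key appeared in the row (duplicate detection, default filling)
    std::vector<bool> read_columns;   /// column now holds a real value rather than a default

    explicit RowMasks(std::size_t num_columns)
        : seen_columns(num_columns, false), read_columns(num_columns, false) {}

    /// value_was_null models a non-nullable column receiving \N with null_as_default=1:
    /// the column is "seen" but not "read", so it is later reported as a missing value.
    void onField(std::size_t index, bool value_was_null)
    {
        seen_columns[index] = true;
        read_columns[index] = !value_was_null;
    }
};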
}; } diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index cb9ff5b53be..b57fc51f183 100644 --- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -117,9 +118,10 @@ void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & } for (const auto column_index : columns_to_fill_with_default_values) + { data_types[column_index]->insertDefaultInto(*columns[column_index]); - - row_read_extension.read_columns = read_columns; + row_read_extension.read_columns[column_index] = false; + } } @@ -174,12 +176,15 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens updateDiagnosticInfo(); + ext.read_columns.assign(read_columns.size(), true); for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) { const auto & column_index = column_indexes_for_input_fields[file_column]; + const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size(); if (column_index) { - data_types[*column_index]->deserializeAsTextEscaped(*columns[*column_index], in, format_settings); + const auto & type = data_types[*column_index]; + ext.read_columns[*column_index] = readField(*columns[*column_index], type, is_last_file_column); } else { @@ -206,6 +211,22 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens return true; } + +bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column) +{ + const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t'; + const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n'); + if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) + { + column.insertDefault(); + return false; + } + else if (format_settings.null_as_default && !type->isNullable()) + return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type); + type->deserializeAsTextEscaped(column, in, format_settings); + return true; +} + bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) { for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) @@ -303,7 +324,10 @@ void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, I { prev_pos = in.position(); if (column_indexes_for_input_fields[file_column]) - type->deserializeAsTextEscaped(column, in, format_settings); + { + const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size(); + readField(column, type, is_last_file_column); + } else { NullSink null_sink; diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index a28ac62ed4f..9d3f0b52d11 100644 --- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -41,6 +41,8 @@ private: std::vector read_columns; std::vector columns_to_fill_with_default_values; + bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column); + void addInputColumn(const String & column_name); void setupAllColumnsByTableSchema(); void fillUnreadColumnsWithDefaults(MutableColumns & 
columns, RowReadExtension& ext); diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index f0b2238ec7f..b77ec5417b0 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -23,7 +24,8 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_) : RowInputFormatWithDiagnosticInfo(header_, buf, params_), buf(in_), data_types(header_.getDataTypes()), settings(settings_), ignore_spaces(ignore_spaces_), - format(std::move(format_)), row_format(std::move(row_format_)) + format(std::move(format_)), row_format(std::move(row_format_)), + default_csv_delimiter(settings.csv.delimiter) { /// Validate format string for result set bool has_data = false; @@ -68,6 +70,10 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer column_in_format[col_idx] = true; } } + + for (size_t i = 0; i < header_.columns(); ++i) + if (!column_in_format[i]) + always_default_columns.push_back(i); } void TemplateRowInputFormat::readPrefix() @@ -166,8 +172,7 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension if (row_format.format_idx_to_column_idx[i]) { size_t col_idx = *row_format.format_idx_to_column_idx[i]; - deserializeField(*data_types[col_idx], *columns[col_idx], row_format.formats[i]); - extra.read_columns[col_idx] = true; + extra.read_columns[col_idx] = deserializeField(data_types[col_idx], *columns[col_idx], i); } else skipField(row_format.formats[i]); @@ -177,30 +182,47 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension skipSpaces(); assertString(row_format.delimiters.back(), buf); - for (size_t i = 0; i < columns.size(); ++i) - if (!extra.read_columns[i]) - data_types[i]->insertDefaultInto(*columns[i]); + for (const auto & idx : always_default_columns) + data_types[idx]->insertDefaultInto(*columns[idx]); return true; } -void TemplateRowInputFormat::deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format) +bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) { + ColumnFormat col_format = row_format.formats[file_column]; + bool read = true; + bool parse_as_nullable = settings.null_as_default && !type->isNullable(); try { switch (col_format) { case ColumnFormat::Escaped: - type.deserializeAsTextEscaped(column, buf, settings); + if (parse_as_nullable) + read = DataTypeNullable::deserializeTextEscaped(column, buf, settings, type); + else + type->deserializeAsTextEscaped(column, buf, settings); break; case ColumnFormat::Quoted: - type.deserializeAsTextQuoted(column, buf, settings); + if (parse_as_nullable) + read = DataTypeNullable::deserializeTextQuoted(column, buf, settings, type); + else + type->deserializeAsTextQuoted(column, buf, settings); break; case ColumnFormat::Csv: - type.deserializeAsTextCSV(column, buf, settings); + /// Will read unquoted string until settings.csv.delimiter + settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? 
default_csv_delimiter : + row_format.delimiters[file_column + 1].front(); + if (parse_as_nullable) + read = DataTypeNullable::deserializeTextCSV(column, buf, settings, type); + else + type->deserializeAsTextCSV(column, buf, settings); break; case ColumnFormat::Json: - type.deserializeAsTextJSON(column, buf, settings); + if (parse_as_nullable) + read = DataTypeNullable::deserializeTextJSON(column, buf, settings, type); + else + type->deserializeAsTextJSON(column, buf, settings); break; default: __builtin_unreachable(); @@ -212,6 +234,7 @@ void TemplateRowInputFormat::deserializeField(const IDataType & type, IColumn & throwUnexpectedEof(); throw; } + return read; } void TemplateRowInputFormat::skipField(TemplateRowInputFormat::ColumnFormat col_format) @@ -391,7 +414,7 @@ void TemplateRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, IColu { prev_pos = buf.position(); if (row_format.format_idx_to_column_idx[file_column]) - deserializeField(*type, column, row_format.formats[file_column]); + deserializeField(type, column, file_column); else skipField(row_format.formats[file_column]); curr_pos = buf.position(); diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 2142d492988..7b62347c37d 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -29,7 +29,7 @@ public: void syncAfterError() override; private: - void deserializeField(const IDataType & type, IColumn & column, ColumnFormat col_format); + bool deserializeField(const DataTypePtr & type, IColumn & column, size_t file_column); void skipField(ColumnFormat col_format); inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(buf); } @@ -50,13 +50,15 @@ private: PeekableReadBuffer buf; DataTypes data_types; - const FormatSettings settings; + FormatSettings settings; const bool ignore_spaces; ParsedTemplateFormatString format; ParsedTemplateFormatString row_format; size_t format_data_idx; bool end_of_stream = false; + std::vector always_default_columns; + char default_csv_delimiter; }; } diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 8fda4294e3d..05bde62d902 100644 --- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -26,7 +27,7 @@ namespace ErrorCodes extern const int CANNOT_READ_ARRAY_FROM_TEXT; extern const int CANNOT_PARSE_DATE; extern const int SYNTAX_ERROR; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + extern const int TYPE_MISMATCH; extern const int SUPPORT_IS_DISABLED; } @@ -47,6 +48,7 @@ Chunk ValuesBlockInputFormat::generate() { const Block & header = getPort().getHeader(); MutableColumns columns = header.cloneEmptyColumns(); + block_missing_values.clear(); for (size_t rows_in_block = 0; rows_in_block < params.max_block_size; ++rows_in_block) { @@ -55,7 +57,7 @@ Chunk ValuesBlockInputFormat::generate() skipWhitespaceIfAny(buf); if (buf.eof() || *buf.position() == ';') break; - readRow(columns); + readRow(columns, rows_in_block); if (params.callback) params.callback(); } @@ -73,10 +75,10 @@ Chunk ValuesBlockInputFormat::generate() if (!templates[i] || !templates[i]->rowsCount()) continue; if (columns[i]->empty()) - columns[i] = std::move(*templates[i]->evaluateAll()).mutate(); + columns[i] = 
std::move(*templates[i]->evaluateAll(block_missing_values, i)).mutate(); else { - ColumnPtr evaluated = templates[i]->evaluateAll(); + ColumnPtr evaluated = templates[i]->evaluateAll(block_missing_values, i, columns[i]->size()); columns[i]->insertRangeFrom(*evaluated, 0, evaluated->size()); } } @@ -91,7 +93,7 @@ Chunk ValuesBlockInputFormat::generate() return Chunk{std::move(columns), rows_in_block}; } -void ValuesBlockInputFormat::readRow(MutableColumns & columns) +void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) { assertChar('(', buf); @@ -99,17 +101,22 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns) { skipWhitespaceIfAny(buf); PeekableReadBufferCheckpoint checkpoint{buf}; + bool read; /// Parse value using fast streaming parser for literals and slow SQL parser for expressions. /// If there is SQL expression in some row, template of this expression will be deduced, /// so it makes possible to parse the following rows much faster /// if expressions in the following rows have the same structure if (parser_type_for_column[column_idx] == ParserType::Streaming) - tryReadValue(*columns[column_idx], column_idx); + read = tryReadValue(*columns[column_idx], column_idx); else if (parser_type_for_column[column_idx] == ParserType::BatchTemplate) - tryParseExpressionUsingTemplate(columns[column_idx], column_idx); + read = tryParseExpressionUsingTemplate(columns[column_idx], column_idx); else /// if (parser_type_for_column[column_idx] == ParserType::SingleExpressionEvaluation) - parseExpression(*columns[column_idx], column_idx); + read = parseExpression(*columns[column_idx], column_idx); + + if (!read) + block_missing_values.setBit(column_idx, row_num); + /// If read is true, value still may be missing. Bit mask for these values will be copied from ConstantExpressionTemplate later. } skipWhitespaceIfAny(buf); @@ -119,22 +126,22 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns) ++total_rows; } -void ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx) +bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx) { /// Try to parse expression using template if one was successfully deduced while parsing the first row if (templates[column_idx]->parseExpression(buf, format_settings)) { ++rows_parsed_using_template[column_idx]; - return; + return true; } /// Expression in the current row is not match template deduced on the first row. /// Evaluate expressions, which were parsed using this template. 
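The per-column fallback chain used here is: fast streaming parser, then a template deduced from the first complex expression and reused for the batch, then full SQL expression evaluation. A rough standalone sketch of that chain (enum and callbacks are illustrative only, not the real parser interfaces):

#include <functional>
#include <string>

enum class ParserType { Streaming, BatchTemplate, SingleExpressionEvaluation };

bool parseValue(ParserType & parser, const std::string & token,
                const std::function<bool(const std::string &)> & try_streaming,
                const std::function<bool(const std::string &)> & try_template,
                const std::function<bool(const std::string &)> & evaluate_expression)
{
    switch (parser)
    {
        case ParserType::Streaming:
            if (try_streaming(token))
                return true;
            parser = ParserType::BatchTemplate;           // fall back to a deduced template
            [[fallthrough]];
        case ParserType::BatchTemplate:
            if (try_template(token))
                return true;
            parser = ParserType::SingleExpressionEvaluation;
            [[fallthrough]];
        case ParserType::SingleExpressionEvaluation:
            return evaluate_expression(token);
    }
    return false;
}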
if (column->empty()) - column = std::move(*templates[column_idx]->evaluateAll()).mutate(); + column = std::move(*templates[column_idx]->evaluateAll(block_missing_values, column_idx)).mutate(); else { - ColumnPtr evaluated = templates[column_idx]->evaluateAll(); + ColumnPtr evaluated = templates[column_idx]->evaluateAll(block_missing_values, column_idx, column->size()); column->insertRangeFrom(*evaluated, 0, evaluated->size()); } /// Do not use this template anymore @@ -142,19 +149,25 @@ void ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & buf.rollbackToCheckpoint(); /// It will deduce new template or fallback to slow SQL parser - parseExpression(*column, column_idx); + return parseExpression(*column, column_idx); } -void ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) +bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) { bool rollback_on_exception = false; try { - types[column_idx]->deserializeAsTextQuoted(column, buf, format_settings); + bool read = true; + const auto & type = types[column_idx]; + if (format_settings.null_as_default && !type->isNullable()) + read = DataTypeNullable::deserializeTextQuoted(column, buf, format_settings, type); + else + type->deserializeAsTextQuoted(column, buf, format_settings); rollback_on_exception = true; skipWhitespaceIfAny(buf); assertDelimiterAfterValue(column_idx); + return read; } catch (const Exception & e) { @@ -166,12 +179,11 @@ void ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) /// Switch to SQL parser and don't try to use streaming parser for complex expressions /// Note: Throwing exceptions for each expression may be very slow because of stacktraces buf.rollbackToCheckpoint(); - parseExpression(column, column_idx); + return parseExpression(column, column_idx); } } -void -ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) +bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) { const Block & header = getPort().getHeader(); const IDataType & type = *header.getByPosition(column_idx).type; @@ -223,7 +235,7 @@ ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) if (ok) { parser_type_for_column[column_idx] = ParserType::Streaming; - return; + return true; } else if (rollback_on_exception) column.popBack(1); @@ -243,7 +255,8 @@ ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) bool found_in_cache = false; const auto & result_type = header.getByPosition(column_idx).type; const char * delimiter = (column_idx + 1 == num_columns) ? ")" : ","; - auto structure = templates_cache.getFromCacheOrConstruct(result_type, TokenIterator(tokens), token_iterator, + auto structure = templates_cache.getFromCacheOrConstruct(result_type, format_settings.null_as_default, + TokenIterator(tokens), token_iterator, ast, *context, &found_in_cache, delimiter); templates[column_idx].emplace(structure); if (found_in_cache) @@ -256,7 +269,7 @@ ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) { ++rows_parsed_using_template[column_idx]; parser_type_for_column[column_idx] = ParserType::BatchTemplate; - return; + return true; } } catch (...) @@ -290,15 +303,20 @@ ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) /// Check that we are indeed allowed to insert a NULL. 
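The check that follows decides what happens when the evaluated expression is NULL but the target column is not nullable: with null_as_default the column default is inserted and the row is recorded in the missing-values mask, otherwise the insert is rejected as a type mismatch. Roughly, in standalone form (Field is reduced to a two-alternative variant and 0 stands in for the type's default):

#include <stdexcept>
#include <variant>
#include <vector>

using Field = std::variant<std::monostate, long>;    // monostate models NULL

bool insertEvaluatedValue(std::vector<long> & column, const Field & value, bool null_as_default)
{
    if (std::holds_alternative<std::monostate>(value))        // expression evaluated to NULL
    {
        if (!null_as_default)
            throw std::runtime_error("Cannot insert NULL value into a column of non-nullable type");
        column.push_back(0);                                  // insert the column's default value
        return false;                                         // caller marks the row in the missing-values mask
    }
    column.push_back(std::get<long>(value));
    return true;
}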
if (value.isNull() && !type.isNullable()) { + if (format_settings.null_as_default) + { + type.insertDefaultInto(column); + return false; + } buf.rollbackToCheckpoint(); - throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value) - + ", that is out of range of type " + type.getName() - + ", at: " + + throw Exception{"Cannot insert NULL value into a column of type '" + type.getName() + "'" + + " at: " + String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())), - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE}; + ErrorCodes::TYPE_MISMATCH}; } column.insert(value); + return true; } /// Can be used in fileSegmentationEngine for parallel parsing of Values diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index ec6a3b931e4..cf2c4efb1cf 100644 --- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -33,6 +33,8 @@ public: String getName() const override { return "ValuesBlockInputFormat"; } + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } + private: enum class ParserType { @@ -45,11 +47,11 @@ private: Chunk generate() override; - void readRow(MutableColumns & columns); + void readRow(MutableColumns & columns, size_t row_num); - void tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx); - ALWAYS_INLINE inline void tryReadValue(IColumn & column, size_t column_idx); - void parseExpression(IColumn & column, size_t column_idx); + bool tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx); + ALWAYS_INLINE inline bool tryReadValue(IColumn & column, size_t column_idx); + bool parseExpression(IColumn & column, size_t column_idx); ALWAYS_INLINE inline void assertDelimiterAfterValue(size_t column_idx); ALWAYS_INLINE inline bool checkDelimiterAfterValue(size_t column_idx); @@ -81,6 +83,8 @@ private: ConstantExpressionTemplate::Cache templates_cache; DataTypes types; + + BlockMissingValues block_missing_values; }; } diff --git a/dbms/src/Processors/Port.h b/dbms/src/Processors/Port.h index fb7d7ed72c9..d37e11668e7 100644 --- a/dbms/src/Processors/Port.h +++ b/dbms/src/Processors/Port.h @@ -6,8 +6,10 @@ #include #include +#include #include #include +#include namespace DB { diff --git a/dbms/src/Processors/Transforms/ConvertingTransform.cpp b/dbms/src/Processors/Transforms/ConvertingTransform.cpp index 8729b896084..e801fe7cb26 100644 --- a/dbms/src/Processors/Transforms/ConvertingTransform.cpp +++ b/dbms/src/Processors/Transforms/ConvertingTransform.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { diff --git a/dbms/src/Processors/Transforms/ExpressionTransform.cpp b/dbms/src/Processors/Transforms/ExpressionTransform.cpp index 90ec1031314..a5755ae072b 100644 --- a/dbms/src/Processors/Transforms/ExpressionTransform.cpp +++ b/dbms/src/Processors/Transforms/ExpressionTransform.cpp @@ -21,6 +21,18 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { + if (!initialized) + { + initialized = true; + + if (expression->resultIsAlwaysEmpty()) + { + stopReading(); + chunk = Chunk(getOutputPort().getHeader().getColumns(), 0); + return; + } + } + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); if (on_totals) diff --git 
a/dbms/src/Processors/Transforms/ExpressionTransform.h b/dbms/src/Processors/Transforms/ExpressionTransform.h index 6c6d474d872..5a5d60bfacf 100644 --- a/dbms/src/Processors/Transforms/ExpressionTransform.h +++ b/dbms/src/Processors/Transforms/ExpressionTransform.h @@ -21,6 +21,7 @@ private: ExpressionActionsPtr expression; bool on_totals; bool default_totals; + bool initialized = false; }; } diff --git a/dbms/src/Processors/Transforms/FilterTransform.cpp b/dbms/src/Processors/Transforms/FilterTransform.cpp index 7389410d0af..e4f82025fb1 100644 --- a/dbms/src/Processors/Transforms/FilterTransform.cpp +++ b/dbms/src/Processors/Transforms/FilterTransform.cpp @@ -83,6 +83,18 @@ void FilterTransform::removeFilterIfNeed(Chunk & chunk) void FilterTransform::transform(Chunk & chunk) { + if (!initialized) + { + initialized = true; + /// Cannot check this in prepare. Because in prepare columns for set may be not created yet. + if (expression->checkColumnIsAlwaysFalse(filter_column_name)) + { + stopReading(); + chunk = Chunk(getOutputPort().getHeader().getColumns(), 0); + return; + } + } + size_t num_rows_before_filtration = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/dbms/src/Processors/Transforms/FilterTransform.h b/dbms/src/Processors/Transforms/FilterTransform.h index 127eb5a8039..c595d72a70b 100644 --- a/dbms/src/Processors/Transforms/FilterTransform.h +++ b/dbms/src/Processors/Transforms/FilterTransform.h @@ -36,6 +36,8 @@ private: /// Header after expression, but before removing filter column. Block transformed_header; + bool initialized = false; + void removeFilterIfNeed(Chunk & chunk); }; diff --git a/dbms/src/Processors/Transforms/LimitByTransform.cpp b/dbms/src/Processors/Transforms/LimitByTransform.cpp index 83268d178bd..f9f7fbee3fe 100644 --- a/dbms/src/Processors/Transforms/LimitByTransform.cpp +++ b/dbms/src/Processors/Transforms/LimitByTransform.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB diff --git a/dbms/src/Processors/tests/processors_test.cpp b/dbms/src/Processors/tests/processors_test.cpp index 519eb79e017..3e2e6abd1da 100644 --- a/dbms/src/Processors/tests/processors_test.cpp +++ b/dbms/src/Processors/tests/processors_test.cpp @@ -88,7 +88,7 @@ public: void schedule(EventCounter & watch) override { active = true; - pool.schedule([&watch, this] + pool.scheduleOrThrowOnError([&watch, this] { usleep(sleep_useconds); current_chunk = generate(); diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index b1b63258f06..5dce68ec381 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -339,11 +339,19 @@ void DistributedBlockOutputStream::writeSync(const Block & block) per_shard_jobs[current_selector[i]].shard_current_block_permuation.push_back(i); } - /// Run jobs in parallel for each block and wait them - finished_jobs_count = 0; - for (size_t shard_index : ext::range(0, shards_info.size())) - for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) - pool->schedule(runWritingJob(job, block)); + try + { + /// Run jobs in parallel for each block and wait them + finished_jobs_count = 0; + for (size_t shard_index : ext::range(0, shards_info.size())) + for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) + pool->scheduleOrThrowOnError(runWritingJob(job, block)); + } + catch (...) 
+ { + pool->wait(); + throw; + } try { @@ -373,17 +381,27 @@ void DistributedBlockOutputStream::writeSuffix() if (insert_sync && pool) { finished_jobs_count = 0; - for (auto & shard_jobs : per_shard_jobs) - for (JobReplica & job : shard_jobs.replicas_jobs) + try + { + for (auto & shard_jobs : per_shard_jobs) { - if (job.stream) + for (JobReplica & job : shard_jobs.replicas_jobs) { - pool->schedule([&job] () + if (job.stream) { - job.stream->writeSuffix(); - }); + pool->scheduleOrThrowOnError([&job]() + { + job.stream->writeSuffix(); + }); + } } } + } + catch (...) + { + pool->wait(); + throw; + } try { diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h index 0d5a2e08b11..97297aae434 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index f614ff8dc50..4b55cedbfcc 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index ced109922dc..8d30f31bde3 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ struct ColumnSize * - data storage structure (compression, etc.) * - concurrent access to data (locks, etc.) */ -class IStorage : public std::enable_shared_from_this +class IStorage : public std::enable_shared_from_this, public TypePromotion { public: IStorage() = default; diff --git a/dbms/src/Storages/LiveView/StorageLiveView.cpp b/dbms/src/Storages/LiveView/StorageLiveView.cpp index 089c7ccd47e..0e4b6f0b945 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.cpp +++ b/dbms/src/Storages/LiveView/StorageLiveView.cpp @@ -273,6 +273,8 @@ bool StorageLiveView::hasColumn(const String & column_name) const Block StorageLiveView::getHeader() const { + std::lock_guard lock(sample_block_lock); + if (!sample_block) { auto storage = global_context.getTable(select_database_name, select_table_name); @@ -375,7 +377,7 @@ void StorageLiveView::noUsersThread(std::shared_ptr storage, co { while (1) { - std::unique_lock lock(storage->no_users_thread_mutex); + std::unique_lock lock(storage->no_users_thread_wakeup_mutex); if (!storage->no_users_thread_condition.wait_for(lock, std::chrono::seconds(timeout), [&] { return storage->no_users_thread_wakeup; })) { storage->no_users_thread_wakeup = false; @@ -421,17 +423,22 @@ void StorageLiveView::startNoUsersThread(const UInt64 & timeout) if (is_temporary) { + std::lock_guard no_users_thread_lock(no_users_thread_mutex); + + if (shutdown_called) + return; + if (no_users_thread.joinable()) { { - std::lock_guard lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_wakeup_mutex); no_users_thread_wakeup = true; no_users_thread_condition.notify_one(); } no_users_thread.join(); } { - std::lock_guard lock(no_users_thread_mutex); + std::lock_guard lock(no_users_thread_wakeup_mutex); no_users_thread_wakeup = false; } if (!is_dropped) @@ -453,12 +460,15 @@ void StorageLiveView::shutdown() if (!shutdown_called.compare_exchange_strong(expected, true)) return; - if (no_users_thread.joinable()) { + std::lock_guard no_users_thread_lock(no_users_thread_mutex); + 
if (no_users_thread.joinable()) { - std::lock_guard lock(no_users_thread_mutex); - no_users_thread_wakeup = true; - no_users_thread_condition.notify_one(); + { + std::lock_guard lock(no_users_thread_wakeup_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.notify_one(); + } } } } @@ -466,8 +476,12 @@ void StorageLiveView::shutdown() StorageLiveView::~StorageLiveView() { shutdown(); - if (no_users_thread.joinable()) - no_users_thread.detach(); + + { + std::lock_guard lock(no_users_thread_mutex); + if (no_users_thread.joinable()) + no_users_thread.detach(); + } } void StorageLiveView::drop(TableStructureWriteLockHolder &) @@ -539,11 +553,14 @@ BlockInputStreams StorageLiveView::watch( context.getSettingsRef().live_view_heartbeat_interval.totalSeconds(), context.getSettingsRef().temporary_live_view_timeout.totalSeconds()); - if (no_users_thread.joinable()) { - std::lock_guard lock(no_users_thread_mutex); - no_users_thread_wakeup = true; - no_users_thread_condition.notify_one(); + std::lock_guard no_users_thread_lock(no_users_thread_mutex); + if (no_users_thread.joinable()) + { + std::lock_guard lock(no_users_thread_wakeup_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.notify_one(); + } } { @@ -567,11 +584,14 @@ BlockInputStreams StorageLiveView::watch( context.getSettingsRef().live_view_heartbeat_interval.totalSeconds(), context.getSettingsRef().temporary_live_view_timeout.totalSeconds()); - if (no_users_thread.joinable()) { - std::lock_guard lock(no_users_thread_mutex); - no_users_thread_wakeup = true; - no_users_thread_condition.notify_one(); + std::lock_guard no_users_thread_lock(no_users_thread_mutex); + if (no_users_thread.joinable()) + { + std::lock_guard lock(no_users_thread_wakeup_mutex); + no_users_thread_wakeup = true; + no_users_thread_condition.notify_one(); + } } { diff --git a/dbms/src/Storages/LiveView/StorageLiveView.h b/dbms/src/Storages/LiveView/StorageLiveView.h index 710f4ec1602..3f1dffb898c 100644 --- a/dbms/src/Storages/LiveView/StorageLiveView.h +++ b/dbms/src/Storages/LiveView/StorageLiveView.h @@ -73,7 +73,7 @@ public: } /// No users thread mutex, predicate and wake up condition void startNoUsersThread(const UInt64 & timeout); - std::mutex no_users_thread_mutex; + std::mutex no_users_thread_wakeup_mutex; bool no_users_thread_wakeup = false; std::condition_variable no_users_thread_condition; /// Get blocks hash @@ -149,6 +149,8 @@ private: ASTPtr inner_query; Context & global_context; bool is_temporary = false; + /// Mutex to protect access to sample block + mutable std::mutex sample_block_lock; mutable Block sample_block; /// Mutex for the blocks and ready condition @@ -168,6 +170,7 @@ private: /// Background thread for temporary tables /// which drops this table if there are no users static void noUsersThread(std::shared_ptr storage, const UInt64 & timeout); + std::mutex no_users_thread_mutex; std::thread no_users_thread; std::atomic shutdown_called = false; std::atomic start_no_users_thread_called = false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a470cc97bee..af985c02927 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -802,7 +802,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (size_t i = 0; i < part_names_with_disks.size(); ++i) { - pool.schedule([&, i] + pool.scheduleOrThrowOnError([&, i] { const auto & part_name = part_names_with_disks[i].first; const auto 
part_disk_ptr = part_names_with_disks[i].second; @@ -1155,7 +1155,7 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts_to_re /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. for (const DataPartPtr & part : parts_to_remove) { - pool.schedule([&] + pool.scheduleOrThrowOnError([&] { LOG_DEBUG(log, "Removing part from filesystem " << part->name); part->remove(); @@ -2488,12 +2488,12 @@ void MergeTreeData::throwInsertIfNeeded() const MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) { - auto committed_parts_range = getDataPartsStateRange(state); + auto current_state_parts_range = getDataPartsStateRange(state); /// The part can be covered only by the previous or the next one in data_parts. auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{state, part_info}); - if (it != committed_parts_range.end()) + if (it != current_state_parts_range.end()) { if ((*it)->info == part_info) return *it; @@ -2501,7 +2501,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( return *it; } - if (it != committed_parts_range.begin()) + if (it != current_state_parts_range.begin()) { --it; if ((*it)->info.contains(part_info)) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 56a18122f29..856354959f9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -331,7 +331,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (which.isTuple() && function->name == "tuple") { - const TupleBackend & tuple = get(value_field).toUnderType(); + const Tuple & tuple = get(value_field); const auto value_tuple_data_type = typeid_cast(value_type.get()); const ASTs & arguments = typeid_cast(*function->arguments).children; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index e276d811cd9..196b4df9eda 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -206,7 +206,7 @@ public: size_t bloom_filter_hashes; /// Bloom filter seed. size_t seed; - /// Fucntion for selecting next token. + /// Function for selecting next token. 
std::unique_ptr token_extractor_func; }; diff --git a/dbms/src/Storages/MutationCommands.cpp b/dbms/src/Storages/MutationCommands.cpp index 2358bab6202..f8bc781f166 100644 --- a/dbms/src/Storages/MutationCommands.cpp +++ b/dbms/src/Storages/MutationCommands.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index bb4ccf8720e..44f2c466a5f 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 0eafafbd41e..d3796735a07 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -104,11 +104,12 @@ StoragePtr StorageFactory::get( } if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by || - (query.columns_list && query.columns_list->indices && !query.columns_list->indices->children.empty())) + storage_def->ttl_table || !columns.getColumnTTLs().empty() || + (query.columns_list && query.columns_list->indices && !query.columns_list->indices->children.empty())) && !endsWith(name, "MergeTree")) { throw Exception( - "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses and skipping indices. " + "Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY, TTL or SAMPLE BY clauses and skipping indices. " "Currently only the MergeTree family of engines supports them", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index daffbbd149f..c752109e328 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -216,7 +216,7 @@ std::vector StorageMergeTree::prepar }; if (thread_pool) - thread_pool->schedule(job); + thread_pool->scheduleOrThrowOnError(job); else job(); } diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index aa6c61f202f..0cf5f5e7013 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -77,7 +77,9 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const MutableColumnPtr database_column_mut = ColumnString::create(); for (const auto & database : databases) { - if (context.hasDatabaseAccessRights(database.first)) + /// Lazy database can not contain MergeTree tables + /// and it's unnecessary to load all tables of Lazy database just to filter all of them. + if (context.hasDatabaseAccessRights(database.first) && database.second->getEngineName() != "Lazy") database_column_mut->insert(database.first); } block_to_filter.insert(ColumnWithTypeAndName( @@ -101,10 +103,6 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const String database_name = (*database_column_)[i].get(); const DatabasePtr database = databases.at(database_name); - /// Lazy database can not contain MergeTree tables - if (database->getEngineName() == "Lazy") - continue; - offsets[i] = i ? 
offsets[i - 1] : 0; for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next()) { diff --git a/dbms/src/Storages/getStructureOfRemoteTable.cpp b/dbms/src/Storages/getStructureOfRemoteTable.cpp index 137abcea649..2b6924695bf 100644 --- a/dbms/src/Storages/getStructureOfRemoteTable.cpp +++ b/dbms/src/Storages/getStructureOfRemoteTable.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 34f6ce64278..797cb677d6a 100644 --- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,13 @@ using namespace DB; struct State { Context context{Context::createGlobal()}; - NamesAndTypesList columns{{"column", std::make_shared()}}; + NamesAndTypesList columns{ + {"column", std::make_shared()}, + {"apply_id", std::make_shared()}, + {"apply_type", std::make_shared()}, + {"apply_status", std::make_shared()}, + {"create_time", std::make_shared()}, + }; State() { @@ -85,5 +92,11 @@ TEST(TransformQueryForExternalDatabase, MultipleAndSubqueries) check("SELECT column FROM test.table WHERE toString(column) = '42' AND left(column, 10) = RIGHT(column, 10) AND column = 42", "SELECT \"column\" FROM \"test\".\"table\" WHERE (\"column\" = 42)", state().context, state().columns); - +} + +TEST(TransformQueryForExternalDatabase, Issue7245) +{ + check("select apply_id from test.table where apply_type = 2 and create_time > addDays(toDateTime('2019-01-01 01:02:03'),-7) and apply_status in (3,4)", + "SELECT \"apply_id\", \"apply_type\", \"apply_status\", \"create_time\" FROM \"test\".\"table\" WHERE (\"apply_type\" = 2) AND (\"create_time\" > '2018-12-25 01:02:03') AND (\"apply_status\" IN (3, 4))", + state().context, state().columns); } diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index b6e48836efa..aab240dc070 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -20,31 +23,64 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static void replaceConstFunction(IAST & node, const Context & context, const NamesAndTypesList & all_columns) +namespace { - for (size_t i = 0; i < node.children.size(); ++i) - { - auto child = node.children[i]; - if (auto * exp_list = child->as()) - replaceConstFunction(*exp_list, context, all_columns); - if (auto * function = child->as()) +class ReplacingConstantExpressionsMatcher +{ +public: + using Data = Block; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) + { + return true; + } + + static void visit(ASTPtr & node, Block & block_with_constants) + { + if (!node->as()) + return; + + std::string name = node->getColumnName(); + if (block_with_constants.has(name)) { - NamesAndTypesList source_columns = all_columns; - ASTPtr query = function->ptr(); - auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_columns); - auto result_block = KeyCondition::getBlockWithConstants(query, syntax_result, context); - if (!result_block.has(child->getColumnName())) + auto result = 
block_with_constants.getByName(name); + if (!isColumnConst(*result.column)) return; - auto result_column = result_block.getByName(child->getColumnName()).column; + if (result.column->isNullAt(0)) + { + node = std::make_shared(Field()); + } + else if (isNumber(result.type)) + { + node = std::make_shared(assert_cast(*result.column).getField()); + } + else + { + /// Everything except numbers is put as string literal. This is important for Date, DateTime, UUID. - node.children[i] = std::make_shared((*result_column)[0]); + const IColumn & inner_column = assert_cast(*result.column).getDataColumn(); + + WriteBufferFromOwnString out; + result.type->serializeAsText(inner_column, 0, out, FormatSettings()); + node = std::make_shared(out.str()); + } } } +}; + +void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & all_columns) +{ + auto syntax_result = SyntaxAnalyzer(context).analyze(node, all_columns); + Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context); + + InDepthNodeVisitor visitor(block_with_constants); + visitor.visit(node); } -static bool isCompatible(const IAST & node) + +bool isCompatible(const IAST & node) { if (const auto * function = node.as()) { @@ -99,6 +135,8 @@ static bool isCompatible(const IAST & node) return false; } +} + String transformQueryForExternalDatabase( const IAST & query, @@ -131,7 +169,8 @@ String transformQueryForExternalDatabase( ASTPtr original_where = clone_query->as().where(); if (original_where) { - replaceConstFunction(*original_where, context, available_columns); + replaceConstantExpressions(original_where, context, available_columns); + if (isCompatible(*original_where)) { select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); diff --git a/dbms/src/TableFunctions/TableFunctionMySQL.cpp b/dbms/src/TableFunctions/TableFunctionMySQL.cpp index 3cb9b8dea60..820a55c3a2c 100644 --- a/dbms/src/TableFunctions/TableFunctionMySQL.cpp +++ b/dbms/src/TableFunctions/TableFunctionMySQL.cpp @@ -18,9 +18,9 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/dbms/src/TableFunctions/TableFunctionValues.cpp b/dbms/src/TableFunctions/TableFunctionValues.cpp index 30a423a3384..d4ca0ff4211 100644 --- a/dbms/src/TableFunctions/TableFunctionValues.cpp +++ b/dbms/src/TableFunctions/TableFunctionValues.cpp @@ -44,7 +44,7 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args { const auto & [value_field, value_type_ptr] = evaluateConstantExpression(args[i], context); const DataTypes & value_types_tuple = typeid_cast(value_type_ptr.get())->getElements(); - const TupleBackend & value_tuple = value_field.safeGet().toUnderType(); + const Tuple & value_tuple = value_field.safeGet(); if (value_tuple.size() != sample_block.columns()) throw Exception("Values size should match with number of columns", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 28a41fc1f06..10743cd5acc 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -45,13 +45,29 @@ def remove_control_characters(s): s = re.sub(r"[\x00-\x08\x0b\x0e-\x1f\x7f]", "", s) return s -def run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file): - if ext == '.sql': - command = "{0} --send_logs_level={1} --testmode --multiquery < {2} > {3} 2> {4}".format(args.client_with_database, server_logs_level, case_file, stdout_file, stderr_file) - else: - command = 
"{} > {} 2> {}".format(case_file, stdout_file, stderr_file) - proc = Popen(command, shell = True) +def run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file): + + # print(client_options) + + params = { + 'client': args.client_with_database, + 'logs_level': server_logs_level, + 'options': client_options, + 'test': case_file, + 'stdout': stdout_file, + 'stderr': stderr_file, + } + + pattern = '{test} > {stdout} 2> {stderr}' + + if ext == '.sql': + pattern = "{client} --send_logs_level={logs_level} --testmode --multiquery {options} < " + pattern + + command = pattern.format(**params) + #print(command) + + proc = Popen(command, shell=True, env=os.environ) start_time = datetime.now() while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None: sleep(0.01) @@ -67,15 +83,18 @@ def run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr return proc, stdout, stderr + def need_retry(stderr): return any(msg in stderr for msg in MESSAGES_TO_RETRY) + def get_processlist(client_cmd): try: return subprocess.check_output("{} --query 'SHOW PROCESSLIST FORMAT Vertical'".format(client_cmd), shell=True) except: return "" # server seems dead + def get_stacktraces(server_pid): cmd = "gdb -q -ex 'set pagination off' -ex 'backtrace' -ex 'thread apply all backtrace' -ex 'detach' -ex 'quit' --pid {} 2>/dev/null".format(server_pid) try: @@ -83,6 +102,7 @@ def get_stacktraces(server_pid): except Exception as ex: return "Error occured while receiving stack traces {}".format(str(ex)) + def get_server_pid(server_tcp_port): cmd = "lsof -i tcp:{port} | grep '*:{port}'".format(port=server_tcp_port) try: @@ -95,17 +115,19 @@ def get_server_pid(server_tcp_port): except Exception as ex: return None + def colored(text, args, color=None, on_color=None, attrs=None): if termcolor and (sys.stdout.isatty() or args.force_color): return termcolor.colored(text, color, on_color, attrs) else: return text + SERVER_DIED = False exit_code = 0 -#def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total): +# def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total): def run_tests_array(all_tests_with_params): all_tests, suite, suite_dir, suite_tmp_dir, run_total = all_tests_with_params global exit_code @@ -125,6 +147,8 @@ def run_tests_array(all_tests_with_params): failures = 0 failures_chain = 0 + client_options = get_additional_client_options(args) + if len(all_tests): print("\nRunning {} {} tests.".format(len(all_tests), suite) + "\n") @@ -170,7 +194,7 @@ def run_tests_array(all_tests_with_params): stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout' stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr' - proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file) + proc, stdout, stderr = run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file) if proc.returncode is None: try: proc.kill() @@ -183,7 +207,7 @@ def run_tests_array(all_tests_with_params): else: counter = 1 while proc.returncode != 0 and need_retry(stderr): - proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file) + proc, stdout, stderr = run_single_test(args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file) sleep(2**counter) counter += 1 if counter > 6: @@ -214,7 +238,7 @@ def run_tests_array(all_tests_with_params): result_is_different = subprocess.call(['diff', '-q', 
reference_file, stdout_file], stdout = PIPE) if result_is_different: - diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0] + diff = Popen(['diff', '-U', str(args.unified), reference_file, stdout_file], stdout = PIPE).communicate()[0] failures += 1 print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff)) else: @@ -245,8 +269,10 @@ def run_tests_array(all_tests_with_params): else: print(colored("\n{passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total), args, "green", attrs=["bold"])) + server_logs_level = "warning" + def main(args): global SERVER_DIED global exit_code @@ -435,6 +461,20 @@ def find_binary(name): return False +def get_additional_client_options(args): + if args.client_option: + return ' '.join('--' + option for option in args.client_option) + + return '' + + +def get_additional_client_options_url(args): + if args.client_option: + return '&'.join(args.client_option) + + return '' + + if __name__ == '__main__': parser=ArgumentParser(description='ClickHouse functional tests') parser.add_argument('-q', '--queries', help='Path to queries dir') @@ -456,6 +496,7 @@ if __name__ == '__main__': parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)') parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total') parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') + parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests') parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests') @@ -467,6 +508,7 @@ if __name__ == '__main__': group=parser.add_mutually_exclusive_group(required=False) group.add_argument('--shard', action='store_true', default=None, dest='shard', help='Run sharding related tests (required to clickhouse-server listen 127.0.0.2 127.0.0.3)') group.add_argument('--no-shard', action='store_false', default=None, dest='shard', help='Do not run shard related tests') + group.add_argument('--client-option', nargs='+', help='Specify additional client argument') args = parser.parse_args() @@ -503,6 +545,24 @@ if __name__ == '__main__': if os.getenv("CLICKHOUSE_DATABASE"): args.client += ' --database=' + os.getenv("CLICKHOUSE_DATABASE") + if args.client_option: + # Set options for client + if 'CLICKHOUSE_CLIENT_OPT' in os.environ: + os.environ['CLICKHOUSE_CLIENT_OPT'] += ' ' + else: + os.environ['CLICKHOUSE_CLIENT_OPT'] = '' + + os.environ['CLICKHOUSE_CLIENT_OPT'] += get_additional_client_options(args) + + # Set options for curl + if 'CLICKHOUSE_URL_PARAMS' in os.environ: + os.environ['CLICKHOUSE_URL_PARAMS'] += '&' + else: + os.environ['CLICKHOUSE_URL_PARAMS'] = '' + + os.environ['CLICKHOUSE_URL_PARAMS'] += get_additional_client_options_url(args) + + args.client_with_database = args.client if not args.database: def random_str(length=6): @@ -520,6 +580,6 @@ if __name__ == '__main__': args.extract_from_config = args.binary + ' extract-from-config' if args.jobs is None: - args.jobs=multiprocessing.cpu_count() + args.jobs = multiprocessing.cpu_count() main(args) diff --git a/dbms/tests/performance/date_time.xml b/dbms/tests/performance/date_time.xml index b62cde40860..77a6c634b34 100644 --- a/dbms/tests/performance/date_time.xml +++ 
b/dbms/tests/performance/date_time.xml @@ -54,6 +54,11 @@ toYYYYMM toYYYYMMDD toYYYYMMDDhhmmss + + timeSlot + toRelativeQuarterNum + toStartOfTenMinutes + toUnixTimestamp @@ -70,6 +75,7 @@ toDate toMonday + toStartOfDay toStartOfMonth toStartOfQuarter toStartOfYear @@ -83,18 +89,55 @@ toYYYYMM toYYYYMMDD toYYYYMMDDhhmmss + + toRelativeQuarterNum + toUnixTimestamp - - - time_zone - + + + time_zone + UTC Europe/Moscow Asia/Kolkata - - + + + + binary_function + + lessOrEquals + less + greater + greaterOrEquals + equals + notEquals + plus + minus + addDays + addHours + addMinutes + addMonths + addQuarters + addSeconds + addWeeks + addYears + subtractDays + subtractHours + subtractMinutes + subtractMonths + subtractQuarters + subtractSeconds + subtractWeeks + subtractYears + + SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM system.numbers WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) - + + SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + + SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00039_inserts_through_http.sh b/dbms/tests/queries/0_stateless/00039_inserts_through_http.sh index 0f37827d0be..947d3c8e30a 100755 --- a/dbms/tests/queries/0_stateless/00039_inserts_through_http.sh +++ b/dbms/tests/queries/0_stateless/00039_inserts_through_http.sh @@ -7,6 +7,6 @@ echo 'DROP TABLE IF EXISTS long_insert' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_U echo 'CREATE TABLE long_insert (a String) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL} -d @- for string_size in 1 10 100 1000 10000 100000 1000000; do # LC_ALL=C is needed because otherwise Perl will bark on bad tuned environment. - LC_ALL=C perl -we 'for my $letter ("a" .. "z") { print(($letter x '$string_size') . "\n") }' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}?query=INSERT+INTO+long_insert+FORMAT+TabSeparated" --data-binary @- + LC_ALL=C perl -we 'for my $letter ("a" .. "z") { print(($letter x '$string_size') . "\n") }' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT+INTO+long_insert+FORMAT+TabSeparated" --data-binary @- echo 'SELECT substring(a, 1, 1) AS c, length(a) AS l FROM long_insert ORDER BY c, l' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL} -d @- done diff --git a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh index 6960676ceb6..49e6bcb5ee1 100755 --- a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh +++ b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh @@ -3,12 +3,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=DROP+TABLE" -d 'IF EXISTS insert' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=CREATE" -d 'TABLE insert (x UInt8) ENGINE = Memory' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=DROP+TABLE" -d 'IF EXISTS insert' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=CREATE" -d 'TABLE insert (x UInt8) ENGINE = Memory' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'INSERT INTO insert VALUES (1),(2)' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+insert+VALUES" -d '(3),(4)' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+insert" -d 'VALUES (5),(6)' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+insert+VALUES+(7)" -d ',(8)' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+insert+VALUES+(9),(10)" -d ' ' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+insert+VALUES" -d '(3),(4)' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+insert" -d 'VALUES (5),(6)' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+insert+VALUES+(7)" -d ',(8)' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+insert+VALUES+(9),(10)" -d ' ' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT x FROM insert ORDER BY x' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=DROP+TABLE" -d 'insert' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=DROP+TABLE" -d 'insert' diff --git a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh index e540d7e3475..c350ae98407 100755 --- a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh +++ b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL}?extremes=1 -d @- <<< "DROP TABLE IF EXISTS test_00210" -${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL}?extremes=1 -d @- <<< "CREATE TABLE test_00210 (x UInt8) ENGINE = Log" -${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL}?extremes=1 -d @- <<< "INSERT INTO test_00210 SELECT 1 AS x" -${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL}?extremes=1 -d @- <<< "DROP TABLE test_00210" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<< "DROP TABLE IF EXISTS test_00210" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<< "CREATE TABLE test_00210 (x UInt8) ENGINE = Log" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<< "INSERT INTO test_00210 SELECT 1 AS x" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<< "DROP TABLE test_00210" diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index 7ca0f2fb7be..63686e2e352 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -1,3 +1,4 @@ +uniqHLL12 1 1 3 1 6 1 @@ -50,6 +51,7 @@ 31 54151 35 54328 36 52997 +uniqHLL12 round(float) 0.125 1 0.5 1 0.05 1 @@ -102,6 +104,7 @@ 0.043 54620 0.037 53394 0.071 53951 +uniqHLL12 round(toFloat32()) 0.5 1 0.05 1 0.25 1 @@ -154,6 +157,7 @@ 0.037 53394 0.1 54138 1 54571 +uniqHLL12 IPv4NumToString 1 1 3 1 6 1 @@ -206,7 +210,9 @@ 31 53574 35 55022 36 53961 +uniqHLL12 remote() 1 +uniqCombined 1 1 3 1 6 1 @@ -259,6 +265,7 @@ 31 53948 35 53931 36 53982 +uniqCombined(12) 1 1 3 1 6 1 @@ -311,6 +318,7 @@ 31 53763 35 54635 36 53155 +uniqCombined(17) 1 1 3 1 6 1 @@ -363,6 +371,7 @@ 31 53948 35 53931 36 53982 +uniqCombined(20) 1 1 3 1 6 1 @@ -415,6 +424,7 @@ 31 54054 35 54054 36 54054 +uniqCombined(round(float)) 0.125 1 0.5 1 0.05 1 @@ -467,6 +477,7 @@ 0.043 54150 0.037 54047 0.071 53963 +uniqCombined(12)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -519,6 +530,7 @@ 0.043 53827 0.037 53920 0.071 53409 +uniqCombined(17)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -571,6 +583,7 @@ 0.043 54150 0.037 54047 0.071 53963 +uniqCombined(20)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -623,6 +636,7 @@ 0.043 54053 0.037 54053 0.071 54054 +uniqCombined(X)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -675,6 +689,7 @@ 0.037 54047 0.1 53853 1 53901 +uniqCombined(12)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -727,6 +742,7 @@ 0.037 53920 0.1 53417 1 54708 +uniqCombined(17)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -779,6 +795,7 @@ 0.037 54047 0.1 53853 1 53901 +uniqCombined(20)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -831,6 +848,7 @@ 0.037 54053 0.1 54053 1 54054 +uniqCombined(Z)(IPv4NumToString) 1 1 3 1 6 1 @@ -883,6 +901,7 @@ 31 54074 35 54153 36 53999 +uniqCombined(12)(IPv4NumToString) 1 1 3 1 6 1 @@ -897,24 +916,24 @@ 31 1 35 1 36 1 -0 162 +0 161 1 162 3 162 6 162 -7 163 -9 163 +7 164 +9 162 10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 +11 160 +13 163 +14 161 +17 159 +19 165 +20 163 +21 163 +22 158 26 162 31 162 -35 162 +35 164 36 162 0 52613 1 54468 @@ -935,6 +954,7 @@ 31 55200 35 54808 36 53051 +uniqCombined(17)(IPv4NumToString) 1 1 3 1 6 1 @@ -987,6 +1007,7 @@ 31 54074 35 54153 36 53999 +uniqCombined(20)(IPv4NumToString) 1 1 3 1 6 1 @@ -1020,25 +1041,26 @@ 31 162 35 162 36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 +0 54038 +1 54104 +3 54033 +6 54084 +7 54081 +9 54093 10 27027 -11 54055 -13 
54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 +11 54064 +13 54055 +14 54063 +17 54055 +19 53960 +20 54033 +21 53988 +22 54086 +26 54106 +31 54039 +35 54018 +36 54084 +uniqCombined remote() 1 1 1 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index ae54831b1af..afef71ae06d 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -1,93 +1,134 @@ -/* uniqHLL12 */ +-- uniqHLL12 + +SELECT 'uniqHLL12'; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 round(float)'; + SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 round(toFloat32())'; + SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 IPv4NumToString'; + SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 remote()'; + SELECT uniqHLL12(dummy) FROM remote('127.0.0.{2,3}', system.one); -/* uniqCombined */ +-- uniqCombined + +SELECT 'uniqCombined'; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, 
(3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(round(float))'; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(round(float))'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(round(float))'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(round(float))'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(X)(round(toFloat32()))'; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(round(toFloat32()))'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; 
SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(round(toFloat32()))'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(round(toFloat32()))'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(Z)(IPv4NumToString)'; + SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(IPv4NumToString)'; + SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(IPv4NumToString)'; + SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(IPv4NumToString)'; + SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined remote()'; + SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT 
uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one); diff --git a/dbms/tests/queries/0_stateless/00265_content_type.sh b/dbms/tests/queries/0_stateless/00265_content_type.sh index dfef65de901..feddb46a6a4 100755 --- a/dbms/tests/queries/0_stateless/00265_content_type.sh +++ b/dbms/tests/queries/0_stateless/00265_content_type.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL}?default_format=JSONCompact --data-binary @- <<< "SELECT 1" 2>&1 | grep '< Content-Type'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&default_format=JSONCompact" --data-binary @- <<< "SELECT 1" 2>&1 | grep '< Content-Type'; ${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT JSON" 2>&1 | grep '< Content-Type'; ${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1" 2>&1 | grep '< Content-Type'; ${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1 FORMAT TabSeparated" 2>&1 | grep '< Content-Type'; diff --git a/dbms/tests/queries/0_stateless/00301_csv.reference b/dbms/tests/queries/0_stateless/00301_csv.reference index 92cb50c0727..a9351f91f70 100644 --- a/dbms/tests/queries/0_stateless/00301_csv.reference +++ b/dbms/tests/queries/0_stateless/00301_csv.reference @@ -4,10 +4,6 @@ Hello "world" 789 2016-01-03 Hello\n world 100 2016-01-04 default 1 2019-06-19 default-eof 1 2019-06-19 -0 1 42 2019-07-22 -1 world 3 2019-07-23 -2 Hello 123 2019-06-19 -3 Hello 42 2019-06-19 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 diff --git a/dbms/tests/queries/0_stateless/00301_csv.sh b/dbms/tests/queries/0_stateless/00301_csv.sh index c1bb6710c1f..cb0167b4e99 100755 --- a/dbms/tests/queries/0_stateless/00301_csv.sh +++ b/dbms/tests/queries/0_stateless/00301_csv.sh @@ -17,17 +17,6 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19') ENGINE = Memory"; -echo '\N, 1, \N, "2019-07-22" -1, world, 3, "2019-07-23" -2, \N, 123, \N -3, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO csv FORMAT CSV"; - -$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY i"; -$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - - $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" diff --git a/dbms/tests/queries/0_stateless/00302_http_compression.sh b/dbms/tests/queries/0_stateless/00302_http_compression.sh index 399fe27ea9b..99921d114cd 100755 --- a/dbms/tests/queries/0_stateless/00302_http_compression.sh +++ b/dbms/tests/queries/0_stateless/00302_http_compression.sh @@ -3,33 +3,29 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10'; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=0" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10'; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip, deflate' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: zip, eflate' -d 'SELECT number FROM system.numbers LIMIT 10'; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' | brotli -d; - -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10'; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=0" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10'; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip, deflate' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: zip, eflate' -d 'SELECT number FROM system.numbers LIMIT 10'; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' | brotli -d; - -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: deflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip, deflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: zip, eflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: deflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 
'Accept-Encoding: gzip, deflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: zip, eflate' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: br' -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep --text '< Content-Encoding'; echo "SELECT 1" | ${CLICKHOUSE_CURL} -sS --data-binary @- ${CLICKHOUSE_URL}; -echo "SELECT 1" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' ${CLICKHOUSE_URL}; -echo "SELECT 1" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' ${CLICKHOUSE_URL}; +echo "SELECT 1" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}"; +echo "SELECT 1" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}"; -echo "'Hello, world'" | ${CLICKHOUSE_CURL} -sS --data-binary @- "${CLICKHOUSE_URL}?query=SELECT"; -echo "'Hello, world'" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}?query=SELECT"; -echo "'Hello, world'" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}?query=SELECT"; +echo "'Hello, world'" | ${CLICKHOUSE_CURL} -sS --data-binary @- "${CLICKHOUSE_URL}&query=SELECT"; +echo "'Hello, world'" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}&query=SELECT"; +echo "'Hello, world'" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}&query=SELECT"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c; # POST multiple concatenated gzip streams. -(echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}?" --data-binary @- +(echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}" --data-binary @- diff --git a/dbms/tests/queries/0_stateless/00304_http_external_data.sh b/dbms/tests/queries/0_stateless/00304_http_external_data.sh index 0914094f8bd..30752771753 100755 --- a/dbms/tests/queries/0_stateless/00304_http_external_data.sh +++ b/dbms/tests/queries/0_stateless/00304_http_external_data.sh @@ -3,5 +3,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}?query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String"; -echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}?query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@"; +echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String"; +echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@"; diff --git a/dbms/tests/queries/0_stateless/00305_http_and_readonly.sh b/dbms/tests/queries/0_stateless/00305_http_and_readonly.sh index 56ae8131a15..a86d828b901 100755 --- a/dbms/tests/queries/0_stateless/00305_http_and_readonly.sh +++ b/dbms/tests/queries/0_stateless/00305_http_and_readonly.sh @@ -4,20 +4,20 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh # POST permits everything. -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' # GET implies readonly = 2. -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" # It is possible to simultaneously set more strict variant of readonly and specify some other settings. 
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&readonly=1&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&readonly=2&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&readonly=1&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+name,value,changed+FROM+system.settings+WHERE+name+IN+('readonly','max_rows_to_read')&readonly=2&max_rows_to_read=10000&default_format=PrettySpaceNoEscapes" -d' ' -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?query=DROP+TABLE+IF+EXISTS+nonexistent" 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?readonly=0&query=DROP+TABLE+IF+EXISTS+nonexistent" 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&query=DROP+TABLE+IF+EXISTS+nonexistent" 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&readonly=0&query=DROP+TABLE+IF+EXISTS+nonexistent" 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' | wc -l -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?readonly=0&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' | wc -l +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' | wc -l +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&readonly=0&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' | wc -l -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?readonly=1&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?readonly=2&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&readonly=1&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&readonly=2&query=DROP+TABLE+IF+EXISTS+nonexistent" -d ' ' 2>&1 | grep -q '500 Internal Server Error' && echo 'Ok' || echo 'Fail' diff --git a/dbms/tests/queries/0_stateless/00313_const_totals_extremes.sh b/dbms/tests/queries/0_stateless/00313_const_totals_extremes.sh index 82e5cdd3c1f..c88b97394b7 100755 --- a/dbms/tests/queries/0_stateless/00313_const_totals_extremes.sh +++ b/dbms/tests/queries/0_stateless/00313_const_totals_extremes.sh @@ -3,10 +3,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS FORMAT JSON"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS FORMAT JSONCompact"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1 AS k, count() GROUP BY k WITH TOTALS"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1234567890123 AS k, count() GROUP BY k WITH TOTALS FORMAT JSON"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toFloat32(1.23) AS k, count() GROUP BY k WITH TOTALS FORMAT JSONCompact"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03') AS k, count() GROUP BY k WITH TOTALS FORMAT JSON"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS FORMAT JSONCompact"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDate('2010-01-01') AS k, count() GROUP BY k WITH TOTALS"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT toDateTime('2010-01-01 01:02:03') AS k, count() GROUP BY k WITH TOTALS FORMAT JSON"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1&output_format_write_statistics=0" -d "SELECT 1.1 AS k, count() GROUP BY k WITH TOTALS FORMAT JSONCompact"; diff --git a/dbms/tests/queries/0_stateless/00322_disable_checksumming.sh b/dbms/tests/queries/0_stateless/00322_disable_checksumming.sh index f553ea8118c..6d34caafe4a 100755 --- a/dbms/tests/queries/0_stateless/00322_disable_checksumming.sh +++ b/dbms/tests/queries/0_stateless/00322_disable_checksumming.sh @@ -3,5 +3,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -echo -ne '\x50\x74\x32\xf2\x59\xe9\x8a\xdb\x37\xc6\x4a\xa7\xfb\x22\xc4\x39''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?decompress=1" --data-binary @- -echo -ne 'xxxxxxxxxxxxxxxx''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- +echo -ne '\x50\x74\x32\xf2\x59\xe9\x8a\xdb\x37\xc6\x4a\xa7\xfb\x22\xc4\x39''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&decompress=1" --data-binary @- +echo -ne 'xxxxxxxxxxxxxxxx''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- diff --git a/dbms/tests/queries/0_stateless/00335_bom.sh b/dbms/tests/queries/0_stateless/00335_bom.sh index aa1ef818026..a90f659b8ec 100755 --- a/dbms/tests/queries/0_stateless/00335_bom.sh +++ b/dbms/tests/queries/0_stateless/00335_bom.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo 'DROP TABLE IF EXISTS bom' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- echo 'CREATE TABLE bom (a UInt8, b UInt8, c UInt8) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- -echo -ne '1,2,3\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+bom+FORMAT+CSV" --data-binary @- -echo -ne '\xEF\xBB\xBF4,5,6\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=INSERT+INTO+bom+FORMAT+CSV" --data-binary @- +echo -ne '1,2,3\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+bom+FORMAT+CSV" --data-binary @- +echo -ne '\xEF\xBB\xBF4,5,6\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+bom+FORMAT+CSV" --data-binary @- echo 'SELECT * FROM bom ORDER BY a' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- echo 'DROP TABLE bom' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- diff --git a/dbms/tests/queries/0_stateless/00336_shard_stack_trace.sh b/dbms/tests/queries/0_stateless/00336_shard_stack_trace.sh index a7f25be52be..73ce77c521c 100755 --- a/dbms/tests/queries/0_stateless/00336_shard_stack_trace.sh +++ b/dbms/tests/queries/0_stateless/00336_shard_stack_trace.sh @@ -4,8 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT a' | wc -l -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?stacktrace=0" -d 'SELECT a' | wc -l -[[ $(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?stacktrace=1" -d 'SELECT a' | wc -l) -ge 3 ]] && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&stacktrace=0" -d 'SELECT a' | wc -l +[[ $(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&stacktrace=1" -d 'SELECT a' | wc -l) -ge 3 ]] && echo 'Ok' || echo 'Fail' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT intDiv(number, 0) FROM remote('127.0.0.{2,3}', system.numbers)" | wc -l $CLICKHOUSE_CLIENT --query="SELECT a" --server_logs_file=/dev/null 2>&1 | wc -l diff --git a/dbms/tests/queries/0_stateless/00366_multi_statements.sh b/dbms/tests/queries/0_stateless/00366_multi_statements.sh index 3223702ac0a..035d2daf89f 100755 --- a/dbms/tests/queries/0_stateless/00366_multi_statements.sh +++ b/dbms/tests/queries/0_stateless/00366_multi_statements.sh @@ -30,21 +30,21 @@ $CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" $CLICKHOUSE_CLIENT -n --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" $CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1;" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1; " -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1 ; " +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1;" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1; " +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1 ; " -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1; S" 2>&1 | grep -o 'Syntax error' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1; SELECT 2" 2>&1 | grep -o 'Syntax error' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1; SELECT 2;" 2>&1 | grep -o 'Syntax error' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1; S" 2>&1 | grep -o 'Syntax error' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1; SELECT 2" 2>&1 | grep -o 'Syntax error' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1; SELECT 2;" 2>&1 | grep -o 'Syntax error' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" -d "INSERT INTO t_00366 VALUES (1),(2),(3);" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "INSERT INTO t_00366 VALUES (1),(2),(3);" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT" -d "INTO t_00366 VALUES (4),(5),(6);" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT" -d "INTO t_00366 VALUES (4),(5),(6);" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+t_00366+VALUES" -d "(7),(8),(9)" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+t_00366+VALUES" -d "(7),(8),(9)" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" $CLICKHOUSE_CLIENT -n --query="DROP TABLE t_00366;" diff --git a/dbms/tests/queries/0_stateless/00372_cors_header.sh b/dbms/tests/queries/0_stateless/00372_cors_header.sh index 3498b351e50..2fe1f5df3d8 100755 --- a/dbms/tests/queries/0_stateless/00372_cors_header.sh +++ 
b/dbms/tests/queries/0_stateless/00372_cors_header.sh @@ -3,6 +3,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?add_http_cors_header=1" -H "Origin:smi2.ru" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?add_http_cors_header=0" -H "Origin:smi2.ru" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?add_http_cors_header=1" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&add_http_cors_header=1" -H "Origin:smi2.ru" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&add_http_cors_header=0" -H "Origin:smi2.ru" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&add_http_cors_header=1" --data-binary @- <<< "SELECT 1" 2>&1 | grep -F "< Access-Control-Allow-Origin: *" | wc -l diff --git a/dbms/tests/queries/0_stateless/00379_system_processes_port.sh b/dbms/tests/queries/0_stateless/00379_system_processes_port.sh index 0c46cccb8ae..c359f1bb1d0 100755 --- a/dbms/tests/queries/0_stateless/00379_system_processes_port.sh +++ b/dbms/tests/queries/0_stateless/00379_system_processes_port.sh @@ -4,4 +4,4 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -sS --local-port 1390 "${CLICKHOUSE_URL}?query_id=my_id&query=SELECT+port+FROM+system.processes+WHERE+query_id%3D%27my_id%27+ORDER+BY+elapsed+LIMIT+1" +${CLICKHOUSE_CURL} -sS --local-port 1390 "${CLICKHOUSE_URL}&query_id=my_id&query=SELECT+port+FROM+system.processes+WHERE+query_id%3D%27my_id%27+ORDER+BY+elapsed+LIMIT+1" diff --git a/dbms/tests/queries/0_stateless/00386_long_in_pk.python b/dbms/tests/queries/0_stateless/00386_long_in_pk.python index 247e0fe1e61..f189233d299 100644 --- a/dbms/tests/queries/0_stateless/00386_long_in_pk.python +++ b/dbms/tests/queries/0_stateless/00386_long_in_pk.python @@ -40,7 +40,7 @@ import requests import os def main(): - url = os.environ['CLICKHOUSE_URL_PARAMS'] + url = os.environ['CLICKHOUSE_URL'] for q in gen_queries(): resp = requests.post(url, data=q) diff --git a/dbms/tests/queries/0_stateless/00408_http_keep_alive.sh b/dbms/tests/queries/0_stateless/00408_http_keep_alive.sh index d4f63522800..79e39d83704 100755 --- a/dbms/tests/queries/0_stateless/00408_http_keep_alive.sh +++ b/dbms/tests/queries/0_stateless/00408_http_keep_alive.sh @@ -3,9 +3,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< "SELECT 1" 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL} --data-binary @- <<< " error here " 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL}ping 2>&1 | perl -lnE 'print if /Keep-Alive/'; +URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" + +${CLICKHOUSE_CURL} -vsS ${URL} --data-binary @- <<< "SELECT 1" 2>&1 | perl -lnE 'print if /Keep-Alive/'; +${CLICKHOUSE_CURL} -vsS ${URL} --data-binary @- <<< " error here " 2>&1 | perl -lnE 'print if /Keep-Alive/'; +${CLICKHOUSE_CURL} -vsS ${URL}ping 2>&1 | perl -lnE 'print if /Keep-Alive/'; # no keep-alive: -${CLICKHOUSE_CURL} -vsS ${CLICKHOUSE_URL}404/not/found/ 2>&1 | perl -lnE 'print if /Keep-Alive/'; +${CLICKHOUSE_CURL} -vsS ${URL}404/not/found/ 2>&1 | perl -lnE 'print if /Keep-Alive/'; diff --git a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index c86154a8402..6b331288749 100755 --- a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -3,20 +3,20 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' # This test will fail with external poco (progress not supported) -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d # 'send_progress_in_http_headers' is false by default -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -q 'X-ClickHouse-Progress' && echo 'Fail' || true +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -q 'X-ClickHouse-Progress' && echo 'Fail' || true # have header? 
-${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 1' 2>&1 | grep -q "Content-Encoding: gzip" && true || echo 'Fail' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 1' 2>&1 | grep -q "Content-Encoding: gzip" && true || echo 'Fail' # nothing in body = no gzip -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' 2>&1 | grep -q 'Content-Encoding: gzip' && echo 'Fail' || true +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' 2>&1 | grep -q 'Content-Encoding: gzip' && echo 'Fail' || true # test insertion stats @@ -26,7 +26,7 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query (record UInt32) Engine = Memory' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query_2 (record UInt32) Engine = Memory' > /dev/null 2>&1 -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query2' > /dev/null 2>&1 diff --git a/dbms/tests/queries/0_stateless/00425_count_nullable.reference b/dbms/tests/queries/0_stateless/00425_count_nullable.reference index 45d1e2fecc2..7d9a1ebd64b 100644 --- a/dbms/tests/queries/0_stateless/00425_count_nullable.reference +++ b/dbms/tests/queries/0_stateless/00425_count_nullable.reference @@ -1,12 +1,5 @@ 10 10 10 -10 -10 -10 -10 9 -9 -9 -8 0 diff --git a/dbms/tests/queries/0_stateless/00425_count_nullable.sql b/dbms/tests/queries/0_stateless/00425_count_nullable.sql index b90fc5e0f2f..e0fb491e488 100644 --- a/dbms/tests/queries/0_stateless/00425_count_nullable.sql +++ b/dbms/tests/queries/0_stateless/00425_count_nullable.sql @@ -1,16 +1,9 @@ SELECT count() FROM (SELECT number AS x FROM system.numbers LIMIT 10); SELECT count(x) FROM (SELECT number AS x FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT number AS x, number AS y FROM system.numbers LIMIT 10); SELECT count(x) FROM (SELECT CAST(number AS Nullable(UInt64)) AS x FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT CAST(number AS Nullable(UInt64)) AS x, number AS y FROM 
system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT number AS x, CAST(number AS Nullable(UInt64)) AS y FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT CAST(number AS Nullable(UInt64)) AS x, CAST(number AS Nullable(UInt64)) AS y FROM system.numbers LIMIT 10); SELECT count(x) FROM (SELECT nullIf(number, 5) AS x FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT nullIf(number, 5) AS x, number AS y FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT number AS x, nullIf(number, 3) AS y FROM system.numbers LIMIT 10); -SELECT count(x, y) FROM (SELECT nullIf(number, 5) AS x, nullIf(number, 3) AS y FROM system.numbers LIMIT 10); SELECT count(NULL); diff --git a/dbms/tests/queries/0_stateless/00429_long_http_bufferization.sh b/dbms/tests/queries/0_stateless/00429_long_http_bufferization.sh index a306e7959f5..bf50d79b4b3 100755 --- a/dbms/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/dbms/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -14,7 +14,7 @@ function query { } function ch_url() { - ${CLICKHOUSE_CURL_COMMAND} -sS "$URL?max_block_size=$max_block_size&$1" -d "`query $2`" + ${CLICKHOUSE_CURL_COMMAND} -sS "$URL&max_block_size=$max_block_size&$1" -d "`query $2`" } diff --git a/dbms/tests/queries/0_stateless/00474_readonly_settings.sh b/dbms/tests/queries/0_stateless/00474_readonly_settings.sh index 0b1f0c415bf..95b4d925112 100755 --- a/dbms/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/dbms/tests/queries/0_stateless/00474_readonly_settings.sh @@ -9,10 +9,10 @@ $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" -- $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode.' 
-${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode.' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' diff --git a/dbms/tests/queries/0_stateless/00485_http_insert_format.sh b/dbms/tests/queries/0_stateless/00485_http_insert_format.sh index c06155c3a38..cf74bceeeb6 100755 --- a/dbms/tests/queries/0_stateless/00485_http_insert_format.sh +++ b/dbms/tests/queries/0_stateless/00485_http_insert_format.sh @@ -6,12 +6,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS format" $CLICKHOUSE_CLIENT --query="CREATE TABLE format (s String, x FixedString(3)) ENGINE = Memory" -echo -ne '\tABC\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+format+FORMAT+TabSeparated" --data-binary @- -echo -ne 'INSERT INTO format FORMAT TabSeparated\n\tDEF\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" --data-binary @- -echo -ne 'INSERT INTO format FORMAT TabSeparated hello\tGHI\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" --data-binary @- -echo -ne 'INSERT INTO format FORMAT TabSeparated\r\n\tJKL\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" --data-binary @- -echo -ne 'INSERT INTO format FORMAT TabSeparated \t\r\n\tMNO\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" --data-binary @- -echo -ne 'INSERT INTO format FORMAT TabSeparated\t\t\thello\tPQR\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}" --data-binary @- +echo -ne '\tABC\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+format+FORMAT+TabSeparated" --data-binary @- +echo -ne 'INSERT INTO format FORMAT TabSeparated\n\tDEF\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- +echo -ne 'INSERT INTO format FORMAT TabSeparated hello\tGHI\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- +echo -ne 'INSERT INTO format FORMAT TabSeparated\r\n\tJKL\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- +echo -ne 'INSERT INTO format FORMAT TabSeparated \t\r\n\tMNO\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- +echo -ne 'INSERT INTO format FORMAT TabSeparated\t\t\thello\tPQR\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- $CLICKHOUSE_CLIENT --query="SELECT * FROM format ORDER BY s, x FORMAT JSONEachRow" $CLICKHOUSE_CLIENT --query="DROP TABLE format" diff --git a/dbms/tests/queries/0_stateless/00501_http_head.sh b/dbms/tests/queries/0_stateless/00501_http_head.sh index c5e2135a91d..e235da3c192 100755 --- a/dbms/tests/queries/0_stateless/00501_http_head.sh +++ b/dbms/tests/queries/0_stateless/00501_http_head.sh @@ -3,12 +3,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh +( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201"; + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" -( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}?query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" - -if [[ `${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}?query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then +if [[ `${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}&query=SELECT+1" | grep -c '411 Length Required'` -ne 1 ]]; then echo FAIL fi diff --git a/dbms/tests/queries/0_stateless/00564_enum_order.sh b/dbms/tests/queries/0_stateless/00564_enum_order.sh index 0424cc6a959..7cfe140a648 100755 --- a/dbms/tests/queries/0_stateless/00564_enum_order.sh +++ b/dbms/tests/queries/0_stateless/00564_enum_order.sh @@ -3,8 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL_PARAMS" -d "DROP TABLE IF EXISTS enum"; -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL_PARAMS" -d "CREATE TABLE enum (x Enum8('a' = 1, 'bcdefghijklmno' = 0)) ENGINE = Memory"; -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL_PARAMS" -d "INSERT INTO enum VALUES ('a')"; -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL_PARAMS" -d "SELECT * FROM enum"; -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL_PARAMS" -d "DROP TABLE enum"; +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "DROP TABLE IF EXISTS enum"; +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "CREATE TABLE enum (x Enum8('a' = 1, 'bcdefghijklmno' = 0)) ENGINE = Memory"; +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "INSERT INTO enum VALUES ('a')"; +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT * FROM enum"; +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "DROP TABLE enum"; diff --git a/dbms/tests/queries/0_stateless/00565_enum_order.sh b/dbms/tests/queries/0_stateless/00565_enum_order.sh index c936f05dac5..29524d68e4c 100755 --- a/dbms/tests/queries/0_stateless/00565_enum_order.sh +++ b/dbms/tests/queries/0_stateless/00565_enum_order.sh @@ -42,7 +42,7 @@ QUERY='INSERT INTO `test_log`(`date`, `datetime`, `path`, `gtid`, `query_serial` `new_fields`.`is_null`, `record_source_type`, `record_source_timestamp`, `deleted`) FORMAT TabSeparated' QUERY="$(tr -d '\n' <<<"$QUERY")" echo $QUERY -URL=$(python -c 'print "'${CLICKHOUSE_URL_PARAMS}'&query=" + __import__("urllib").quote("'"$QUERY"'")') +URL=$(python -c 'print "'${CLICKHOUSE_URL}'&query=" + __import__("urllib").quote("'"$QUERY"'")') set +e for i in 1 2 3; do diff --git a/dbms/tests/queries/0_stateless/00598_create_as_select_http.sh b/dbms/tests/queries/0_stateless/00598_create_as_select_http.sh index 25761f31f25..0294739d4c1 100755 --- a/dbms/tests/queries/0_stateless/00598_create_as_select_http.sh +++ b/dbms/tests/queries/0_stateless/00598_create_as_select_http.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_00598" -$CLICKHOUSE_CURL -sS -d 'CREATE TABLE test_00598 ENGINE = Memory AS SELECT 1' $CLICKHOUSE_URL_PARAMS +$CLICKHOUSE_CURL -sS -d 'CREATE TABLE test_00598 ENGINE = Memory AS SELECT 1' $CLICKHOUSE_URL $CLICKHOUSE_CLIENT --query="SELECT * FROM test_00598" $CLICKHOUSE_CLIENT --query="DROP TABLE test_00598" diff --git 
a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 9fc25291548..465183b25e1 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -12,11 +12,11 @@ function wait_for_query_to_start() } -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' 2>&1 > /dev/null & +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' 2>&1 > /dev/null & wait_for_query_to_start 'hello' # Replace it -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 0' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 0' # Wait for it to be replaced wait @@ -28,7 +28,7 @@ wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is already running by user' # Trying to replace query of a different user -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user' $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait diff --git a/dbms/tests/queries/0_stateless/00612_http_max_query_size.sh b/dbms/tests/queries/0_stateless/00612_http_max_query_size.sh index 8e38e52941e..baf31520f40 100755 --- a/dbms/tests/queries/0_stateless/00612_http_max_query_size.sh +++ b/dbms/tests/queries/0_stateless/00612_http_max_query_size.sh @@ -3,19 +3,19 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -echo 'select 1' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL}/?max_query_size=8 -d @- 2>&1 | grep -o "Max query size exceeded" +echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_query_size=8" -d @- 2>&1 | grep -o "Max query size exceeded" echo - -echo 'select 1' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL}/?max_query_size=7 -d @- 2>&1 | grep -o "Max query size exceeded" +echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_query_size=7" -d @- 2>&1 | grep -o "Max query size exceeded" -echo "select '1'" | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL}/?max_query_size=10 -d @- 2>&1 | grep -o "Max query size exceeded" +echo "select '1'" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_query_size=10" -d @- 2>&1 | grep -o "Max query size exceeded" echo - -echo "select '11'" | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL}/?max_query_size=10 -d @- 2>&1 | grep -o "Max query size exceeded" +echo "select '11'" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_query_size=10" -d @- 2>&1 | grep -o "Max query size exceeded" -echo 'drop table if exists tab_00612_1' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL_PARAMS} -d @- -echo 'create table tab_00612_1 (key UInt64, val UInt64) engine = MergeTree order by key' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL_PARAMS} -d @- -echo 'into tab_00612_1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL_PARAMS}&max_query_size=30&query=insert" -d @- -echo 'select val from tab_00612_1 order by val' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL_PARAMS} -d @- -echo 'drop table tab_00612_1' | ${CLICKHOUSE_CURL} -sSg ${CLICKHOUSE_URL_PARAMS} -d @- +echo 'drop table if exists tab_00612_1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'create table tab_00612_1 (key UInt64, val UInt64) engine = MergeTree order by key' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'into tab_00612_1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_query_size=30&query=insert" -d @- +echo 'select val from tab_00612_1 order by val' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'drop table tab_00612_1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- echo " import requests @@ -36,7 +36,7 @@ def gen_data(q): yield pattern.format(str(i).zfill(1024 - len(pattern) + 2)) s = requests.Session() -resp = s.post(url + '/?max_query_size={}'.format(1 << 21), timeout=1, data=gen_data(q), stream=True, +resp = s.post(url + '&max_query_size={}'.format(1 << 21), timeout=1, data=gen_data(q), stream=True, headers = {'Connection': 'close'}) for line in resp.iter_lines(): diff --git a/dbms/tests/queries/0_stateless/00625_query_in_form_data.sh b/dbms/tests/queries/0_stateless/00625_query_in_form_data.sh index 50e0c97e279..4bb41f4a2a2 100755 --- a/dbms/tests/queries/0_stateless/00625_query_in_form_data.sh +++ b/dbms/tests/queries/0_stateless/00625_query_in_form_data.sh @@ -4,6 +4,6 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?query=select" -X POST --form-string 'query= 1;' 2>/dev/null +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&query=select" -X POST --form-string 'query= 1;' 2>/dev/null -echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sS -F 'file=@-' "${CLICKHOUSE_URL}?file_format=CSV&file_types=UInt8,String&query=SELE" -X POST --form-string 'query=CT * FROM file' 2>/dev/null +echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sS -F 'file=@-' "${CLICKHOUSE_URL}&file_format=CSV&file_types=UInt8,String&query=SELE" -X POST --form-string 'query=CT * FROM file' 2>/dev/null diff --git a/dbms/tests/queries/0_stateless/00719_insert_block_without_column.sh b/dbms/tests/queries/0_stateless/00719_insert_block_without_column.sh index 0cad7556674..4d5a83f22bc 100755 --- a/dbms/tests/queries/0_stateless/00719_insert_block_without_column.sh +++ b/dbms/tests/queries/0_stateless/00719_insert_block_without_column.sh @@ -16,6 +16,6 @@ ${CLICKHOUSE_CLIENT} --query "create table squashed_numbers (SomeID UInt64, Diff #port=${CLICKHOUSE_PORT_HTTP} #url="${CLICKHOUSE_PORT_HTTP_PROTO}://$address:$port/" -${CLICKHOUSE_CURL} -sS --data-binary "@${CLICKHOUSE_TMP}/test_squashing_block_without_column.out" "${CLICKHOUSE_URL_PARAMS}&query=insert%20into%20squashed_numbers%20format%20Native" +${CLICKHOUSE_CURL} -sS --data-binary "@${CLICKHOUSE_TMP}/test_squashing_block_without_column.out" "${CLICKHOUSE_URL}&query=insert%20into%20squashed_numbers%20format%20Native" ${CLICKHOUSE_CLIENT} --query "select 'Still alive'" diff --git a/dbms/tests/queries/0_stateless/00728_json_each_row_parsing.sh b/dbms/tests/queries/0_stateless/00728_json_each_row_parsing.sh index 69462d72aaa..da1cb47890c 100755 --- a/dbms/tests/queries/0_stateless/00728_json_each_row_parsing.sh +++ b/dbms/tests/queries/0_stateless/00728_json_each_row_parsing.sh @@ -9,7 +9,7 @@ cur_name=${BASH_SOURCE[0]} ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS json_parse;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE json_parse (aaa String, bbb String) ENGINE = Memory;" -for n in {1..1000000}; do echo '{"aaa":"aaa","bbb":"bbb"}'; done | curl -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT%20INTO%20json_parse%20FORMAT%20JSONEachRow" -0 --data-binary @- +for n in {1..1000000}; do echo '{"aaa":"aaa","bbb":"bbb"}'; done | curl -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20json_parse%20FORMAT%20JSONEachRow" -0 --data-binary @- ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM json_parse;" ${CLICKHOUSE_CLIENT} --query="DROP TABLE json_parse;" diff --git a/dbms/tests/queries/0_stateless/00764_max_query_size_allocation.sh b/dbms/tests/queries/0_stateless/00764_max_query_size_allocation.sh index b8490970aa1..1dbd8837fe7 100755 --- a/dbms/tests/queries/0_stateless/00764_max_query_size_allocation.sh +++ b/dbms/tests/queries/0_stateless/00764_max_query_size_allocation.sh @@ -5,4 +5,4 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?max_query_size=1000000000&max_memory_usage=10000000" -d "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_query_size=1000000000&max_memory_usage=10000000" -d "SELECT 1" diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference index e69de29bb2d..16db301bb51 100644 --- a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference +++ b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference @@ -0,0 +1,3 @@ +1 +0 +1 diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql index 1fa525eaccc..8e4a4723608 100644 --- a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql +++ b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql @@ -1,5 +1,5 @@ SET send_logs_level = 'none'; -SELECT globalNotIn(['"wh'], [NULL]); -- { serverError 53 } -SELECT globalIn([''], [NULL]); -- { serverError 53 } -SELECT notIn([['']], [[NULL]]); -- { serverError 53 } +SELECT globalNotIn(['"wh'], [NULL]); +SELECT globalIn([''], [NULL]); +SELECT notIn([['']], [[NULL]]); diff --git a/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh index 4d4a68ccfa7..ad1f341bd7a 100755 --- a/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh +++ b/dbms/tests/queries/0_stateless/00825_http_header_query_id.sh @@ -4,4 +4,4 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL_COMMAND} -I -sSg ${CLICKHOUSE_URL}?query=SELECT%201 | grep -o X-ClickHouse-Query-Id +${CLICKHOUSE_CURL_COMMAND} -I -sSg "${CLICKHOUSE_URL}&query=SELECT%201" | grep -o X-ClickHouse-Query-Id diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference index dd99ce850ba..4b60da9d9af 100644 --- a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference +++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference @@ -29,16 +29,10 @@ 15 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 15 -15 -[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 3 -3 -[6,8,10] [6,8,10] 10 -10 -[1,3,5,6,8,10,11,13,14,15] [1,3,5,6,8,10,11,13,14,15] 0 0 diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql index 439347f7a32..6e6fbe012c2 100644 --- a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql +++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql @@ -120,19 +120,13 @@ SELECT groupBitmapMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag SELECT arraySort(bitmapToArray(groupBitmapMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT groupBitmapOrMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT arraySort(bitmapToArray(groupBitmapOrMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT groupBitmapAndMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT 
arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT arraySort(bitmapToArray(groupBitmapAndMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT groupBitmapXorMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -SELECT arraySort(bitmapToArray(groupBitmapXorMergeState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); DROP TABLE IF EXISTS bitmap_test; DROP TABLE IF EXISTS bitmap_state_test; diff --git a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh index 3ec5bcd7791..ee56d41f357 100755 --- a/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh +++ b/dbms/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -3,10 +3,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL_PARAMS}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' +${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' for i in {1..10} do - ${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL_PARAMS}" | grep '0' && break + ${CLICKHOUSE_CURL} -sS --data "SELECT count() FROM system.processes WHERE query_id = 'cancel_http_readonly_queries_on_client_close'" "${CLICKHOUSE_URL}" | grep '0' && break sleep 0.2 done diff --git a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql index 7f1333d8303..46ff7dad472 100644 --- a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql +++ b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql @@ -8,7 +8,6 @@ CREATE TABLE t2_00850 (dummy UInt8) ENGINE = Distributed(test_shard_localhost, c INSERT INTO t_local VALUES (1); -SET asterisk_left_columns_only = 1; SET joined_subquery_requires_alias = 0; SELECT * FROM t1_00850 @@ -34,9 +33,6 @@ GLOBAL INNER JOIN USING dummy ) USING dummy; - -SET asterisk_left_columns_only = 0; - SELECT * FROM remote('127.0.0.2', system.one) GLOBAL INNER JOIN ( diff --git a/dbms/tests/queries/0_stateless/00851_http_insert_json_defaults.sh b/dbms/tests/queries/0_stateless/00851_http_insert_json_defaults.sh index a1b71e97b0b..533de68a43e 100755 --- a/dbms/tests/queries/0_stateless/00851_http_insert_json_defaults.sh +++ b/dbms/tests/queries/0_stateless/00851_http_insert_json_defaults.sh @@ -6,10 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS defaults" $CLICKHOUSE_CLIENT --query="CREATE TABLE defaults (x UInt32, y UInt32, a DEFAULT x + y, b Float32 DEFAULT round(log(1 + x + y), 5), c UInt32 DEFAULT 42, e MATERIALIZED x + y, f ALIAS x + y) ENGINE = Memory" -echo -ne '{"x":1, "y":1}\n' | ${CLICKHOUSE_CURL} -sS 
"${CLICKHOUSE_URL_PARAMS}&query=INSERT%20INTO%20defaults%20FORMAT%20JSONEachRow%20SETTINGS%20input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":2, "y":2, "c":2}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":3, "y":3, "a":3, "b":3, "c":3}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- +echo -ne '{"x":1, "y":1}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20defaults%20FORMAT%20JSONEachRow%20SETTINGS%20input_format_defaults_for_omitted_fields=1" --data-binary @- +echo -ne '{"x":2, "y":2, "c":2}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- +echo -ne '{"x":3, "y":3, "a":3, "b":3, "c":3}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- +echo -ne '{"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- $CLICKHOUSE_CLIENT --query="SELECT * FROM defaults ORDER BY x, y FORMAT JSONEachRow" $CLICKHOUSE_CLIENT --query="DROP TABLE defaults" diff --git a/dbms/tests/queries/0_stateless/00926_multimatch.reference b/dbms/tests/queries/0_stateless/00926_multimatch.reference index 8e3a8ec4820..4a2320de57b 100644 --- a/dbms/tests/queries/0_stateless/00926_multimatch.reference +++ b/dbms/tests/queries/0_stateless/00926_multimatch.reference @@ -600,3 +600,26 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +All tests above must return 1, all tests below return something. 
+[] +[1,3] +[] +[1,2,3] diff --git a/dbms/tests/queries/0_stateless/00926_multimatch.sql b/dbms/tests/queries/0_stateless/00926_multimatch.sql index 797c59f52a5..d54e4fd2280 100644 --- a/dbms/tests/queries/0_stateless/00926_multimatch.sql +++ b/dbms/tests/queries/0_stateless/00926_multimatch.sql @@ -73,10 +73,20 @@ select 1 = multiMatchAny(materialize('abcdef'), ['a......', 'a.....']) from syst select 0 = multiMatchAny(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10; select 1 = multiMatchAny(materialize('abc'), ['a\0d']) from system.numbers limit 10; -select 1 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google', 'yandex1']) from system.numbers limit 10;; -select 2 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google1', 'yandex']) from system.numbers limit 10;; -select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10;; -select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10;; +select 1 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google', 'yandex1']) from system.numbers limit 10; +select 2 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google1', 'yandex']) from system.numbers limit 10; +select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10; +select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10; SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']); + +-- All indices tests +SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5; +SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5; +SELECT [] = multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; +SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); +SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); +SELECT [] = multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']); +SELECT 'All tests above must return 1, all tests below return something.'; +SELECT arraySort(multiMatchAllIndices(arrayJoin(['aaaa', 'aaaaaa', 'bbbb', 'aaaaaaaaaaaaaa']), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}'])); diff --git a/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql index 1af9c129284..009ed2629a8 100644 --- a/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql +++ b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql @@ -2,5 +2,6 @@ SET allow_hyperscan = 1; SELECT multiMatchAny(arrayJoin(['hello', 'world', 
'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); SET allow_hyperscan = 0; SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); -- { serverError 446 } +SELECT multiMatchAllIndices(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); -- { serverError 446 } SELECT multiSearchAny(arrayJoin(['hello', 'world', 'hello, world', 'abc']), ['hello', 'world']); diff --git a/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.reference b/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.reference index 16ee5335538..4600557506b 100644 --- a/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.reference +++ b/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.reference @@ -30,3 +30,5 @@ 1 1 1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.sql b/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.sql index 5cb75a8bc3f..48b31070204 100644 --- a/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.sql +++ b/dbms/tests/queries/0_stateless/00929_multi_match_edit_distance.sql @@ -24,3 +24,6 @@ select 1 = multiFuzzyMatchAny('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', select 2 = multiFuzzyMatchAnyIndex('string', 1, ['zorro$', '^tring', 'ip$', 'how.*', 'it{2}', 'works']); select 2 = multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^hello.*world$', '^halo.*world$']); select 1 = multiFuzzyMatchAnyIndex('halo some wrld', 2, ['^halo.*world$', '^hello.*world$']); + +select [2, 3, 4] = arraySort(multiFuzzyMatchAllIndices('halo some wrld', 2, ['some random string', '^halo.*world$', '^halo.*world$', '^halo.*world$', '^hallllo.*world$'])); +select [] = multiFuzzyMatchAllIndices('halo some wrld', 2, ['^halllllo.*world$', 'some random string']); diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql index 11f0055a377..3a5cf465581 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql @@ -57,4 +57,7 @@ create table ttl_00933_1 (d DateTime, a Int ttl 2 + 2) engine = MergeTree order create table ttl_00933_1 (d DateTime, a Int ttl toDateTime(1)) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 450 } create table ttl_00933_1 (d DateTime, a Int ttl d - d) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 450 } +create table ttl_00933_1 (d DateTime, a Int ttl d + interval 1 day) engine = Log; -- { serverError 36 } +create table ttl_00933_1 (d DateTime, a Int) engine = Log ttl d + interval 1 day; -- { serverError 36 } + drop table if exists ttl_00933_1; diff --git a/dbms/tests/queries/0_stateless/00937_template_output_format.sh b/dbms/tests/queries/0_stateless/00937_template_output_format.sh index 239f7d672d0..0b0cba4e2bd 100755 --- a/dbms/tests/queries/0_stateless/00937_template_output_format.sh +++ b/dbms/tests/queries/0_stateless/00937_template_output_format.sh @@ -20,3 +20,4 @@ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ format_template_rows_between_delimiter = ';\n'"; $CLICKHOUSE_CLIENT --query="DROP TABLE template"; +rm $CURDIR/00937_template_output_format_resultset.tmp $CURDIR/00937_template_output_format_row.tmp diff --git a/dbms/tests/queries/0_stateless/00938_template_input_format.sh b/dbms/tests/queries/0_stateless/00938_template_input_format.sh index 998fe195203..ca314db13de 100755 --- 
a/dbms/tests/queries/0_stateless/00938_template_input_format.sh +++ b/dbms/tests/queries/0_stateless/00938_template_input_format.sh @@ -50,3 +50,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE template2"; +rm $CURDIR/00938_template_input_format_resultset.tmp $CURDIR/00938_template_input_format_row.tmp diff --git a/dbms/tests/queries/0_stateless/00952_input_function.sh b/dbms/tests/queries/0_stateless/00952_input_function.sh index 085ae496faf..374cb5f9249 100755 --- a/dbms/tests/queries/0_stateless/00952_input_function.sh +++ b/dbms/tests/queries/0_stateless/00952_input_function.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM input_function_table_1 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS input_function_table_2" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE input_function_table_2 (a String, b Date, c Int32, d Int16) ENGINE=Memory()" -cat ${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT%20INTO%20input_function_table_2%20%28a%2C%20b%2C%20c%29%20SELECT%20a%2C%20b%2C%20c%2Ac%20FROM%20input%28%27a%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20FORMAT%20CSV" --data-binary @- +cat ${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20input_function_table_2%20%28a%2C%20b%2C%20c%29%20SELECT%20a%2C%20b%2C%20c%2Ac%20FROM%20input%28%27a%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20FORMAT%20CSV" --data-binary @- ${CLICKHOUSE_CLIENT} --query="SELECT * FROM input_function_table_2 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS input_function_table_3" @@ -23,7 +23,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM input_function_table_3 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS input_function_table_4" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE input_function_table_4 (a String, b Date, c Int32, d Int16) ENGINE=Memory()" -cat ${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT%20INTO%20input_function_table_4%20%28a%2C%20b%2C%20c%29%20SELECT%20%2A%20FROM%20%28SELECT%20s%2C%20b%2C%20c%2Ac%20FROM%20input%28%27s%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20JOIN%20input_function_table_1%20ON%20s%3Dinput_function_table_1.a%29%20FORMAT%20CSV" --data-binary @- +cat ${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20input_function_table_4%20%28a%2C%20b%2C%20c%29%20SELECT%20%2A%20FROM%20%28SELECT%20s%2C%20b%2C%20c%2Ac%20FROM%20input%28%27s%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20JOIN%20input_function_table_1%20ON%20s%3Dinput_function_table_1.a%29%20FORMAT%20CSV" --data-binary @- ${CLICKHOUSE_CLIENT} --query="SELECT * FROM input_function_table_4 FORMAT CSV" @@ -35,7 +35,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM input_function_table_5 FORMAT ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS input_function_table_6" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE input_function_table_6 (a String, b Date, c Int32, d Int16) ENGINE=Memory()" -cat ${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT%20INTO%20input_function_table_6%20%28a%2C%20b%2C%20c%29%20SELECT%20a%2C%20b%2C%20c%2Ac%20FROM%20input%28%27a%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20FORMAT%20CSV&max_block_size=1000" --data-binary @- +cat 
${CLICKHOUSE_TMP}/data_for_input_function.csv | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20input_function_table_6%20%28a%2C%20b%2C%20c%29%20SELECT%20a%2C%20b%2C%20c%2Ac%20FROM%20input%28%27a%20String%2C%20b%20Int32%2C%20c%20Int32%27%29%20FORMAT%20CSV&max_block_size=1000" --data-binary @- ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM input_function_table_6 FORMAT CSV" diff --git a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh index e022ff65fc2..2a7b2b6e7f6 100755 --- a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh @@ -9,13 +9,13 @@ ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1" \ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&param_id=1" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world" \ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&param_phrase=Hello,+world" \ -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25" \ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&param_date=2019-05-25" \ -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test" \ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&param_id=2&param_phrase=test" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/dbms/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql index 5f7774ac799..b9eed1e8183 100644 --- a/dbms/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql +++ b/dbms/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql @@ -11,7 +11,7 @@ SELECT hash_of_uncompressed_files, uncompressed_hash_of_compressed_files FROM system.parts -WHERE table = 'test_00961'; +WHERE table = 'test_00961' and database = currentDatabase(); DROP TABLE test_00961; diff --git a/dbms/tests/queries/0_stateless/00976_asof_join_on.reference b/dbms/tests/queries/0_stateless/00976_asof_join_on.reference index ffa8117cc75..4d1b1273363 100644 --- a/dbms/tests/queries/0_stateless/00976_asof_join_on.reference +++ b/dbms/tests/queries/0_stateless/00976_asof_join_on.reference @@ -11,3 +11,25 @@ 1 2 1 2 1 3 1 2 2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 3 1 2 +- +1 1 1 2 +1 2 1 4 +1 3 1 4 +2 1 2 3 +2 2 2 3 diff --git a/dbms/tests/queries/0_stateless/00976_asof_join_on.sql b/dbms/tests/queries/0_stateless/00976_asof_join_on.sql index 740287b7c30..ccecc0999c9 100644 --- a/dbms/tests/queries/0_stateless/00976_asof_join_on.sql +++ b/dbms/tests/queries/0_stateless/00976_asof_join_on.sql @@ -9,11 +9,15 @@ INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3); SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t; -SELECT A.a, A.t, B.b, B.t FROM A ASOF
INNER JOIN B ON B.t <= A.t AND A.a == B.b; -SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t <= B.t; -- { serverError 48 } -SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND B.t >= A.t; -- { serverError 48 } -SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t > B.t; -- { serverError 403 } -SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t < B.t; -- { serverError 403 } +SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t); +SELECT '-'; +SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t <= B.t ORDER BY (A.a, A.t); +SELECT '-'; +SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND B.t >= A.t ORDER BY (A.a, A.t); +SELECT '-'; +SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t > B.t ORDER BY (A.a, A.t); +SELECT '-'; +SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t < B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t == B.t; -- { serverError 403 } SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t != B.t; -- { serverError 403 } diff --git a/dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.reference b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference similarity index 50% rename from dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.reference rename to dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference index a7ec77dc030..c1df99e5f94 100644 --- a/dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.reference +++ b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.reference @@ -1 +1,2 @@ default merge_ab x UInt8 0 0 0 0 0 0 0 +default as_kafka x UInt8 0 0 0 0 0 0 0 diff --git a/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql new file mode 100644 index 00000000000..43c08b71b97 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00981_no_virtual_columns.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS merge_a; +DROP TABLE IF EXISTS merge_b; +DROP TABLE IF EXISTS merge_ab; +DROP TABLE IF EXISTS kafka; +DROP TABLE IF EXISTS as_kafka; + +CREATE TABLE merge_a (x UInt8) ENGINE = StripeLog; +CREATE TABLE merge_b (x UInt8) ENGINE = StripeLog; +CREATE TABLE merge_ab AS merge(currentDatabase(), '^merge_[ab]$'); + +CREATE TABLE kafka (x UInt8) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka', + kafka_topic_list = 'topic', + kafka_group_name = 'group', + kafka_format = 'CSV'; +CREATE TABLE as_kafka AS kafka ENGINE = Memory; + +SELECT * FROM system.columns WHERE database = currentDatabase() AND table = 'merge_ab'; +SELECT * FROM system.columns WHERE database = currentDatabase() AND table = 'as_kafka'; + +DROP TABLE merge_a; +DROP TABLE merge_b; +DROP TABLE merge_ab; +DROP TABLE kafka; +DROP TABLE as_kafka; diff --git a/dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.sql b/dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.sql deleted file mode 100644 index 476377b4ddf..00000000000 --- a/dbms/tests/queries/0_stateless/00981_no_virtual_columns_in_system_columns.sql +++ /dev/null @@ -1,13 +0,0 @@ -DROP TABLE IF EXISTS merge_a; -DROP TABLE IF EXISTS merge_b; -DROP TABLE IF EXISTS merge_ab; - -CREATE TABLE merge_a (x UInt8) ENGINE = StripeLog; -CREATE TABLE merge_b (x UInt8) ENGINE = StripeLog; -CREATE TABLE merge_ab AS merge(currentDatabase(), '^merge_[ab]$'); - -SELECT * FROM system.columns WHERE database = currentDatabase() AND 
table = 'merge_ab'; - -DROP TABLE merge_a; -DROP TABLE merge_b; -DROP TABLE merge_ab; diff --git a/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh index a3149294c40..0a2e3b4951e 100755 --- a/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh +++ b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh @@ -11,11 +11,11 @@ $CLICKHOUSE_CLIENT --query="create table tab_str (x String) engine = MergeTree o $CLICKHOUSE_CLIENT --query="create table tab_str_lc (x LowCardinality(String)) engine = MergeTree order by tuple()"; $CLICKHOUSE_CLIENT --query="insert into tab_str values ('abc')"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=select+x+from+tab_str+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+tab_str_lc+FORMAT+Native" --data-binary @- +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+x+from+tab_str+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+tab_str_lc+FORMAT+Native" --data-binary @- $CLICKHOUSE_CLIENT --query="select x from tab_str_lc"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=select+x+from+tab_str_lc+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+tab_str+FORMAT+Native" --data-binary @- +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+x+from+tab_str_lc+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+tab_str+FORMAT+Native" --data-binary @- $CLICKHOUSE_CLIENT --query="select '----'"; $CLICKHOUSE_CLIENT --query="select x from tab_str"; diff --git a/dbms/tests/queries/0_stateless/01010_pmj_on_disk.reference b/dbms/tests/queries/0_stateless/01010_pmj_on_disk.reference new file mode 100644 index 00000000000..d2ae3db9528 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_on_disk.reference @@ -0,0 +1,12 @@ +0 10 +1 0 +2 11 +3 0 +0 10 +1 0 +2 11 +3 0 +0 10 +1 0 +2 11 +3 0 diff --git a/dbms/tests/queries/0_stateless/01010_pmj_on_disk.sql b/dbms/tests/queries/0_stateless/01010_pmj_on_disk.sql new file mode 100644 index 00000000000..d84d5b1c52a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_on_disk.sql @@ -0,0 +1,35 @@ +SET partial_merge_join = 0; + +SELECT number as n, j FROM numbers(4) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number + 10 AS j + FROM numbers(4000) +) js2 +USING n; + +SET max_rows_in_join = 1000; + +SELECT number as n, j FROM numbers(4) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number + 10 AS j + FROM numbers(4000) +) js2 +USING n; -- { serverError 191 } + +SET partial_merge_join = 1; + +SELECT number as n, j FROM numbers(4) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number + 10 AS j + FROM numbers(4000) +) js2 +USING n; + +SET partial_merge_join_optimizations = 1; + +SELECT number as n, j FROM numbers(4) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number + 10 AS j + FROM numbers(4000) +) js2 +USING n; diff --git a/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.reference b/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.reference new file mode 100644 index 00000000000..1b420f7358e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.reference @@ -0,0 +1,15 @@ +0 0 +200000 100000 +400000 200000 +600000 300000 +800000 400000 +0 0 +200000 100000 +400000 200000 +600000 300000 +800000 400000 +0 0 +200000 100000 +400000 200000 +600000 300000 +800000 400000 diff --git 
a/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql new file mode 100644 index 00000000000..b28b32ff787 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -0,0 +1,57 @@ +SET max_memory_usage = 32000000; + +SELECT number * 200000 as n, j FROM numbers(5) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n; -- { serverError 241 } + +SET partial_merge_join = 1; +SET default_max_bytes_in_join = 0; + +SELECT number * 200000 as n, j FROM numbers(5) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n; -- { serverError 12 } + +SELECT number * 200000 as n, j FROM numbers(5) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n +SETTINGS max_bytes_in_join = 30000000; -- { serverError 241 } + +SELECT number * 200000 as n, j FROM numbers(5) +ANY LEFT JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n +ORDER BY n +SETTINGS max_bytes_in_join = 10000000; + +SET partial_merge_join_optimizations = 1; +SET partial_merge_join_rows_in_left_blocks = 100000; + +SELECT number * 200000 as n, j FROM numbers(5) +LEFT JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n +ORDER BY n +SETTINGS max_rows_in_join = 100000; + +SET default_max_bytes_in_join = 10000000; + +SELECT number * 200000 as n, j FROM numbers(5) +JOIN ( + SELECT number * 2 AS n, number AS j + FROM numbers(1000000) +) js2 +USING n +ORDER BY n; diff --git a/dbms/tests/queries/0_stateless/01014_lazy_database_basic.sh b/dbms/tests/queries/0_stateless/01014_lazy_database_basic.sh index 041948a21eb..9cb47ed65d4 100755 --- a/dbms/tests/queries/0_stateless/01014_lazy_database_basic.sh +++ b/dbms/tests/queries/0_stateless/01014_lazy_database_basic.sh @@ -3,6 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh +${CLICKHOUSE_CLIENT} -n -q "DROP DATABASE IF EXISTS testlazy" ${CLICKHOUSE_CLIENT} -n -q " CREATE DATABASE testlazy ENGINE = Lazy(1); @@ -11,6 +12,8 @@ ${CLICKHOUSE_CLIENT} -n -q " CREATE TABLE testlazy.tlog (a UInt64, b UInt64) ENGINE = TinyLog; " +${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.parts WHERE database = 'testlazy'"; + sleep 1.5 ${CLICKHOUSE_CLIENT} -q " diff --git a/dbms/tests/queries/0_stateless/01015_array_split.reference b/dbms/tests/queries/0_stateless/01015_array_split.reference new file mode 100644 index 00000000000..ea9d36a95b2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01015_array_split.reference @@ -0,0 +1,16 @@ +[[1,2,3],[4,5]] +[[1],[2,3,4],[5]] +[[1,2,3,4,5]] +[[1,2,3,4,5]] +[[1],[2],[3],[4],[5]] +[[1],[2],[3],[4],[5]] +[[1,2],[3,4],[5]] +[[1],[2,3],[4,5]] +[[]] +[[]] +[] +[] +[[1]] +[[1]] +[[2]] +[[2]] diff --git a/dbms/tests/queries/0_stateless/01015_array_split.sql b/dbms/tests/queries/0_stateless/01015_array_split.sql new file mode 100644 index 00000000000..64d456ed724 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01015_array_split.sql @@ -0,0 +1,19 @@ +SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]); +SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]); + +SELECT arraySplit(x -> 0, [1, 2, 3, 4, 5]); +SELECT arrayReverseSplit(x -> 0, [1, 2, 3, 4, 5]); +SELECT arraySplit(x -> 1, [1, 2, 3, 4, 5]); +SELECT arrayReverseSplit(x -> 1, [1, 2, 3, 4, 5]); +SELECT arraySplit(x -> x % 2 = 1, [1, 2, 3, 4, 5]); +SELECT arrayReverseSplit(x -> x % 2 = 1, [1, 2, 3, 4, 5]); + +SELECT arraySplit(x -> 0, []); +SELECT arrayReverseSplit(x -> 0, []); +SELECT arraySplit(x -> 1, []); +SELECT arrayReverseSplit(x -> 1, []); + +SELECT arraySplit(x -> x % 2 = 1, [1]); +SELECT arrayReverseSplit(x -> x % 2 = 1, [1]); +SELECT arraySplit(x -> x % 2 = 1, [2]); +SELECT arrayReverseSplit(x -> x % 2 = 1, [2]); diff --git a/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.reference b/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.reference index efb9cf2762e..f41483b25da 100644 --- a/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.reference +++ b/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.reference @@ -7,13 +7,11 @@ RIGHT JOIN empty set 0 RIGHT JOIN non-empty set 1 LEFT JOIN empty set 10 LEFT JOIN non-empty set 10 -IN empty set not in WHERE clause nan -IN empty set not in WHERE clause nan -IN non-empty set not in WHERE clause 0.1 -NOT IN empty set not in WHERE clause 1 multiple sets IN empty set OR IN non-empty set 1 multiple sets IN empty set OR NOT IN non-empty set 9 multiple sets NOT IN empty set AND IN non-empty set 1 multiple sets INNER JOIN empty set AND IN empty set 0 multiple sets INNER JOIN empty set AND IN non-empty set 0 multiple sets INNER JOIN non-empty set AND IN non-empty set 1 +IN empty set equals 0 10 +IN empty set sum if 10 diff --git a/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql b/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql index dcb85cf3389..e6343befabd 100644 --- a/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql +++ b/dbms/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql @@ -11,11 +11,6 @@ SELECT 'RIGHT JOIN non-empty set',count() FROM (SELECT number FROM system.number SELECT 'LEFT JOIN empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x; SELECT 'LEFT JOIN non-empty set',count() 
FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; -SELECT 'IN empty set not in WHERE clause',avg(number IN (SELECT toUInt64(1) WHERE 0)) FROM system.numbers; -SELECT 'IN empty set not in WHERE clause',avg(number IN (SELECT toUInt64(1) WHERE 0) AND number > 1) FROM system.numbers; -SELECT 'IN non-empty set not in WHERE clause',avg(number IN (SELECT toUInt64(1) WHERE 1)) FROM (SELECT number FROM system.numbers limit 10); -SELECT 'NOT IN empty set not in WHERE clause',avg(number NOT IN (SELECT toUInt64(1) WHERE 0)) FROM (SELECT number FROM system.numbers LIMIT 10); - SELECT 'multiple sets IN empty set OR IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number IN (SELECT toUInt64(1) WHERE 1); SELECT 'multiple sets IN empty set OR NOT IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number NOT IN (SELECT toUInt64(1) WHERE 1); SELECT 'multiple sets NOT IN empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number NOT IN (SELECT toUInt64(1) WHERE 0) AND number IN (SELECT toUInt64(1) WHERE 1); @@ -23,4 +18,5 @@ SELECT 'multiple sets INNER JOIN empty set AND IN empty set',count() FROM system SELECT 'multiple sets INNER JOIN empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); SELECT 'multiple sets INNER JOIN non-empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); -SELECT number FROM system.numbers WHERE NOT ignore(number IN (SELECT toUInt64(1) WHERE 0)); +SELECT 'IN empty set equals 0', count() FROM numbers(10) WHERE (number IN (SELECT toUInt64(1) WHERE 0)) = 0; +SELECT 'IN empty set sum if', sum(if(number IN (SELECT toUInt64(1) WHERE 0), 2, 1)) FROM numbers(10); diff --git a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference index e98738050e5..c887e5feb5f 100644 --- a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference +++ b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference @@ -2,3 +2,4 @@ 1 worldparam [0.2,0.3] 2 testparam [0.3] 3 paramparam [] +4 evaluateparam [0.2] diff --git a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh index 2fd06535ebe..8edda6629b0 100755 --- a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh +++ b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh @@ -6,12 +6,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS insert_values_parametrized"; $CLICKHOUSE_CLIENT --query="CREATE TABLE insert_values_parametrized (n UInt8, s String, a Array(Float32)) ENGINE = Memory"; -$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES +$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --input_format_values_interpret_expressions=0 --param_p_n="-1" 
--param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES (1 + {p_n:Int8}, lower(concat('Hello', {p_s:String})), arraySort(arrayIntersect([], {p_a:Array(Nullable(Float32))}))),\ (2 + {p_n:Int8}, lower(concat('world', {p_s:String})), arraySort(arrayIntersect([0.1,0.2,0.3], {p_a:Array(Nullable(Float32))}))),\ (3 + {p_n:Int8}, lower(concat('TEST', {p_s:String})), arraySort(arrayIntersect([0.1,0.3,0.4], {p_a:Array(Nullable(Float32))}))),\ (4 + {p_n:Int8}, lower(concat('PaRaM', {p_s:String})), arraySort(arrayIntersect([0.5], {p_a:Array(Nullable(Float32))})))"; +$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=0 --input_format_values_interpret_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES \ +(5 + {p_n:Int8}, lower(concat('Evaluate', {p_s:String})), arrayIntersect([0, 0.2, 0.6], {p_a:Array(Nullable(Float32))}))" + $CLICKHOUSE_CLIENT --query="SELECT * FROM insert_values_parametrized ORDER BY n"; $CLICKHOUSE_CLIENT --query="DROP TABLE insert_values_parametrized"; diff --git a/dbms/tests/queries/0_stateless/01016_index_tuple_field_type.reference b/dbms/tests/queries/0_stateless/01016_index_tuple_field_type.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01016_index_tuple_field_type.sql b/dbms/tests/queries/0_stateless/01016_index_tuple_field_type.sql new file mode 100644 index 00000000000..1c5e6d81a90 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01016_index_tuple_field_type.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS tuple_01016; + +CREATE TABLE tuple_01016(a Tuple(DateTime, Int32)) ENGINE = MergeTree() ORDER BY a; + +-- repeat a couple of times, because it doesn't always reproduce well +INSERT INTO tuple_01016 VALUES (('2018-01-01 00:00:00', 1)); +SELECT * FROM tuple_01016 WHERE a < tuple(toDateTime('2019-01-01 00:00:00'), 0) format Null; +INSERT INTO tuple_01016 VALUES (('2018-01-01 00:00:00', 1)); +SELECT * FROM tuple_01016 WHERE a < tuple(toDateTime('2019-01-01 00:00:00'), 0) format Null; +INSERT INTO tuple_01016 VALUES (('2018-01-01 00:00:00', 1)); +SELECT * FROM tuple_01016 WHERE a < tuple(toDateTime('2019-01-01 00:00:00'), 0) format Null; +INSERT INTO tuple_01016 VALUES (('2018-01-01 00:00:00', 1)); +SELECT * FROM tuple_01016 WHERE a < tuple(toDateTime('2019-01-01 00:00:00'), 0) format Null; +INSERT INTO tuple_01016 VALUES (('2018-01-01 00:00:00', 1)); +SELECT * FROM tuple_01016 WHERE a < tuple(toDateTime('2019-01-01 00:00:00'), 0) format Null; + +DROP TABLE tuple_01016; diff --git a/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference b/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference new file mode 100644 index 00000000000..ba9657bf16e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference @@ -0,0 +1,30 @@ +CSV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +TSV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +TSKV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +JSONEachRow +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 
world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +Template (Quoted) +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +Values +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) diff --git a/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh b/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh new file mode 100755 index 00000000000..f7fdefc26df --- /dev/null +++ b/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS null_as_default"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE null_as_default (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19', a Array(UInt8) DEFAULT [1, 2, 3], t Tuple(String, Float64) DEFAULT ('default', i / 4)) ENGINE = Memory"; + +echo 'CSV' +echo '\N, 1, \N, "2019-07-22", "[10, 20, 30]", \N +1, world, 3, "2019-07-23", \N, tuple, 3.14 +2, \N, 123, \N, "[]", test, 2.71828 +3, \N, \N, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'TSV' +echo -e '\N\t1\t\N\t2019-07-22\t[10, 20, 30]\t\N +1\tworld\t3\t2019-07-23\t\N\t('\''tuple'\'', 3.14) +2\t\N\t123\t\N\t[]\t('\''test'\'', 2.71828) +3\t\N\t\N\t\N\t\N\t\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'TSKV' +echo -e 'i=\N\ts=1\tn=\N\td=2019-07-22\ta=[10, 20, 30]\tt=\N +i=1\ts=world\tn=3\td=2019-07-23\ta=\N\tt=('\''tuple'\'', 3.14) +i=2\ts=\N\tn=123\td=\N\ta=[]\tt=('\''test'\'', 2.71828) +i=3\ts=\N\tn=\N\td=\N\ta=\N\tt=\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSKV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'JSONEachRow' +echo '{"i": null, "s": "1", "n": null, "d": "2019-07-22", "a": [10, 20, 30], "t": null} +{"i": 1, "s": "world", "n": 3, "d": "2019-07-23", "a": null, "t": ["tuple", 3.14]} +{"i": 2, "s": null, "n": 123, "d": null, "a": [], "t": ["test", 2.71828]} +{"i": 3, "s": null, "n": null, "d": null, "a": null, "t": null}' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT JSONEachRow"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'Template (Quoted)' +echo 'NULL, '\''1'\'', null, '\''2019-07-22'\'', [10, 20, 30], NuLl +1, '\''world'\'', 3, '\''2019-07-23'\'', NULL, ('\''tuple'\'', 3.14) +2, null, 123, null, [], ('\''test'\'', 2.71828) +3, null, null, null, null, null' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --format_custom_escaping_rule=Quoted --format_custom_field_delimiter=', ' --query="INSERT INTO null_as_default FORMAT 
CustomSeparated"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'Values' +echo '(NULL, '\''1'\'', (null), '\''2019-07-22'\'', ([10, 20, 30]), (NuLl)), +(1, '\''world'\'', (3), '\''2019-07-23'\'', (NULL), (('\''tuple'\'', 3.14))), +(2, null, (123), null, ([]), (('\''test'\'', 2.71828))), +(3, null, (null), null, (null), (null))' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --input_format_values_deduce_templates_of_expressions=1 --query="INSERT INTO null_as_default VALUES"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="DROP TABLE null_as_default"; diff --git a/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.reference b/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.reference new file mode 100644 index 00000000000..f7eb44d66e0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.reference @@ -0,0 +1,6 @@ +0 +0 +0 +0 +0 +0 diff --git a/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.sql b/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.sql new file mode 100644 index 00000000000..d675c195726 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_in_unconvertible_complex_type.sql @@ -0,0 +1,13 @@ +-- When left and right element types are compatible, but the particular value +-- on the right is not in the range of the left type, it should be ignored. +select (toUInt8(1)) in (-1); +select (toUInt8(0)) in (-1); +select (toUInt8(255)) in (-1); + +select [toUInt8(1)] in [-1]; +select [toUInt8(0)] in [-1]; +select [toUInt8(255)] in [-1]; + +-- When left and right element types are not compatible, we should get an error. +select (toUInt8(1)) in ('a'); -- { serverError 53 } +select [toUInt8(1)] in ['a']; -- { serverError 53 } diff --git a/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference new file mode 100644 index 00000000000..b462a5a7baa --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference @@ -0,0 +1,4 @@ +OK +OK +OK +OK diff --git a/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh new file mode 100755 index 00000000000..ac66dbc352a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + + +R1=table_1017_1 +R2=table_1017_2 +T1=table_1017_merge + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R1; + DROP TABLE IF EXISTS $R2; + + CREATE TABLE $R1 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r1') ORDER BY x; + CREATE TABLE $R2 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r2') ORDER BY x; + CREATE TABLE $T1 (x UInt32, y UInt32) ENGINE MergeTree() ORDER BY x; + + INSERT INTO $R1 VALUES (0, 1)(1, 2)(2, 3)(3, 4); + INSERT INTO $T1 VALUES (0, 1)(1, 2)(2, 3)(3, 4); +" + +# Check that in mutations of replicated tables predicates do not contain non-deterministic functions +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE ignore(rand())" 2>&1 \ +| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 \ +| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' + + +# For regular tables we do not enforce deterministic functions +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /dev/null \ +&& echo 'OK' || echo 'FAIL' + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 > /dev/null \ +&& echo 'OK' || echo 'FAIL' + + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R2; + DROP TABLE IF EXISTS $R1; + DROP TABLE IF EXISTS $T1; +" diff --git a/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.reference b/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.reference new file mode 100644 index 00000000000..feca2ec6484 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.reference @@ -0,0 +1,4 @@ + 1 2019-06-19 +abcd 100 2016-01-01 +default 1 2019-06-19 +default-eof 1 2019-06-19 diff --git a/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.sh b/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.sh new file mode 100755 index 00000000000..cdaa5016d3e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_tsv_empty_as_default.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS empty_as_default"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE empty_as_default (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; + +echo -ne 'abcd\t100\t2016-01-01 +default\t\t +\t\t +default-eof\t\t' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_tsv_empty_as_default=1 --query="INSERT INTO empty_as_default FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM empty_as_default ORDER BY s"; +$CLICKHOUSE_CLIENT --query="DROP TABLE empty_as_default"; diff --git a/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.reference b/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.reference new file mode 100644 index 00000000000..2ab7017657f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.reference @@ -0,0 +1,48 @@ +--- Int Empty --- +0 +\N +0 +\N +0 +\N +0 +\N +0 +\N +0 +\N +--- Int Non-empty --- +1 +1 +nan +nan +1 +1 +1 +1 +nan +nan +1 +1 +--- Other Types Empty --- + +\N +\N +\N +0.00 +\N +0 +\N +0.00 +\N +--- Other Types Non-empty --- +hello +hello +2011-04-05 14:19:19 +2011-04-05 14:19:19 +-123.45 +-123.45 +inf +inf +-123.45 +-123.45 diff --git a/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.sql b/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.sql new file mode 100644 index 00000000000..bf575641656 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_empty_aggregation_filling.sql @@ -0,0 +1,61 @@ +SELECT '--- Int Empty ---'; + +SELECT arrayReduce('avgOrDefault', arrayPopBack([1])); +SELECT arrayReduce('avgOrNull', arrayPopBack([1])); +SELECT arrayReduce('stddevSampOrDefault', arrayPopBack([1])); +SELECT arrayReduce('stddevSampOrNull', arrayPopBack([1])); +SELECT arrayReduce('maxOrDefault', arrayPopBack([1])); +SELECT arrayReduce('maxOrNull', arrayPopBack([1])); + +SELECT avgOrDefaultIf(x, x > 1) FROM (SELECT 1 AS x); +SELECT avgOrNullIf(x, x > 1) FROM (SELECT 1 AS x); +SELECT stddevSampOrDefaultIf(x, x > 1) FROM (SELECT 1 AS x); +SELECT stddevSampOrNullIf(x, x > 1) FROM (SELECT 1 AS x); +SELECT maxOrDefaultIf(x, x > 1) FROM (SELECT 1 AS x); +SELECT maxOrNullIf(x, x > 1) FROM (SELECT 1 AS x); + +SELECT '--- Int Non-empty ---'; + +SELECT arrayReduce('avgOrDefault', [1]); +SELECT arrayReduce('avgOrNull', [1]); +SELECT arrayReduce('stddevSampOrDefault', [1]); +SELECT arrayReduce('stddevSampOrNull', [1]); +SELECT arrayReduce('maxOrDefault', [1]); +SELECT arrayReduce('maxOrNull', [1]); + +SELECT avgOrDefaultIf(x, x > 0) FROM (SELECT 1 AS x); +SELECT avgOrNullIf(x, x > 0) FROM (SELECT 1 AS x); +SELECT stddevSampOrDefaultIf(x, x > 0) FROM (SELECT 1 AS x); +SELECT stddevSampOrNullIf(x, x > 0) FROM (SELECT 1 AS x); +SELECT maxOrDefaultIf(x, x > 0) FROM (SELECT 1 AS x); +SELECT maxOrNullIf(x, x > 0) FROM (SELECT 1 AS x); + +SELECT '--- Other Types Empty ---'; + +SELECT arrayReduce('maxOrDefault', arrayPopBack(['hello'])); +SELECT arrayReduce('maxOrNull', arrayPopBack(['hello'])); + +SELECT arrayReduce('maxOrDefault', arrayPopBack(arrayPopBack([toDateTime('2011-04-05 14:19:19'), null]))); +SELECT arrayReduce('maxOrNull', arrayPopBack(arrayPopBack([toDateTime('2011-04-05 14:19:19'), null]))); + +SELECT arrayReduce('avgOrDefault', arrayPopBack([toDecimal128(-123.45, 2)])); +SELECT arrayReduce('avgOrNull', arrayPopBack([toDecimal128(-123.45, 2)])); +SELECT arrayReduce('stddevSampOrDefault', arrayPopBack([toDecimal128(-123.45, 2)])); +SELECT arrayReduce('stddevSampOrNull', 
arrayPopBack([toDecimal128(-123.45, 2)])); +SELECT arrayReduce('maxOrDefault', arrayPopBack([toDecimal128(-123.45, 2)])); +SELECT arrayReduce('maxOrNull', arrayPopBack([toDecimal128(-123.45, 2)])); + +SELECT '--- Other Types Non-empty ---'; + +SELECT arrayReduce('maxOrDefault', ['hello']); +SELECT arrayReduce('maxOrNull', ['hello']); + +SELECT arrayReduce('maxOrDefault', [toDateTime('2011-04-05 14:19:19'), null]); +SELECT arrayReduce('maxOrNull', [toDateTime('2011-04-05 14:19:19'), null]); + +SELECT arrayReduce('avgOrDefault', [toDecimal128(-123.45, 2)]); +SELECT arrayReduce('avgOrNull', [toDecimal128(-123.45, 2)]); +SELECT arrayReduce('stddevSampOrDefault', [toDecimal128(-123.45, 2)]); +SELECT arrayReduce('stddevSampOrNull', [toDecimal128(-123.45, 2)]); +SELECT arrayReduce('maxOrDefault', [toDecimal128(-123.45, 2)]); +SELECT arrayReduce('maxOrNull', [toDecimal128(-123.45, 2)]); diff --git a/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.reference b/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.reference new file mode 100644 index 00000000000..d598f5d498a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.reference @@ -0,0 +1,2 @@ +1 hello +2 world diff --git a/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.sh b/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.sh new file mode 100755 index 00000000000..c6931fd68ee --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_insert_multiple_blocks_with_defaults.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS defaults" +$CLICKHOUSE_CLIENT --query="CREATE TABLE defaults (n UInt8, s String DEFAULT 'hello') ENGINE = Memory" +echo '{"n": 1} {"n": 2, "s":"world"}' | $CLICKHOUSE_CLIENT --max_insert_block_size=1 --query="INSERT INTO defaults FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT * FROM defaults ORDER BY n" +$CLICKHOUSE_CLIENT --query="DROP TABLE defaults" diff --git a/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.reference b/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.reference new file mode 100644 index 00000000000..d80fc78e03d --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.reference @@ -0,0 +1,4 @@ +1 +0 +1 +0 diff --git a/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.sql b/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.sql new file mode 100644 index 00000000000..6d2bf5f4863 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01018_optimize_read_in_order_with_in_subquery.sql @@ -0,0 +1,15 @@ +SET max_threads = 2; +SET optimize_read_in_order = 1; + +DROP TABLE IF EXISTS TESTTABLE4; +CREATE TABLE TESTTABLE4 (_id UInt64, pt String, l String ) +ENGINE = MergeTree() PARTITION BY (pt) ORDER BY (_id); +INSERT INTO TESTTABLE4 VALUES (0,'1','1'), (1,'0','1'); + +SELECT _id FROM TESTTABLE4 PREWHERE l IN (select '1') ORDER BY _id DESC LIMIT 10; + +SET experimental_use_processors=1; + +SELECT _id FROM TESTTABLE4 PREWHERE l IN (select '1') ORDER BY _id DESC LIMIT 10; + +DROP TABLE TESTTABLE4; \ No newline at end of file diff --git a/dbms/tests/queries/shell_config.sh b/dbms/tests/queries/shell_config.sh index b3058a6cdbe..da047025407 100644 --- a/dbms/tests/queries/shell_config.sh +++ 
b/dbms/tests/queries/shell_config.sh @@ -38,9 +38,25 @@ export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:="8123"} export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=`${CLICKHOUSE_EXTRACT_CONFIG} --try --key=https_port 2>/dev/null`} 2>/dev/null export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} + +# Add database to url params +if [ -n "${CLICKHOUSE_URL_PARAMS}" ] +then + export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}" +else + export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}" +fi + export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"} export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"} -export CLICKHOUSE_URL_PARAMS=${CLICKHOUSE_URL_PARAMS:="${CLICKHOUSE_URL}?database=${CLICKHOUSE_DATABASE}"} + +# Add url params to url +if [ -n "${CLICKHOUSE_URL_PARAMS}" ] +then + export CLICKHOUSE_URL="${CLICKHOUSE_URL}?${CLICKHOUSE_URL_PARAMS}" + export CLICKHOUSE_URL_HTTPS="${CLICKHOUSE_URL_HTTPS}?${CLICKHOUSE_URL_PARAMS}" +fi + export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=`${CLICKHOUSE_EXTRACT_CONFIG} --try --key=interserver_http_port 2>/dev/null`} 2>/dev/null export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:="9009"} export CLICKHOUSE_URL_INTERSERVER=${CLICKHOUSE_URL_INTERSERVER:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_INTERSERVER}/"} diff --git a/docs/.yaspellerrc b/docs/.yaspellerrc new file mode 100644 index 00000000000..e789565eda7 --- /dev/null +++ b/docs/.yaspellerrc @@ -0,0 +1,25 @@ +{ + "checkYo": false, + "excludeFiles": [], + "fileExtensions": [], + "format": "auto", + "ignoreTags": [ + "code", + "kbd", + "object", + "samp", + "script", + "style", + "var" + ], + "maxRequests": 2, + "lang": "en,ru", + "report": ["console"], + "dictionary": [ + "(C|c)lick(H|h)ouse", + "CatBoost", + "(Ш|ш)ард(ы|ов|а|у|е|ам|ирование|ированы|ах)?", + "логир(ование|уются|ования)?", + "конфиг(а|е|ом|у)" + ] +} \ No newline at end of file diff --git a/docs/en/getting_started/example_datasets/metrica.md b/docs/en/getting_started/example_datasets/metrica.md index 88d4c86430f..34d4e0c9d75 100644 --- a/docs/en/getting_started/example_datasets/metrica.md +++ b/docs/en/getting_started/example_datasets/metrica.md @@ -1,5 +1,5 @@ # Anonymized Yandex.Metrica Data -Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. Each of the tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at `https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz` and as prepared partitions at `https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz`. +Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. Each of the tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions. 
In addition to that, an extended version of the `hits` table containing 100 million rows is available as [TSV](https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz) and as [prepared partitions](https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz). ## Obtaining Tables from Prepared Partitions **Download and import hits:** diff --git a/docs/en/guides/apply_catboost_model.md b/docs/en/guides/apply_catboost_model.md new file mode 100644 index 00000000000..4665809bfa0 --- /dev/null +++ b/docs/en/guides/apply_catboost_model.md @@ -0,0 +1,230 @@ +# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} + +[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning. + +With this instruction, you will learn to apply pre-trained models in ClickHouse: as a result, you run the model inference from SQL. + +To apply a CatBoost model in ClickHouse: + +1. [Create a Table](#create-table). +2. [Insert the Data to the Table](#insert-data-to-table). +3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (Optional step). +4. [Run the Model Inference from SQL](#run-model-inference). + +For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training). + +## Prerequisites {#prerequisites} + +If you don't have the [Docker](https://docs.docker.com/install/) yet, install it. + +!!! note "Note" + [Docker](https://www.docker.com) is a software platform that allows you to create containers that isolate a CatBoost and ClickHouse installation from the rest of the system. + +Before applying a CatBoost model: + +**1.** Pull the [Docker image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry: + +```bash +$ docker pull yandex/tutorial-catboost-clickhouse +``` + +This Docker image contains everything you need to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files. + +**2.** Make sure the Docker image has been successfully pulled: + +```bash +$ docker image ls +REPOSITORY TAG IMAGE ID CREATED SIZE +yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB +``` + +**3.** Start a Docker container based on this image: + +```bash +$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse +``` + +## 1. Create a Table {#create-table} + +To create a ClickHouse table for the train sample: + +**1.** Start ClickHouse console client in interactive mode: + +```bash +$ clickhouse client +``` + +!!! note "Note" + The ClickHouse server is already running inside the Docker container. + +**2.** Create the table using the command: + +```sql +:) CREATE TABLE amazon_train +( + date Date MATERIALIZED today(), + ACTION UInt8, + RESOURCE UInt32, + MGR_ID UInt32, + ROLE_ROLLUP_1 UInt32, + ROLE_ROLLUP_2 UInt32, + ROLE_DEPTNAME UInt32, + ROLE_TITLE UInt32, + ROLE_FAMILY_DESC UInt32, + ROLE_FAMILY UInt32, + ROLE_CODE UInt32 +) +ENGINE = MergeTree() +``` + +**3.** Exit from ClickHouse console client: + +```sql +:) exit +``` + +## 2. 
Insert the Data to the Table {#insert-data-to-table} + +To insert the data: + +**1.** Run the following command: + +```bash +$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv +``` + +**2.** Start ClickHouse console client in interactive mode: + +```bash +$ clickhouse client +``` + +**3.** Make sure the data has been uploaded: + +```sql +:) SELECT count() FROM amazon_train + +SELECT count() +FROM amazon_train + ++-count()-+ +| 65538 | ++---------+ +``` + +## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse} + +!!! note "Note" + **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse. + +To integrate CatBoost into ClickHouse: + +**1.** Build the evaluation library. + +The fastest way to evaluate a CatBoost model is to compile the `libcatboostmodel.<so|dll|dylib>` library. For more information about how to build the library, see [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). + +**2.** Create a new directory anywhere and with any name, for example, `data` and put the created library in it. The Docker image already contains the library `data/libcatboostmodel.so`. + +**3.** Create a new directory for the model configuration anywhere and with any name, for example, `models`. + +**4.** Create a model configuration file with any name, for example, `models/amazon_model.xml`. + +**5.** Describe the model configuration: + +```xml +<models> + <model> + <!-- Model type. Now catboost only. --> + <type>catboost</type> + <!-- Model name. --> + <name>amazon</name> + <!-- Path to trained model. --> + <path>/home/catboost/tutorial/catboost_model.bin</path> + <!-- Update interval. --> + <lifetime>0</lifetime> + </model> +</models> +``` + +**6.** Add the path to CatBoost and the model configuration to the ClickHouse configuration: + +```xml +<!-- Interface for the CatBoost evaluation library. --> +<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path> +<models_config>/home/catboost/models/*_model.xml</models_config> +``` + +## 4. Run the Model Inference from SQL {#run-model-inference} + +To test the model, run the ClickHouse client `$ clickhouse client`. + +Let's make sure that the model is working: + +```sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) > 0 AS prediction, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Note" + Function [modelEvaluate](../query_language/functions/other_functions.md#function-modelevaluate) returns a tuple with per-class raw predictions for multiclass models. + +Let's predict probability: + +```sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. / (1 + exp(-prediction)) AS probability, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Note" + More info about the [exp()](../query_language/functions/math_functions.md) function. + +Let's calculate LogLoss on the sample: + +```sql +:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss +FROM +( + SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. / (1. + exp(-prediction)) AS prob, + ACTION AS tg + FROM amazon_train +) +``` + +!!! note "Note" + More info about the [avg()](../query_language/agg_functions/reference.md#agg_function-avg) and [log()](../query_language/functions/math_functions.md) functions.
\ No newline at end of file diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md new file mode 100644 index 00000000000..32c2da1ad2f --- /dev/null +++ b/docs/en/guides/index.md @@ -0,0 +1,5 @@ +# ClickHouse Guides + +Detailed step-by-step instructions that will help you solve various tasks using ClickHouse. + +- [Applying a CatBoost Model in ClickHouse](apply_catboost_model.md) \ No newline at end of file diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 100dbe6b68a..eebdf10702d 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -983,3 +983,11 @@ should be located in the directory specified in [format_schema_path](../operatio in the server configuration. [Original article](https://clickhouse.yandex/docs/en/interfaces/formats/) + +## Skipping Errors {#skippingerrors} + +Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and +[input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings. +Limitations: + - In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly. + - `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty. diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 15a233079f7..eaa0ffdd406 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -28,7 +28,7 @@ ClickHouse collects: - Different metrics of how the server uses computational resources. - Common statistics on query processing. -You can find metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events), and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables. +You can find metrics in the [system.metrics](system_tables.md#system_tables-metrics), [system.events](system_tables.md#system_tables-events), and [system.asynchronous_metrics](system_tables.md#system_tables-asynchronous_metrics) tables. You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 70e46629c32..436e0bdad8a 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -140,10 +140,10 @@ Settings: - interval – The interval for sending, in seconds. - timeout – The timeout for sending data, in seconds. - root_path – Prefix for keys. -- metrics – Sending data from a :ref:`system_tables-system.metrics` table. 
-- events – Sending deltas data accumulated for the time period from a :ref:`system_tables-system.events` table -- events_cumulative – Sending cumulative data from a :ref:`system_tables-system.events` table -- asynchronous_metrics – Sending data from a :ref:`system_tables-system.asynchronous_metrics` table. +- metrics – Sending data from a [system.metrics](../system_tables.md#system_tables-metrics) table. +- events – Sending deltas data accumulated for the time period from a [system.events](../system_tables.md#system_tables-events) table. +- events_cumulative – Sending cumulative data from a [system.events](../system_tables.md#system_tables-events) table. +- asynchronous_metrics – Sending data from a [system.asynchronous_metrics](../system_tables.md#system_tables-asynchronous_metrics) table. You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3f94d8e45c5..ef1b664272e 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -130,30 +130,30 @@ Possible values: Default value: 0. -## input_format_allow_errors_num +## input_format_allow_errors_num {#settings-input_format_allow_errors_num} Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). The default value is 0. -Always pair it with `input_format_allow_errors_ratio`. To skip errors, both settings must be greater than 0. +Always pair it with `input_format_allow_errors_ratio`. If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. -If `input_format_allow_errors_num` is exceeded, ClickHouse throws an exception. +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -## input_format_allow_errors_ratio +## input_format_allow_errors_ratio {#settings-input_format_allow_errors_ratio} Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). The percentage of errors is set as a floating-point number between 0 and 1. The default value is 0. -Always pair it with `input_format_allow_errors_num`. To skip errors, both settings must be greater than 0. +Always pair it with `input_format_allow_errors_num`. If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. -If `input_format_allow_errors_ratio` is exceeded, ClickHouse throws an exception. +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} @@ -227,7 +227,7 @@ Enabled by default. ## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow) and [CSV](../../interfaces/formats.md#csv) formats. +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. 
This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats. !!! note "Note" When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. @@ -239,9 +239,15 @@ Possible values: Default value: 1. +## input_format_tsv_empty_as_default {#settings-input_format_tsv_empty_as_default} + +When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Disabled by default. + ## input_format_null_as_default {#settings-input_format_null_as_default} -Enables or disables using default values if input data contain `NULL`, but data type of corresponding column in not `Nullable(T)` (for CSV format). +Enables or disables using default values if input data contain `NULL`, but data type of corresponding column in not `Nullable(T)` (for text input formats). ## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields} diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index a8d4d62f2d0..7ae6c1b6834 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -1,6 +1,6 @@ # MergeTree {#table_engines-mergetree} -The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHousе table engines. +The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines. Engines in the `MergeTree` family are designed for inserting a very large amount of data into a table. The data is quickly written to the table part by part, then rules are applied for merging the parts in the background. This method is much more efficient than continually rewriting the data in storage during insert. @@ -460,3 +460,135 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query before `SELECT`. [Original article](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/) + + +## Using multiple block devices for data storage {#table_engine-mergetree-multiple-volumes} + +### General + +Tables of the MergeTree family are able to store their data on multiple block devices, which may be useful when, for instance, the data of a certain table are implicitly split into "hot" and "cold". The most recent data is regularly requested but requires only a small amount of space. On the contrary, the fat-tailed historical data is requested rarely. If several disks are available, the "hot" data may be located on fast disks (NVMe SSDs or even in memory), while the "cold" data - on relatively slow ones (HDD). + +Part is the minimum movable unit for MergeTree tables. The data belonging to one part are stored on one disk. Parts can be moved between disks in the background (according to user settings) as well as by means of the [ALTER](../../query_language/alter.md#alter_move-partition) queries. + +### Terms +* Disk — a block device mounted to the filesystem. +* Default disk — a disk that contains the path specified in the `` tag in `config.xml`. 
+* Volume — an ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
+* Storage policy — a number of volumes together with the rules for moving data between them.
+
+The names given to the described entities can be found in the system tables, [system.storage_policies](../system_tables.md#system_tables-storage_policies) and [system.disks](../system_tables.md#system_tables-disks). The storage policy name can be used as a parameter for tables of the MergeTree family.
+
+### Configuration {#table_engine-mergetree-multiple-volumes_configure}
+
+Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag, either in the main file `config.xml` or in a distinct file in the `config.d` directory. This section of a configuration file has the following structure:
+
+```xml
+<storage_configuration>
+    <disks>
+        <fast_ssd>
+            <path>/mnt/fast_ssd/clickhouse</path>
+        </fast_ssd>
+        <disk1>
+            <path>/mnt/hdd1/clickhouse</path>
+            <keep_free_space_bytes>10485760</keep_free_space_bytes>
+        </disk1>
+        <disk2>
+            <path>/mnt/hdd2/clickhouse</path>
+            <keep_free_space_bytes>10485760</keep_free_space_bytes>
+        </disk2>
+        ...
+    </disks>
+</storage_configuration>
+```
+
+where
+
+* the disk name is given as a tag name.
+* `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with '/'.
+* `keep_free_space_bytes` — the amount of free disk space to be reserved.
+
+The order of the disk definition is not important.
+
+Storage policies configuration:
+
+```xml
+<storage_configuration>
+    ...
+    <policies>
+        <hdd_in_order>
+            <volumes>
+                <single>
+                    <disk>disk1</disk>
+                    <disk>disk2</disk>
+                </single>
+            </volumes>
+        </hdd_in_order>
+
+        <moving_from_ssd_to_hdd>
+            <volumes>
+                <hot>
+                    <disk>fast_ssd</disk>
+                    <max_data_part_size_bytes>1073741824</max_data_part_size_bytes>
+                </hot>
+                <cold>
+                    <disk>disk1</disk>
+                </cold>
+            </volumes>
+            <move_factor>0.2</move_factor>
+        </moving_from_ssd_to_hdd>
+    </policies>
+</storage_configuration>
+```
+
+where
+
+* volume and storage policy names are given as tag names.
+* `disk` — a disk within a volume.
+* `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume's disks.
+* `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move to the next volume, if any (by default, 0.1).
+
+In the given example, the `hdd_in_order` policy implements the [round-robin](https://en.wikipedia.org/wiki/Round-robin_scheduling) approach. Since the policy defines only one volume (`single`), the data are stored on all its disks in circular order. Such a policy can be quite useful if there are several similar disks mounted to the system. If there are different disks, the policy `moving_from_ssd_to_hdd` can be used instead.
+The volume `hot` consists of an SSD disk (`fast_ssd`), and the maximum size of a part that can be stored on this volume is 1GB. All parts larger than 1GB are stored directly on the `cold` volume, which contains the HDD disk `disk1`.
+Also, once the disk `fast_ssd` gets filled by more than 80%, data will be transferred to `disk1` by a background process.
+
+The order of volume enumeration within a storage policy is important. Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns.
+
+When creating a table, one can apply one of the configured storage policies to it:
+
+```sql
+CREATE TABLE table_with_non_default_policy (
+    EventDate Date,
+    OrderID UInt64,
+    BannerID UInt64,
+    SearchPhrase String
+) ENGINE = MergeTree
+ORDER BY (OrderID, BannerID)
+PARTITION BY toYYYYMM(EventDate)
+SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
+```
+
+The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
+
+### Details
+
+In the case of MergeTree tables, data reaches the disks in different ways:
+
+* as a result of an insert (`INSERT` query).
+* during background merges and [mutations](../../query_language/alter.md#alter-mutations). +* when downloading from another replica. +* as a result of partition freezing [ALTER TABLE ... FREEZE PARTITION](../../query_language/alter.md#alter_freeze-partition). + +In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy: + +1. The first volume (in the order of definition) that has enough disk space for storing a part (`unreserved_space > current_part_size`) and allows for storing parts of a given size (`max_data_part_size_bytes > current_part_size`) is chosen. +2. Within this volume, that disk is chosen that follows the one, which was used for storing the previous chunk of data, and that has free space more than the part size (`unreserved_space - keep_free_space_bytes > current_part_size`). + +Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones. + +In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. +Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](../system_tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../system_tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. + +User can force moving a part or a partition from one volume to another using the query [ALTER TABLE ... MOVE PART|PARTITION ... TO VOLUME|DISK ...](../../query_language/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. + +Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas. + +After the completion of background merges and mutations, old parts are removed only after a certain amount of time (`old_parts_lifetime`). +During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space. + diff --git a/docs/en/query_language/agg_functions/combinators.md b/docs/en/query_language/agg_functions/combinators.md index ccad56083c4..6a1f347cc8a 100644 --- a/docs/en/query_language/agg_functions/combinators.md +++ b/docs/en/query_language/agg_functions/combinators.md @@ -10,7 +10,7 @@ Examples: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTi With conditional aggregate functions, you can calculate aggregates for several conditions at once, without using subqueries and `JOIN`s. For example, in Yandex.Metrica, conditional aggregate functions are used to implement the segment comparison functionality. -## -Array +## -Array {#agg-functions-combinator-array} The -Array suffix can be appended to any aggregate function. 
In this case, the aggregate function takes arguments of the 'Array(T)' type (arrays) instead of 'T' type arguments. If the aggregate function accepts multiple arguments, this must be arrays of equal lengths. When processing arrays, the aggregate function works like the original aggregate function across all array elements. @@ -18,9 +18,9 @@ Example 1: `sumArray(arr)` - Totals all the elements of all 'arr' arrays. In thi Example 2: `uniqArray(arr)` – Counts the number of unique elements in all 'arr' arrays. This could be done an easier way: `uniq(arrayJoin(arr))`, but it's not always possible to add 'arrayJoin' to a query. --If and -Array can be combined. However, 'Array' must come first, then 'If'. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the 'cond' argument can't be an array. +-If and -Array can be combined. However, 'Array' must come first, then 'If'. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the 'cond' argument won't be an array. -## -State +## -State {#agg-functions-combinator-state} If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. @@ -40,10 +40,51 @@ If you apply this combinator, the aggregate function takes the intermediate aggr Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn't return the resulting value, but an intermediate aggregation state, similar to the -State combinator. -## -ForEach +## -ForEach {#agg-functions-combinator-foreach} Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]`and`[6, 7]`returns the result `[10, 13, 5]` after adding together the corresponding array items. +## -OrDefault {#agg-functions-combinator-ordefault} + +Fills the default value of the aggregate function's return type if there is nothing to aggregate. + +```sql +SELECT avg(number), avgOrDefault(number) FROM numbers(0) +``` +```text +┌─avg(number)─┬─avgOrDefault(number)─┐ +│ nan │ 0 │ +└─────────────┴──────────────────────┘ +``` + +## -OrNull {#agg-functions-combinator-ornull} + +Fills `null` if there is nothing to aggregate. The return column will be nullable. + +```sql +SELECT avg(number), avgOrNull(number) FROM numbers(0) +``` +```text +┌─avg(number)─┬─avgOrNull(number)─┐ +│ nan │ ᴺᵁᴸᴸ │ +└─────────────┴───────────────────┘ +``` + +-OrDefault and -OrNull can be combined with other combinators. It is useful when the aggregate function does not accept the empty input. + +```sql +SELECT avgOrNullIf(x, x > 10) +FROM +( + SELECT toDecimal32(1.23, 2) AS x +) +``` +```text +┌─avgOrNullIf(x, greater(x, 10))─┐ +│ ᴺᵁᴸᴸ │ +└────────────────────────────────┘ +``` + ## -Resample {#agg_functions-combinator-resample} Lets you divide data into groups, and then separately aggregates the data in those groups. Groups are created by splitting the values from one column into intervals. 
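+For illustration, the `-Array` and `-If` combination described above can be checked with a self-contained query over literal values (no table is assumed):
+
+```sql
+-- 'Array' comes first, then 'If': array elements are summed only for rows where the condition holds.
+SELECT sumArrayIf(arr, flag) AS filtered_sum
+FROM
+(
+    SELECT [1, 2, 3] AS arr, 1 AS flag
+    UNION ALL
+    SELECT [10, 20, 30], 0
+)
+```
+
+Only the first row satisfies the condition, so the expected result is `6`.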
@@ -54,7 +95,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho **Parameters** -- `start` — Starting value of the whole required interval for `resampling_key` values. +- `start` — Starting value of the whole required interval for `resampling_key` values. - `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn't include the `stop` value `[start, stop)`. - `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. - `resampling_key` — Column whose values are used for separating data into intervals. @@ -85,7 +126,7 @@ Let's get the names of the people whose age lies in the intervals of `[30,60)` a To aggregate names in an array, we use the [groupArray](reference.md#agg_function-grouparray) aggregate function. It takes one argument. In our case, it's the `name` column. The `groupArrayResample` function should use the `age` column to aggregate names by age. To define the required intervals, we pass the `30, 75, 30` arguments into the `groupArrayResample` function. ```sql -SELECT groupArrayResample(30, 75, 30)(name, age) from people +SELECT groupArrayResample(30, 75, 30)(name, age) FROM people ``` ```text ┌─groupArrayResample(30, 75, 30)(name, age)─────┐ diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 5e1bfaf5570..dc6f18af26a 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -581,7 +581,7 @@ Function: - Provides the result deterministically (it doesn't depend on the query processing order). -!! note "Note" +!!! note "Note" Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](#agg_function-uniqcombined64) Compared to the [uniq](#agg_function-uniq) function, the `uniqCombined`: @@ -881,7 +881,7 @@ The calculation is accurate if: Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. -!! note "Note" +!!! note "Note" For calculating page loading time quantiles, this function is more effective and accurate than [quantile](#agg_function-quantile). 
**Returned value** @@ -1210,11 +1210,6 @@ SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%' │ 3 │ └───────────────────┘ -SELECT groupBitmapAndMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapAnd(z)─┐ -│ 3 │ -└───────────────────┘ - SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); ┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐ │ [6,8,10] │ @@ -1260,11 +1255,6 @@ SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%') │ 15 │ └──────────────────┘ -SELECT groupBitmapOrMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapOrMerge(z)─┐ -│ 15 │ -└───────────────────────┘ - SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); ┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐ │ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] │ @@ -1310,11 +1300,6 @@ SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%' │ 10 │ └───────────────────┘ -SELECT groupBitmapXorMerge(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapXorMerge(z)─┐ -│ 10 │ -└────────────────────────┘ - SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); ┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐ │ [1,3,5,6,8,10,11,13,14,15] │ diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index 0b400adca30..9a0f694fc42 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -55,7 +55,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine] Creates a table with the same structure as another table. You can specify a different engine for the table. If the engine is not specified, the same engine will be used as for the `db2.name2` table. ```sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_fucntion() +CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function() ``` Creates a table with the structure and data returned by a [table function](table_functions/index.md). diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index 8101aed12d4..f6139741849 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -4,6 +4,9 @@ Returns a string with the name of the host that this function was performed on. For distributed processing, this is the name of the remote server host, if the function is performed on a remote server. +## FQDN(), fullHostName() +Returns the Fully qualified domain name aka [FQDN](https://en.wikipedia.org/wiki/Fully_qualified_domain_name). + ## basename Extracts the trailing part of a string after the last slash or backslash. This function if often used to extract the filename from a path. @@ -765,8 +768,8 @@ Gets data from [Join](../../operations/table_engines/join.md) tables using the s Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` statement. -## modelEvaluate(model_name, ...) -Evaluate model. +## modelEvaluate(model_name, ...) {#function-modelevaluate} +Evaluate external model. Accepts a model name and model arguments. Returns Float64. 
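+For example, a model registered on the server could be applied as follows (a sketch only: `my_model`, `my_table` and the feature columns are placeholders for objects that must already be configured on the server):
+
+```sql
+-- 'my_model' must be declared in the server's model configuration;
+-- the feature columns are passed in the order the model expects.
+SELECT modelEvaluate('my_model', feature_1, feature_2) AS prediction
+FROM my_table
+LIMIT 10
+```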
## throwIf(x\[, custom_message\]) diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 723b8edc154..bcf0689eb2c 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -64,6 +64,10 @@ The same as `match`, but returns 0 if none of the regular expressions are matche The same as `multiMatchAny`, but returns any index that matches the haystack. +## multiMatchAllIndices(haystack, [pattern1, pattern2, ..., patternn]) + +The same as `multiMatchAny`, but returns the array of all indicies that match the haystack in any order. + ## multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, ..., patternn]) The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function is also in an experimental mode and can be extremely slow. For more information see [hyperscan documentation](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). @@ -72,6 +76,10 @@ The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack w The same as `multiFuzzyMatchAny`, but returns any index that matches the haystack within a constant edit distance. +## multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, ..., patternn]) + +The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance. + !!! note "Note" `multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction. diff --git a/docs/fa/database_engines/lazy.md b/docs/fa/database_engines/lazy.md new file mode 120000 index 00000000000..66830dcdb2f --- /dev/null +++ b/docs/fa/database_engines/lazy.md @@ -0,0 +1 @@ +../../en/database_engines/lazy.md \ No newline at end of file diff --git a/docs/ru/data_types/decimal.md b/docs/ru/data_types/decimal.md index 110eb84f31f..d6faa745723 100644 --- a/docs/ru/data_types/decimal.md +++ b/docs/ru/data_types/decimal.md @@ -42,7 +42,7 @@ При операциях между Decimal и целыми числами результатом является Decimal, аналогичный аргументу. -Операции между Decimal и Float32/64 не определены. Для осуществления таких операций нужно явно привести один из агруметнов функциями: toDecimal32, toDecimal64, toDecimal128, или toFloat32, toFloat64. Это сделано из двух соображений. Во-первых, результат операции будет с потерей точности. Во-вторых, преобразование типа - дорогая операция, из-за ее наличия пользовательский запрос может работать в несколько раз дольше. +Операции между Decimal и Float32/64 не определены. Для осуществления таких операций нужно явно привести один из аргументов функциями: toDecimal32, toDecimal64, toDecimal128, или toFloat32, toFloat64. Это сделано из двух соображений. Во-первых, результат операции будет с потерей точности. Во-вторых, преобразование типа - дорогая операция, из-за ее наличия пользовательский запрос может работать в несколько раз дольше. Часть функций над Decimal возвращают Float64 (например, var, stddev). Для некоторых из них промежуточные операции проходят в Decimal. Для таких функций результат над одинаковыми данными во Float64 и Decimal может отличаться, несмотря на одинаковый тип результата. 
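+Небольшой самодостаточный пример явного приведения типов (таблица не требуется):
+
+```sql
+-- Прямое умножение Decimal на Float не определено, поэтому один из аргументов приводится явно.
+SELECT
+    toDecimal32(2.5, 2) * toDecimal32(1.5, 2) AS decimal_result,
+    toFloat64(toDecimal32(2.5, 2)) * 1.5 AS float_result
+```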
diff --git a/docs/ru/data_types/float.md b/docs/ru/data_types/float.md index 3eb9f4b8078..eb4ada0e24c 100644 --- a/docs/ru/data_types/float.md +++ b/docs/ru/data_types/float.md @@ -7,7 +7,7 @@ - `Float32` - `float`; - `Float64` - `double`. -Рекомендуется хранить данные в целочисленноми виде всегда, когда это возможно. Например, переводите в целочисленные значения числа с фиксированной точностью, такие как денежные суммы или времена загрузки страниц в миллисекундах. +Рекомендуется хранить данные в целочисленном виде всегда, когда это возможно. Например, переводите в целочисленные значения числа с фиксированной точностью, такие как денежные суммы или времена загрузки страниц в миллисекундах. ## Особенности использования чисел с плавающей запятой diff --git a/docs/ru/data_types/nested_data_structures/nested.md b/docs/ru/data_types/nested_data_structures/nested.md index 58c7c48da3e..8b4f7d247cf 100644 --- a/docs/ru/data_types/nested_data_structures/nested.md +++ b/docs/ru/data_types/nested_data_structures/nested.md @@ -90,7 +90,7 @@ LIMIT 10 Вы не можете сделать SELECT целой вложенной структуры данных. Можно лишь явно перечислить отдельные столбцы - её составляющие. -При запросе INSERT, вы должны передать все составляющие столбцы-массивы вложенной структуры данных по-отдельности (как если бы это были отдельные столбцы-массивы). При вставке проверяется, что они имеют одинаковые длины. +При запросе INSERT, вы должны передать все составляющие столбцы-массивы вложенной структуры данных по отдельности (как если бы это были отдельные столбцы-массивы). При вставке проверяется, что они имеют одинаковые длины. При запросе DESCRIBE, столбцы вложенной структуры данных перечисляются так же по отдельности. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index d72a11d1d49..139e69f375f 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -263,7 +263,7 @@ void executeQuery( **8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». -Замечание: такие комментарии могут использоваться для генерации документации с помощью Doxygen. Но, фактически, Doxygen не используется, так как для навигации по коду гораздо удобне использовать возможности IDE. +Замечание: такие комментарии могут использоваться для генерации документации с помощью Doxygen. Но, фактически, Doxygen не используется, так как для навигации по коду гораздо удобнее использовать возможности IDE. **9.** В начале и конце многострочного комментария, не должно быть пустых строк (кроме строки, на которой закрывается многострочный комментарий). @@ -399,7 +399,7 @@ enum class CompressionMethod }; ``` -**15.** Все имена - по английски. Транслит с русского использовать нельзя. +**15.** Все имена - по-английски. Транслит с русского использовать нельзя. ```text не Stroka @@ -505,7 +505,7 @@ if (0 != close(fd)) - Сделайте функцию (`done()` или `finalize()`), которая позволяет заранее выполнить всю работу, в процессе которой может возникнуть исключение. Если эта функция была вызвана, то затем в деструкторе не должно возникать исключений. - Слишком сложную работу (например, отправку данных по сети) можно вообще не делать в деструкторе, рассчитывая, что пользователь заранее позовёт метод для завершения работы. - Если в деструкторе возникло исключение, желательно не "проглатывать" его, а вывести информацию в лог (если в этом месте доступен логгер). 
-- В простых программах, если соответствующие исключения не ловятся, и приводят к завершению работы с записью информации в лог, можно не беспокоиться об исключениях, вылетающих из деструкторов, так как вызов `std::terminate` (в случае `noexcept` по умолчанию в C++11), является приемлимым способом обработки исключения. +- В простых программах, если соответствующие исключения не ловятся, и приводят к завершению работы с записью информации в лог, можно не беспокоиться об исключениях, вылетающих из деструкторов, так как вызов `std::terminate` (в случае `noexcept` по умолчанию в C++11), является приемлемым способом обработки исключения. **6.** Отдельные блоки кода. @@ -630,17 +630,17 @@ Loader() {} Везде используется UTF-8. Используется `std::string`, `char *`. Не используется `std::wstring`, `wchar_t`. -**19.** Логгирование. +**19.** Логирование. См. примеры везде в коде. -Перед коммитом, удалите всё бессмысленное и отладочное логгирование, и другие виды отладочного вывода. +Перед коммитом, удалите всё бессмысленное и отладочное логирование, и другие виды отладочного вывода. -Не должно быть логгирования на каждую итерацию внутреннего цикла, даже уровня Trace. +Не должно быть логирования на каждую итерацию внутреннего цикла, даже уровня Trace. -При любом уровне логгирования, логи должно быть возможно читать. +При любом уровне логирования, логи должно быть возможно читать. -Логгирование следует использовать, в основном, только в прикладном коде. +Логирование следует использовать, в основном, только в прикладном коде. Сообщения в логе должны быть написаны на английском языке. diff --git a/docs/ru/getting_started/example_datasets/metrica.md b/docs/ru/getting_started/example_datasets/metrica.md index 096c48e9d30..3aaa4db952a 100644 --- a/docs/ru/getting_started/example_datasets/metrica.md +++ b/docs/ru/getting_started/example_datasets/metrica.md @@ -1,5 +1,6 @@ # Анонимизированные данные Яндекс.Метрики -Датасет состоит из двух таблиц, содержащих анонимизированные данные о хитах (`hits_v1`) и визитах (`visits_v1`) Яндекс.Метрики. Каждую из таблиц можно скачать в виде сжатого `.tsv.xz`-файла или в виде уже готовых партиций. + +Датасет состоит из двух таблиц, содержащих анонимизированные данные о хитах (`hits_v1`) и визитах (`visits_v1`) Яндекс.Метрики. Каждую из таблиц можно скачать в виде сжатого `.tsv.xz`-файла или в виде уже готовых партиций. Также можно скачать расширенную версию таблицы `hits`, содержащую 100 миллионов строк в виде [архива c файлами TSV](https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz) и в виде [готовых партиций](https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz). ## Получение таблиц из партиций **Скачивание и импортирование партиций hits:** diff --git a/docs/ru/getting_started/example_datasets/nyc_taxi.md b/docs/ru/getting_started/example_datasets/nyc_taxi.md index 1b23636c5c8..5f8935b14ef 100644 --- a/docs/ru/getting_started/example_datasets/nyc_taxi.md +++ b/docs/ru/getting_started/example_datasets/nyc_taxi.md @@ -373,7 +373,7 @@ Q3: 0.051 sec. Q4: 0.072 sec. В этом случае, время выполнения запросов определяется в первую очередь сетевыми задержками. -Мы выполняли запросы с помощью клиента, расположенного в датацентре Яндекса в Мянтсяля (Финляндия), на кластер в России, что добавляет порядка 20 мс задержки. +Мы выполняли запросы с помощью клиента, расположенного в дата-центре Яндекса в Мянтсяля (Финляндия), на кластер в России, что добавляет порядка 20 мс задержки. 
## Резюме diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md index 822955df9eb..c03ac58f24b 100644 --- a/docs/ru/getting_started/index.md +++ b/docs/ru/getting_started/index.md @@ -61,11 +61,11 @@ $ sudo yum install clickhouse-server clickhouse-client Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты. -### Из исходникого кода +### Из исходного кода Для компиляции ClickHouse вручную, используйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build_osx.md). -Можно скомпилировать пакеты и установить их, либо использовать программы без установки пакетов. Также при ручой сборке можно отключить необходимость поддержки набора инструкций SSE 4.2 или собрать под процессоры архитектуры AArch64. +Можно скомпилировать пакеты и установить их, либо использовать программы без установки пакетов. Также при ручной сборке можно отключить необходимость поддержки набора инструкций SSE 4.2 или собрать под процессоры архитектуры AArch64. ```text Client: dbms/programs/clickhouse-client diff --git a/docs/ru/guides/apply_catboost_model.md b/docs/ru/guides/apply_catboost_model.md new file mode 100644 index 00000000000..9f93aacbd22 --- /dev/null +++ b/docs/ru/guides/apply_catboost_model.md @@ -0,0 +1,230 @@ +# Применение модели CatBoost в ClickHouse {#applying-catboost-model-in-clickhouse} + +[CatBoost](https://catboost.ai) — открытая программная библиотека разработанная компанией [Яндекс](https://yandex.ru/company/) для машинного обучения, которая использует схему градиентного бустинга. + +С помощью этой инструкции вы научитесь применять предобученные модели в ClickHouse: в результате вы запустите вывод модели из SQL. + +Чтобы применить модель CatBoost в ClickHouse: + +1. [Создайте таблицу](#create-table). +2. [Вставьте данные в таблицу](#insert-data-to-table). +3. [Интегрируйте CatBoost в ClickHouse](#integrate-catboost-into-clickhouse) (Опциональный шаг). +4. [Запустите вывод модели из SQL](#run-model-inference). + +Подробнее об обучении моделей в CatBoost, см. [Обучение и применение моделей](https://catboost.ai/docs/features/training.html#training). + +## Перед началом работы {#prerequisites} + +Если у вас еще нет [Docker](https://docs.docker.com/install/), установите его. + +!!! note "Примечание" + [Docker](https://www.docker.com) – это программная платформа для создания контейнеров, которые изолируют установку CatBoost и ClickHouse от остальной части системы. + +Перед применением модели CatBoost: + +**1.** Скачайте [Docker-образ](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) из реестра: + +```bash +$ docker pull yandex/tutorial-catboost-clickhouse +``` + +Данный Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse: код, среду выполнения, библиотеки, переменные окружения и файлы конфигурации. + +**2.** Проверьте, что Docker-образ успешно скачался: + +```bash +$ docker image ls +REPOSITORY TAG IMAGE ID CREATED SIZE +yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22 hours ago 1.37GB +``` + +**3.** Запустите Docker-контейнер основанный на данном образе: + +```bash +$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse +``` + +## 1. Создайте таблицу {#create-table} + +Чтобы создать таблицу для обучающей выборки: + +**1.** Запустите клиент ClickHouse: + +```bash +$ clickhouse client +``` + +!!! note "Примечание" + Сервер ClickHouse уже запущен внутри Docker-контейнера. 
+ +**2.** Создайте таблицу в ClickHouse с помощью следующей команды: + +```sql +:) CREATE TABLE amazon_train +( + date Date MATERIALIZED today(), + ACTION UInt8, + RESOURCE UInt32, + MGR_ID UInt32, + ROLE_ROLLUP_1 UInt32, + ROLE_ROLLUP_2 UInt32, + ROLE_DEPTNAME UInt32, + ROLE_TITLE UInt32, + ROLE_FAMILY_DESC UInt32, + ROLE_FAMILY UInt32, + ROLE_CODE UInt32 +) +ENGINE = MergeTree() +``` + +**3.** Выйдите из клиента ClickHouse: + +```sql +:) exit +``` + +## 2. Вставьте данные в таблицу {#insert-data-to-table} + +Чтобы вставить данные: + +**1.** Выполните следующую команду: + +```bash +$ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv +``` + +**2.** Запустите клиент ClickHouse: + +```bash +$ clickhouse client +``` + +**3.** Проверьте, что данные успешно загрузились: + +```sql +:) SELECT count() FROM amazon_train + +SELECT count() +FROM amazon_train + ++-count()-+ +| 65538 | ++---------+ +``` + +## 3. Интегрируйте CatBoost в ClickHouse {#integrate-catboost-into-clickhouse} + +!!! note "Примечание" + **Опциональный шаг.** Docker-образ содержит все необходимое для запуска CatBoost и ClickHouse. + +Чтобы интегрировать CatBoost в ClickHouse: + +**1.** Создайте библиотеку для оценки модели. + +Наиболее быстрый способ оценить модель CatBoost — это скомпилировать библиотеку `libcatboostmodel.`. Подробнее о том, как скомпилировать библиотеку, читайте в [документации CatBoost](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html). + +**2.** Создайте в любом месте новую директорию с произвольным названием, например `data` и поместите в нее созданную библиотеку. Docker-образ уже содержит библиотеку `data/libcatboostmodel.so`. + +**3.** Создайте в любом месте новую директорию для конфигурации модели с произвольным названием, например `models`. + +**4.** Создайте файл конфигурации модели с произвольным названием, например `models/amazon_model.xml`. + +**5.** Опишите конфигурацию модели: + +```xml + + + + catboost + + amazon + + /home/catboost/tutorial/catboost_model.bin + + 0 + + +``` + +**6.** Добавьте в конфигурацию ClickHouse путь к CatBoost и конфигурации модели: + +```xml + +/home/catboost/data/libcatboostmodel.so +/home/catboost/models/*_model.xml +``` + +## 4. Запустите вывод модели из SQL {#run-model-inference} + +Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`. + +Проверьте, что модель работает: + +```sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) > 0 AS prediction, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Примечание" + Функция [modelEvaluate](../query_language/functions/other_functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами. + +Спрогнозируйте вероятность: + +```sql +:) SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. / (1 + exp(-prediction)) AS probability, + ACTION AS target +FROM amazon_train +LIMIT 10 +``` + +!!! note "Примечание" + Подробнее про функцию [exp()](../query_language/functions/math_functions.md). 
+ +Посчитайте логистическую функцию потерь (LogLoss) на всей выборке: + +```sql +:) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss +FROM +( + SELECT + modelEvaluate('amazon', + RESOURCE, + MGR_ID, + ROLE_ROLLUP_1, + ROLE_ROLLUP_2, + ROLE_DEPTNAME, + ROLE_TITLE, + ROLE_FAMILY_DESC, + ROLE_FAMILY, + ROLE_CODE) AS prediction, + 1. / (1. + exp(-prediction)) AS prob, + ACTION AS tg + FROM amazon_train +) +``` + +!!! note "Примечание" + Подробнее про функции [avg()](../query_language/agg_functions/reference.md#agg_function-avg), [log()](../query_language/functions/math_functions.md). \ No newline at end of file diff --git a/docs/ru/guides/index.md b/docs/ru/guides/index.md new file mode 100644 index 00000000000..d8bad70482d --- /dev/null +++ b/docs/ru/guides/index.md @@ -0,0 +1,5 @@ +# Руководства + +Подробные пошаговые инструкции, которые помогут вам решать различные задачи с помощью ClickHouse. + +- [Применение модели CatBoost в ClickHouse](apply_catboost_model.md) \ No newline at end of file diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index a2b624c6f21..f2040c4af1b 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -15,7 +15,7 @@ Connected to ClickHouse server version 0.0.26176. ## Использование {#cli_usage} -Клиент может быть использован в интерактивном и неинтерактивном (batch) режиме. +Клиент может быть использован в интерактивном и не интерактивном (batch) режиме. Чтобы использовать batch режим, укажите параметр query, или отправьте данные в stdin (проверяется, что stdin - не терминал), или и то, и другое. Аналогично HTTP интерфейсу, при использовании одновременно параметра query и отправке данных в stdin, запрос составляется из конкатенации параметра query, перевода строки, и данных в stdin. Это удобно для больших INSERT запросов. @@ -140,4 +140,5 @@ $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM False ``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/interfaces/cli/) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 00fcba23300..36f7ae462c4 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -939,7 +939,7 @@ ClickHouse поддерживает настраиваемую точность Неподдержанные типы данных Parquet: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствущих полей файла в формате Parquet. При вставке данных, ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. +Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных, ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. 
### Inserting and Selecting Data diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index a00239b1579..5ab706da67b 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -13,9 +13,10 @@ - [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw) - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (использует [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - [pg2ch](https://github.com/mkabilov/pg2ch) + - [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw) - [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server) - [ClickHouseMightrator](https://github.com/zlzforever/ClickHouseMigrator) -- Очереди ообщений +- Очереди сообщений - [Kafka](https://kafka.apache.org) - [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (использует [Go client](https://github.com/kshvakov/clickhouse/)) - Хранилища объектов diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 1f7402fd25e..6892483565e 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -15,7 +15,7 @@ ## Снимки файловой системы -Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](table_engines/distributed.md) таблиц, используемых для запросов `SELECT`. Cнимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. +Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](table_engines/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. ## clickhouse-copier diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md index 3fe59c92573..da24c7e960b 100644 --- a/docs/ru/operations/monitoring.md +++ b/docs/ru/operations/monitoring.md @@ -17,18 +17,18 @@ ClickHouse не отслеживает состояние аппаратных - Использование системы хранения, оперативной памяти и сети. -## Метрики сервера ClickHouse. +## Метрики сервера ClickHouse Сервер ClickHouse имеет встроенные инструменты мониторинга. -Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](#server_settings-logger). +Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](server_settings/settings.md#server_settings-logger). ClickHouse собирает: - Различные метрики того, как сервер использует вычислительные ресурсы. - Общую статистику обработки запросов. 
-Метрики находятся в таблицах [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics). +Метрики находятся в таблицах [system.metrics](system_tables.md#system_tables-metrics), [system.events](system_tables.md#system_tables-events) и [system.asynchronous_metrics](system_tables.md#system_tables-asynchronous_metrics). Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). Смотрите секцию [graphite](server_settings/settings.md#server_settings-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html). diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index fa90d61c876..e72d97bdc46 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -130,7 +130,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## graphite {#server_settings-graphite} -Отправка даных в [Graphite](https://github.com/graphite-project). +Отправка данных в [Graphite](https://github.com/graphite-project). Настройки: @@ -139,10 +139,10 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat - interval - Период отправки в секундах. - timeout - Таймаут отправки данных в секундах. - root_path - Префикс для ключей. -- metrics - Отправка данных из таблицы :ref:`system_tables-system.metrics`. -- events - Отправка дельты данных, накопленной за промежуток времени из таблицы :ref:`system_tables-system.events` -- events_cumulative - Отправка суммарных данных из таблицы :ref:`system_tables-system.events` -- asynchronous_metrics - Отправка данных из таблицы :ref:`system_tables-system.asynchronous_metrics`. +- metrics - Отправка данных из таблицы [system.metrics](../system_tables.md#system_tables-metrics). +- events - Отправка дельты данных, накопленной за промежуток времени из таблицы [system.events](../system_tables.md#system_tables-events). +- events_cumulative - Отправка суммарных данных из таблицы [system.events](../system_tables.md#system_tables-events). +- asynchronous_metrics - Отправка данных из таблицы [system.asynchronous_metrics](../system_tables.md#system_tables-asynchronous_metrics). Можно определить несколько секций ``, например, для передачи различных данных с различной частотой. @@ -213,7 +213,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat **Пример** -Показывает `https://tabix.io/` при обращенинии к `http://localhost:http_port`. +Показывает `https://tabix.io/` при обращении к `http://localhost:http_port`. ```xml @@ -305,11 +305,11 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## logger {#server_settings-logger} -Настройки логгирования. +Настройки логирования. Ключи: -- level - Уровень логгирования. Допустимые значения: ``trace``, ``debug``, ``information``, ``warning``, ``error``. +- level - Уровень логирования. Допустимые значения: ``trace``, ``debug``, ``information``, ``warning``, ``error``. - log - Файл лога. Содержит все записи согласно ``level``. - errorlog - Файл лога ошибок. - size - Размер файла. Действует для ``log`` и ``errorlog``. Как только файл достиг размера ``size``, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога. 
@@ -407,7 +407,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat По умолчанию - `maximum`. -Рекомендуется использовать в Mac OS X, поскольу функция `getrlimit()` возвращает некорректное значение. +Рекомендуется использовать в Mac OS X, поскольку функция `getrlimit()` возвращает некорректное значение. **Пример** @@ -458,25 +458,25 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat Ключи настроек сервера/клиента: -- privateKeyFile - Путь к файлу с секретным ключем сертификата в формате PEM. Файл может содержать ключ и сертификат одновременно. +- privateKeyFile - Путь к файлу с секретным ключом сертификата в формате PEM. Файл может содержать ключ и сертификат одновременно. - certificateFile - Путь к файлу сертификата клиента/сервера в формате PEM. Можно не указывать, если ``privateKeyFile`` содержит сертификат. - caConfig - Путь к файлу или каталогу, которые содержат доверенные корневые сертификаты. - verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: ``none``, ``relaxed``, ``strict``, ``once``. -- verificationDepth - Максимальная длина верификационой цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. +- verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. - loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: ``true``, ``false``. | - cipherList - Поддерживаемые OpenSSL-шифры. Например, ``ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``. - cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с ``sessionIdContext``. Допустимые значения: ``true``, ``false``. - sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать ``SSL_MAX_SSL_SESSION_ID_LENGTH``. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию ``${application.name}``. - sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий. -- sessionTimeout - Время кеширования сессии на севрере. +- sessionTimeout - Время кеширования сессии на сервере. - extendedVerification - Автоматическая расширенная проверка сертификатов после завершении сессии. Допустимые значения: ``true``, ``false``. - requireTLSv1 - Требование соединения TLSv1. Допустимые значения: ``true``, ``false``. - requireTLSv1_1 - Требование соединения TLSv1.1. Допустимые значения: ``true``, ``false``. - requireTLSv1_2 - Требование соединения TLSv1.2. Допустимые значения: ``true``, ``false``. - fips - Активация режима OpenSSL FIPS. Поддерживается, если версия OpenSSL, с которой собрана библиотека поддерживает fips. - privateKeyPassphraseHandler - Класс (подкласс PrivateKeyPassphraseHandler)запрашивающий кодовую фразу доступа к секретному ключу. Например, ````, ``KeyFileHandler``, ``test``, ````. -- invalidCertificateHandler - Класс (подкласс CertificateHandler) для подтвеждения невалидных сертификатов. Например, `` ConsoleCertificateHandler ``. -- disableProtocols - Запрещенные к искользованию протоколы. 
+- invalidCertificateHandler - Класс (подкласс CertificateHandler) для подтверждения не валидных сертификатов. Например, `` ConsoleCertificateHandler ``. +- disableProtocols - Запрещенные к использованию протоколы. - preferServerCiphers - Предпочтение серверных шифров на клиенте. **Пример настройки:** @@ -512,11 +512,11 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## part_log {#server_settings-part-log} -Логгирование событий, связанных с данными типа [MergeTree](../../operations/table_engines/mergetree.md). Например, события добавления или мержа данных. Лог можно использовать для симуляции алгоритмов слияния, чтобы сравнивать их характеристики. Также, можно визуализировать процесс слияния. +Логирование событий, связанных с данными типа [MergeTree](../../operations/table_engines/mergetree.md). Например, события добавления или мержа данных. Лог можно использовать для симуляции алгоритмов слияния, чтобы сравнивать их характеристики. Также, можно визуализировать процесс слияния. -Запросы логгируются не в отдельный файл, а в таблицу [system.part_log](../system_tables.md#system_tables-part-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются не в отдельный файл, а в таблицу [system.part_log](../system_tables.md#system_tables-part-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). -При настройке логгирования используются следующие параметры: +При настройке логирования используются следующие параметры: - `database` — имя базы данных; - `table` — имя таблицы; @@ -650,7 +650,7 @@ TCP порт для защищённого обмена данными с кли Размер кеша (в байтах) для несжатых данных, используемых движками таблиц семейства [MergeTree](../../operations/table_engines/mergetree.md). -Кеш единый для сервера. Память выделяется по-требованию. Кеш используется в том случае, если включена опция [use_uncompressed_cache](../settings/settings.md). +Кеш единый для сервера. Память выделяется по требованию. Кеш используется в том случае, если включена опция [use_uncompressed_cache](../settings/settings.md). Несжатый кеш выгодно использовать для очень коротких запросов в отдельных случаях. diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md index b9b211e9394..9ace3824ac4 100644 --- a/docs/ru/operations/settings/index.md +++ b/docs/ru/operations/settings/index.md @@ -12,11 +12,11 @@ - Настройки для сессии. Из консольного клиента ClickHouse в интерактивном режиме отправьте запрос `SET setting=value`. - Аналогично можно использовать ClickHouse-сессии в HTTP-протоколе, для этого необходимо указывать HTTP-праметр `session_id`. + Аналогично можно использовать ClickHouse-сессии в HTTP-протоколе, для этого необходимо указывать HTTP-параметр `session_id`. - Настройки для запроса. - - При запуске консольного клиента ClickHouse в неинтерактивном режиме установите параметр запуска `--setting=value`. + - При запуске консольного клиента ClickHouse в не интерактивном режиме установите параметр запуска `--setting=value`. - При использовании HTTP API передавайте cgi-параметры (`URL?setting_1=value&setting_2=value...`). Настройки, которые можно задать только в конфигурационном файле сервера, в разделе не рассматриваются. 
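+Для иллюстрации: настройку уровня сессии можно изменить и сразу проверить следующим образом (значение `8` выбрано произвольно, только для примера):
+
+```sql
+SET max_threads = 8;
+
+-- Проверяем текущее значение настройки.
+SELECT name, value FROM system.settings WHERE name = 'max_threads';
+```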
diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index 864764add48..d1372d8c38f 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -3,7 +3,7 @@ Ограничения на сложность запроса - часть настроек. Используются, чтобы обеспечить более безопасное исполнение запросов из пользовательского интерфейса. Почти все ограничения действуют только на SELECT-ы. -При распределённой обработке запроса, ограничения действуют на каждом сервере по-отдельности. +При распределённой обработке запроса, ограничения действуют на каждом сервере по отдельности. Ограничения проверяются на каждый блок обработанных данных, а не на каждую строку. В связи с этим, ограничения могут быть превышены на размер блока. @@ -53,7 +53,7 @@ ## max_rows_to_read Следующие ограничения могут проверяться на каждый блок (а не на каждую строку). То есть, ограничения могут быть немного нарушены. -При выполнении запроса в несколько потоков, следующие ограничения действуют в каждом потоке по-отдельности. +При выполнении запроса в несколько потоков, следующие ограничения действуют в каждом потоке по отдельности. Максимальное количество строчек, которое можно прочитать из таблицы при выполнении запроса. @@ -257,7 +257,7 @@ **Смотрите также** - [Секция JOIN](../../query_language/select.md#select-join) -- [Движо таблиц Join](../table_engines/join.md) +- [Движок таблиц Join](../table_engines/join.md) ## max_partitions_per_insert_block diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ef541c16c0a..90803a7d27e 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -207,6 +207,7 @@ Ok. - [JSONEachRow](../../interfaces/formats.md#jsoneachrow) - [CSV](../../interfaces/formats.md#csv) +- [TabSeparated](../../interfaces/formats.md#tabseparated) !!! note "Примечание" Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность. @@ -220,7 +221,7 @@ Ok. ## input_format_null_as_default {#settings-input_format_null_as_default} -Включает или отключает использование значений по-умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для фомата CSV). +Включает или отключает использование значений по умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для текстовых форматов). ## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields} @@ -375,7 +376,7 @@ Ok. ## preferred_block_size_bytes -Служит для тех же целей что и `max_block_size`, но задает рекомедуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. +Служит для тех же целей что и `max_block_size`, но задает рекомендуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. При этом размер блока не может быть более `max_block_size` строк. По умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. @@ -445,9 +446,9 @@ ClickHouse использует этот параметр при чтении д ## log_queries {#settings-log-queries} -Установка логгирования запроса. +Установка логирования запроса. -Запросы, переданные в ClickHouse с этой установкой, логгируются согласно правилам конфигурационного параметра сервера [query_log](../server_settings/settings.md#server_settings-query-log). 
+Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../server_settings/settings.md#server_settings-query-log). **Пример** : @@ -469,7 +470,7 @@ ClickHouse использует этот параметр при чтении д Отключает отстающие реплики при распределенных запросах. Смотрите "[Репликация](../../operations/table_engines/replication.md)". -Устанавливает время в секундах. Если оставание реплики больше установленного значения, то реплика не используется. +Устанавливает время в секундах. Если отставание реплики больше установленного значения, то реплика не используется. Значение по умолчанию: 300. @@ -643,9 +644,9 @@ load_balancing = in_order load_balancing = first_or_random ``` -Алгоритм выбирает первую реплику или случайную реплику, если первая недоступна. Он эффективен в топологиях с перекрестной репликацей, но бесполезен в других конфигурациях. +Алгоритм выбирает первую реплику или случайную реплику, если первая недоступна. Он эффективен в топологиях с перекрестной репликацией, но бесполезен в других конфигурациях. -Алгоритм `first or random` решает проблему алгоритма `in order`. При использовании `in order`, если одна реплика перестаёт ответчать, то следующая за ней принимает двойную нагрузку, в то время как все остальные обрабатываю свой обычный трафик. Алгоритм `first or random` равномерно распределяет нагрузку между репликами. +Алгоритм `first or random` решает проблему алгоритма `in order`. При использовании `in order`, если одна реплика перестаёт отвечать, то следующая за ней принимает двойную нагрузку, в то время как все остальные обрабатываю свой обычный трафик. Алгоритм `first or random` равномерно распределяет нагрузку между репликами. ## prefer_localhost_replica {#settings-prefer_localhost_replica} diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index ee50dfddce9..eb757480774 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -77,7 +77,7 @@ user String — имя пользователя, которого использ - `marks_bytes` (UInt64) — размер засечек в байтах. - `comment` (String) — комментарий к столбцу или пустая строка. - `is_in_partition_key` (UInt8) — флаг, показывающий включение столбца в ключ партиционирования. -- `is_in_sorting_key` (UInt8) — флаг, показываюший включение столбца в ключ сортировки. +- `is_in_sorting_key` (UInt8) — флаг, показывающий включение столбца в ключ сортировки. - `is_in_primary_key` (UInt8) — флаг, показывающий включение столбца в первичный ключ. - `is_in_sampling_key` (UInt8) — флаг, показывающий включение столбца в ключ выборки. @@ -242,6 +242,8 @@ SELECT * FROM system.events LIMIT 5 - `value` ([Int64](../data_types/int_uint.md)) — значение метрики. - `description` ([String](../data_types/string.md)) — описание метрики. +Список поддержанных метрик смотрите в файле [dbms/src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/src/Common/CurrentMetrics.cpp). + **Пример** ```sql @@ -394,9 +396,9 @@ query_id String - идентификатор запроса, если !!! note "Внимание" Таблица не содержит входных данных для запросов `INSERT`. -ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query_log](server_settings/settings.md#server_settings-query-log). Параметр задаёт правила ведения лога, такие как интервал логгирования или имя таблицы, в которую будут логгироваться запросы. 
+ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query_log](server_settings/settings.md#server_settings-query-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. -Чтобы включить логгирование, задайте значение параметра [log_queries](settings/settings.md#settings-log-queries) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md). +Чтобы включить логирование, задайте значение параметра [log_queries](settings/settings.md#settings-log-queries) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md). Таблица `system.query_log` содержит информацию о двух видах запросов: @@ -459,7 +461,7 @@ ClickHouse создаёт таблицу только в том случае, к - Количество сетевых ошибок. - Время, потраченное на ожидание, когда пропускная способность сети ограничена. - `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`. -- `Settings.Names` (Array(String)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логгирование изменений настроек, установите параметр `log_query_settings` равным 1. +- `Settings.Names` (Array(String)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1. - `Settings.Values` (Array(String)) — Значения настроек, которые перечислены в столбце `Settings.Names`. Каждый запрос создаёт одну или две строки в таблице `query_log`, в зависимости от статуса запроса: @@ -468,7 +470,7 @@ ClickHouse создаёт таблицу только в том случае, к 2. Если во время обработки запроса произошла ошибка, создаются два события с типами 1 и 4. 3. Если ошибка произошла до запуска запроса, создается одно событие с типом 3. -По умолчанию, строки добавляются в таблицу логгирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query_log](server_settings/settings.md#server_settings-query-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. +По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query_log](server_settings/settings.md#server_settings-query-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. Если таблицу удалить вручную, она пересоздастся автоматически "на лету". При этом все логи на момент удаления таблицы будут удалены. diff --git a/docs/ru/operations/table_engines/distributed.md b/docs/ru/operations/table_engines/distributed.md index ecb409a49d4..ceea785d84e 100644 --- a/docs/ru/operations/table_engines/distributed.md +++ b/docs/ru/operations/table_engines/distributed.md @@ -65,7 +65,7 @@ logs - имя кластера в конфигурационном файле с - `port` - TCP-порт для межсерверного взаимодействия (в конфиге - tcp_port, обычно 9000). Не перепутайте с http_port. - `user` - имя пользователя для соединения с удалённым сервером. по умолчанию - default. Этот пользователь должен иметь доступ для соединения с указанным сервером. Доступы настраиваются в файле users.xml, подробнее смотрите в разделе [Права доступа](../../operations/access_rights.md). - `password` - пароль для соединения с удалённым сервером, в открытом виде. 
по умолчанию - пустая строка. - - `secure` - Использовать шифрованое соединение ssl, Обычно используется с портом `port` = 9440. Сервер должен слушать порт 9440 с корректными настройками сертификатов. + - `secure` - Использовать шифрованное соединение ssl, Обычно используется с портом `port` = 9440. Сервер должен слушать порт 9440 с корректными настройками сертификатов. - `compression` - Использовать сжатие данных. По умолчанию: true. diff --git a/docs/ru/operations/table_engines/external_data.md b/docs/ru/operations/table_engines/external_data.md index 358ad7ff9c9..5d297b246b0 100644 --- a/docs/ru/operations/table_engines/external_data.md +++ b/docs/ru/operations/table_engines/external_data.md @@ -6,7 +6,7 @@ ClickHouse позволяет отправить на сервер данные, Если вам нужно будет выполнить более одного запроса с достаточно большими внешними данными - лучше не использовать эту функциональность, а загрузить данные в БД заранее. -Внешние данные могут быть загружены как с помощью клиента командной строки (в неинтерактивном режиме), так и через HTTP-интерфейс. +Внешние данные могут быть загружены как с помощью клиента командной строки (в не интерактивном режиме), так и через HTTP-интерфейс. В клиенте командной строки, может быть указана секция параметров вида diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 61bdbc76457..931a969f076 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -1,6 +1,6 @@ # MergeTree {#table_engines-mergetree} -Движок `MergeTree`, а также другие движки этого семейства (`*MergeTree`) — это наиболее функциональные движки таблиц ClickHousе. +Движок `MergeTree`, а также другие движки этого семейства (`*MergeTree`) — это наиболее функциональные движки таблиц ClickHouse. Основная идея, заложенная в основу движков семейства `MergeTree` следующая. Когда у вас есть огромное количество данных, которые должны быть вставлены в таблицу, вы должны быстро записать их по частям, а затем объединить части по некоторым правилам в фоновом режиме. Этот метод намного эффективнее, чем постоянная перезапись данных в хранилище при вставке. @@ -74,7 +74,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Должно зависеть от столбца `Date` или `DateTime` и возвращать столбец `Date` или `DateTime`. Пример:`TTL date + INTERVAL 1 DAY` - Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](mergetree.md) + Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl) - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`: @@ -465,7 +465,7 @@ ALTER TABLE example_table ### Конфигурация {#table_engine-mergetree-multiple-volumes_configure} -Диски, тома и политики хранения задаются в корне конфигурации (внутри тега ``) в основном файле `config.xml` или в отдельном файле в директории `config.d`. Правила составления данной секции конфигурации имеет следующую структуру: +Диски, тома и политики хранения задаются внутри тега `` в основном файле `config.xml` или в отдельном файле в директории `config.d`. Правила составления данной секции конфигурации имеет следующую структуру: ```xml @@ -514,9 +514,9 @@ ALTER TABLE example_table disk1 - - 0.2 + + 0.2 ``` @@ -532,7 +532,7 @@ ALTER TABLE example_table Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них. 
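As an aside to the mergetree.md hunk above, which points the reader at the TTL section (`TTL date + INTERVAL 1 DAY`): a hedged sketch of how column-level and table-level TTL look in a CREATE TABLE statement. The table and column names are made up for illustration.

```sql
CREATE TABLE ttl_demo
(
    d       Date,
    event   String,
    payload String TTL d + INTERVAL 7 DAY   -- column-level TTL: expired values are reset on merges
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 30 DAY;                    -- table-level TTL: expired rows are removed on merges
```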
-После задания конфигурации дисков и политик хранения их можно использовать, как настройку при создании таблиц: +После задания конфигурации политик хранения их можно использовать, как настройку при создании таблиц: ```sql CREATE TABLE table_with_non_default_policy ( diff --git a/docs/ru/operations/table_engines/replication.md b/docs/ru/operations/table_engines/replication.md index 3076225feb3..61a5cf3b56f 100644 --- a/docs/ru/operations/table_engines/replication.md +++ b/docs/ru/operations/table_engines/replication.md @@ -61,9 +61,9 @@ Блоки данных дедуплицируются. При многократной записи одного и того же блока данных (блоков данных одинакового размера, содержащих одни и те же строчки в одном и том же порядке), блок будет записан только один раз. Это сделано для того, чтобы в случае сбоя в сети, когда клиентское приложение не может понять, были ли данные записаны в БД, можно было просто повторить запрос `INSERT`. При этом не имеет значения, на какую реплику будут отправлены INSERT-ы с одинаковыми данными. Запрос `INSERT` идемпотентный. Параметры дедуплицирования регулируются настройками сервера [merge_tree](../server_settings/settings.md#server_settings-merge_tree) -При репликации, по сети передаются только исходные вставляемые данные. Дальнейшие преобразования данных (слияния) координируются и делаются на всех репликах одинаковым образом. За счёт этого минимизируется использование сети, и благодаря этому, репликация хорошо работает при расположении реплик в разных датацентрах. (Стоит заметить, что дублирование данных в разных датацентрах, по сути, является основной задачей репликации). +При репликации, по сети передаются только исходные вставляемые данные. Дальнейшие преобразования данных (слияния) координируются и делаются на всех репликах одинаковым образом. За счёт этого минимизируется использование сети, и благодаря этому, репликация хорошо работает при расположении реплик в разных дата-центрах. (Стоит заметить, что дублирование данных в разных дата-центрах, по сути, является основной задачей репликации). -Количество реплик одних и тех же данных может быть произвольным. В Яндекс.Метрике в продакшене используется двухкратная репликация. На каждом сервере используется RAID-5 или RAID-6, в некоторых случаях RAID-10. Это является сравнительно надёжным и удобным для эксплуатации решением. +Количество реплик одних и тех же данных может быть произвольным. В Яндекс.Метрике в продакшене используется двукратная репликация. На каждом сервере используется RAID-5 или RAID-6, в некоторых случаях RAID-10. Это является сравнительно надёжным и удобным для эксплуатации решением. Система следит за синхронностью данных на репликах и умеет восстанавливаться после сбоя. Восстановление после сбоя автоматическое (в случае небольших различий в данных) или полуавтоматическое (когда данные отличаются слишком сильно, что может свидетельствовать об ошибке конфигурации). diff --git a/docs/ru/operations/table_engines/summingmergetree.md b/docs/ru/operations/table_engines/summingmergetree.md index 05d20ba60a6..f1fbdac92f4 100644 --- a/docs/ru/operations/table_engines/summingmergetree.md +++ b/docs/ru/operations/table_engines/summingmergetree.md @@ -31,7 +31,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **Секции запроса** -При создании таблицы `SummingMergeTree` использутся те же [секции](mergetree.md) запроса, что и при создании таблицы `MergeTree`. +При создании таблицы `SummingMergeTree` используются те же [секции](mergetree.md) запроса, что и при создании таблицы `MergeTree`.
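To accompany the summingmergetree.md hunk above, a short, self-contained example of the engine it documents. Because parts are merged in the background at unspecified times, the final SELECT aggregates with `sum()`/`GROUP BY` rather than relying on the merge having already happened.

```sql
CREATE TABLE summtt
(
    key   UInt32,
    value UInt32
)
ENGINE = SummingMergeTree()
ORDER BY key;

INSERT INTO summtt VALUES (1, 1), (1, 2), (2, 1);

-- Rows sharing a key are summed when parts merge; aggregating explicitly gives
-- the deterministic result regardless of merge timing.
SELECT key, sum(value) FROM summtt GROUP BY key;
```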
Устаревший способ создания таблицы diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index 7803f5f9141..821d7d735aa 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -11,7 +11,7 @@ $ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gov ## Ограничение CPU Процессоры могут перегреваться. С помощью `dmesg` можно увидеть, если тактовая частота процессора была ограничена из-за перегрева. -Также ограничение может устанавливаться снаружи на уровне датацентра. С помощью `turbostat` можно за этим наблюдать под нагрузкой. +Также ограничение может устанавливаться снаружи на уровне дата-центра. С помощью `turbostat` можно за этим наблюдать под нагрузкой. ## Оперативная память diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md index 844bd24fc37..bdde1b21d8f 100644 --- a/docs/ru/operations/troubleshooting.md +++ b/docs/ru/operations/troubleshooting.md @@ -128,7 +128,7 @@ $ curl 'http://localhost:8123/' --data-binary "SELECT a" Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there are no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception ``` -Если вы запускаете `clickhouse-client` c параметром `stack-trace`, то ClickHouse возвращает описание ошибки и соответствущий стек вызовов функций на сервере. +Если вы запускаете `clickhouse-client` c параметром `stack-trace`, то ClickHouse возвращает описание ошибки и соответствующий стек вызовов функций на сервере. Может появиться сообщение о разрыве соединения. В этом случае необходимо повторить запрос. Если соединение прерывается каждый раз при выполнении запроса, следует проверить журналы сервера на наличие ошибок. diff --git a/docs/ru/query_language/agg_functions/index.md b/docs/ru/query_language/agg_functions/index.md index fad195991db..54d1bdb6f73 100644 --- a/docs/ru/query_language/agg_functions/index.md +++ b/docs/ru/query_language/agg_functions/index.md @@ -5,7 +5,7 @@ ClickHouse поддерживает также: - [Параметрические агрегатные функции](parametric_functions.md#aggregate_functions_parametric), которые помимо столбцов принимаю и другие параметры. -- [Комбинаторы](combinators.md#aggregate_functions_combinators), которые изменяют поведение агрегатных фунций. +- [Комбинаторы](combinators.md#aggregate_functions_combinators), которые изменяют поведение агрегатных функций. ## Обработка NULL @@ -40,7 +40,7 @@ SELECT sum(y) FROM t_null_big Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`. -Теперь с помощью фукции `groupArray` сформируем массив из стобца `y`: +Теперь с помощью функции `groupArray` сформируем массив из столбца `y`: ```sql SELECT groupArray(y) FROM t_null_big diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/query_language/agg_functions/parametric_functions.md index b0ece3ced11..62c5181a42e 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/query_language/agg_functions/parametric_functions.md @@ -182,7 +182,7 @@ ORDER BY level Рекомендуется использовать для маленьких N - до 10. Максимальное значение N - 100. Для состояния агрегатной функции используется количество оперативки равное 1 + N \* размер одного значения байт. 
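The agg_functions/index.md hunk in this chunk distinguishes ordinary aggregate functions from parametric ones, which take parameters in addition to column arguments. A brief sketch of the two-argument-list syntax, using functions from the reference; the table and column names here are hypothetical.

```sql
SELECT
    quantile(0.9)(request_time) AS p90,   -- parameter: level 0.9; argument: the column
    topK(3)(referer)            AS top3   -- parameter: N = 3;     argument: the column
FROM access_log;
```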
-Для строк запоминается некриптографический хэш, имеющий размер 8 байт. То есть, для строк вычисление приближённое. +Для строк запоминается не криптографический хэш, имеющий размер 8 байт. То есть, для строк вычисление приближённое. Функция также работает для нескольких аргументов. diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index 66d97c8b7ee..a48f510faa4 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -509,7 +509,7 @@ FROM ( ## timeSeriesGroupRateSum(uid, ts, val) {#agg_function-timeseriesgroupratesum} Аналогично timeSeriesGroupRateSum, timeSeriesGroupRateSum будет вычислять производные по timestamp для рядов, а затем суммировать полученные производные для всех рядов для одного значения timestamp. -Также ряды должны быть отсотированы по возрастанию timestamp. +Также ряды должны быть отсортированы по возрастанию timestamp. Для пример из описания timeSeriesGroupRateSum результат будет следующим: @@ -592,7 +592,7 @@ uniqCombined(HLL_precision)(x[, ...]) - Результат детерминирован (не зависит от порядка выполнения запроса). -!! note "Note" +!!! note "Note" Так как используется 32-битный хэш для не-`String` типов, результат будет иметь очень очень большую ошибку для количества разичных элементов существенно больше `UINT_MAX` (ошибка быстро растёт начиная с нескольких десятков миллиардов различных значений), таким образом в этом случае нужно использовать [uniqCombined64](#agg_function-uniqcombined64) По сравнению с функцией [uniq](#agg_function-uniq), `uniqCombined`: @@ -898,7 +898,7 @@ quantileTiming(level)(expr) В противном случае, результат рассчетов округляется до ближайшего числа, кратного 16мс. -!! note "Примечание" +!!! note "Примечание" Для расчёта квантилей времени загрузки страниц, функция работает эффективней и с более высокой точностью, чем функция [quantile](#agg_function-quantile). **Возвращаемое значение** @@ -1083,7 +1083,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD') ### Использование {#agg_functions-stochasticlinearregression-usage} -`stochasticLinearRegression` используется на двух этапах: постоение модели и предсказание новых данных. Чтобы постоить модель и сохранить её состояние для дальнейшего использования, мы используем комбинатор `-State`. +`stochasticLinearRegression` используется на двух этапах: построение модели и предсказание новых данных. Чтобы построить модель и сохранить её состояние для дальнейшего использования, мы используем комбинатор `-State`. Для прогнозирования мы используем функцию [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod), которая принимает в качестве аргументов состояние и свойства для прогнозирования. diff --git a/docs/ru/query_language/create.md b/docs/ru/query_language/create.md index 81994580022..71fe71abbf2 100644 --- a/docs/ru/query_language/create.md +++ b/docs/ru/query_language/create.md @@ -52,10 +52,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine] Создаёт таблицу с такой же структурой, как другая таблица. Можно указать другой движок для таблицы. Если движок не указан, то будет выбран такой же движок, как у таблицы `db2.name2`. ```sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_fucntion() +CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function() ``` -Создаёт таблицу с такой же структурой и данными, как результат соотвествующей табличной функцией. 
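The create.md lines at this point document `CREATE TABLE ... AS table_function()` (the corrected wording follows just below). A hedged sketch of that form using the `remote` table function described later in this same diff; the host, database, and table names are placeholders.

```sql
-- The new table takes its structure and data from the table function it wraps.
CREATE TABLE hits_mirror AS remote('example01-01-1', default.hits);

SELECT count() FROM hits_mirror;
```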
+Создаёт таблицу с такой же структурой и данными, как результат соответствующей табличной функцией. ```sql CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... diff --git a/docs/ru/query_language/dicts/external_dicts_dict_layout.md b/docs/ru/query_language/dicts/external_dicts_dict_layout.md index 22fd6ec0d91..e038a5c0610 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_layout.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_layout.md @@ -6,7 +6,7 @@ Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе "[cache](#cache)". -Повысить производительнось словарей можно следующими способами: +Повысить производительность словарей можно следующими способами: - Вызывать функцию для работы со словарём после `GROUP BY`. - Помечать извлекаемые атрибуты как инъективные. Атрибут называется инъективным, если разным ключам соответствуют разные значения атрибута. Тогда при использовании в `GROUP BY` функции, достающей значение атрибута по ключу, эта функция автоматически выносится из `GROUP BY`. @@ -281,6 +281,6 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) Никакие другие типы не поддерживаются. Функция возвращает атрибут для префикса, соответствующего данному IP-адресу. Если есть перекрывающиеся префиксы, возвращается наиболее специфический. -Данные хранятся в побитовом дереве (`trie`), он должены полностью помещаться в оперативной памяти. +Данные хранятся в побитовом дереве (`trie`), он должен полностью помещаться в оперативной памяти. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/dicts/external_dicts_dict_layout/) diff --git a/docs/ru/query_language/dicts/external_dicts_dict_sources.md b/docs/ru/query_language/dicts/external_dicts_dict_sources.md index f6ce79cc094..65c98fa18b9 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_sources.md @@ -122,12 +122,12 @@ ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных. -Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствущим разделом [FAQ](../../faq/general.md#oracle-odbc-encodings). +Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../faq/general.md#oracle-odbc-encodings). ### Выявленная уязвимость в функционировании ODBC словарей !!! attention - При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпроментированы. + При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпрометированы. 
**Пример небезопасного использования** diff --git a/docs/ru/query_language/dicts/external_dicts_dict_structure.md b/docs/ru/query_language/dicts/external_dicts_dict_structure.md index b553a6fdbea..6423451c1be 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_structure.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_structure.md @@ -56,10 +56,10 @@ ClickHouse поддерживает следующие виды ключей: ### Составной ключ -Ключoм может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external_dicts_dict_layout.md) должен быть `complex_key_hashed` или `complex_key_cache`. +Ключом может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external_dicts_dict_layout.md) должен быть `complex_key_hashed` или `complex_key_cache`. !!! tip "Совет" - Cоставной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. + Составной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. Структура ключа задаётся в элементе ``. Поля ключа задаются в том же формате, что и [атрибуты](external_dicts_dict_structure.md) словаря. Пример: diff --git a/docs/ru/query_language/functions/ext_dict_functions.md b/docs/ru/query_language/functions/ext_dict_functions.md index c07f8adf84c..8ccf9a31e91 100644 --- a/docs/ru/query_language/functions/ext_dict_functions.md +++ b/docs/ru/query_language/functions/ext_dict_functions.md @@ -16,13 +16,13 @@ dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `attr_name` — имя столбца словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md) или [Tuple](../../data_types/tuple.md) в зависимости от конфигурации словаря. -- `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключем `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. +- `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. **Возвращаемое значение** - Значение атрибута, соответствующее ключу `id_expr`, если ClickHouse смог привести это значение к [заданному типу данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes). -- Если ключа, соответствущего `id_expr` в словаре нет, то: +- Если ключа, соответствующего `id_expr` в словаре нет, то: - `dictGet` возвращает содержимое элемента ``, указанного для атрибута в конфигурации словаря. - `dictGetOrDefault` возвращает атрибут `default_value_expr`. @@ -177,7 +177,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `attr_name` — имя столбца словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). -- `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключем `id_expr`. 
[Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. +- `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. **Возвращаемое значение** diff --git a/docs/ru/query_language/functions/functions_for_nulls.md b/docs/ru/query_language/functions/functions_for_nulls.md index 65457fa81b7..948b2d41f60 100644 --- a/docs/ru/query_language/functions/functions_for_nulls.md +++ b/docs/ru/query_language/functions/functions_for_nulls.md @@ -1,4 +1,4 @@ -# Функции для работы с Nullable-агрументами +# Функции для работы с Nullable-аргументами ## isNull diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 55789c93ab4..74ea1e12219 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -80,7 +80,7 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) - `(x, y)` — координаты точки на плоскости. Тип данных — [Tuple](../../data_types/tuple.md) — кортеж из двух чисел. - `[(a, b), (c, d) ...]` — вершины многоугольника. Тип данных — [Array](../../data_types/array.md). Каждая вершина представлена парой координат `(a, b)`. Вершины следует указывать в порядке обхода по или против часовой стрелки. Минимальное количество вершин — 3. Многоугольник должен быть константным. -- функция поддерживает также многоугольники с дырками (вырезанными кусками). Для этого случая, добавьте многоугольники, описывающие вырезанные куски, дополнительными аргументами функции. Функция не поддерживает неодносвязные многоугольники. +- функция поддерживает также многоугольники с дырками (вырезанными кусками). Для этого случая, добавьте многоугольники, описывающие вырезанные куски, дополнительными аргументами функции. Функция не поддерживает не односвязные многоугольники. **Возвращаемые значения** diff --git a/docs/ru/query_language/functions/hash_functions.md b/docs/ru/query_language/functions/hash_functions.md index e66cee3b344..f7d2237a071 100644 --- a/docs/ru/query_language/functions/hash_functions.md +++ b/docs/ru/query_language/functions/hash_functions.md @@ -122,7 +122,7 @@ SELECT groupBitXor(cityHash64(*)) FROM table ## intHash32 Вычисляет 32-битный хэш-код от целого числа любого типа. -Это сравнительно быстрая некриптографическая хэш-функция среднего качества для чисел. +Это сравнительно быстрая не криптографическая хэш-функция среднего качества для чисел. ## intHash64 @@ -142,7 +142,7 @@ SELECT groupBitXor(cityHash64(*)) FROM table ## URLHash(url\[, N\]) -Быстрая некриптографическая хэш-функция неплохого качества для строки, полученной из URL путём некоторой нормализации. +Быстрая не криптографическая хэш-функция неплохого качества для строки, полученной из URL путём некоторой нормализации. `URLHash(s)` - вычислить хэш от строки без одного завершающего символа `/`, `?` или `#` на конце, если там такой есть. `URLHash(s, N)` - вычислить хэш от строки до N-го уровня в иерархии URL, без одного завершающего символа `/`, `?` или `#` на конце, если там такой есть. Уровни аналогичные URLHierarchy. Функция специфична для Яндекс.Метрики. @@ -270,7 +270,7 @@ SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00: ## jumpConsistentHash Вычисляет JumpConsistentHash от значения типа UInt64. -Принимает аргумент типа UInt64. Возвращает значение типа Int32. 
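An aside at the `jumpConsistentHash` lines here (the corrected two-parameter description follows immediately below): a one-line sketch of the call with a key and a bucket count.

```sql
-- Maps a UInt64 key onto one of 8 buckets; the result is an Int32 in the range 0..7.
SELECT jumpConsistentHash(toUInt64(42), 8) AS bucket;
```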
+Имеет два параметра: ключ типа UInt64 и количество бакетов. Возвращает значение типа Int32. Дополнительные сведения смотрите по ссылке: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf) ## murmurHash2_32, murmurHash2_64 diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index c03486379ff..a7e6051e541 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -782,3 +782,9 @@ SELECT identity(42) Используется для отладки и тестирования, позволяет "сломать" доступ по индексу, и получить результат и производительность запроса для полного сканирования. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/other_functions/) + +## modelEvaluate(model_name, ...) {#function-modelevaluate} + +Оценивает внешнюю модель. + +Принимает на вход имя и аргументы модели. Возвращает Float64. diff --git a/docs/ru/query_language/functions/random_functions.md b/docs/ru/query_language/functions/random_functions.md index b29f50dfd4e..1af271bc8de 100644 --- a/docs/ru/query_language/functions/random_functions.md +++ b/docs/ru/query_language/functions/random_functions.md @@ -1,6 +1,6 @@ # Функции генерации псевдослучайных чисел -Используются некриптографические генераторы псевдослучайных чисел. +Используются не криптографические генераторы псевдослучайных чисел. Все функции принимают ноль аргументов или один аргумент. В случае, если передан аргумент - он может быть любого типа, и его значение никак не используется. diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index 68bb92add86..193da6f2753 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -76,7 +76,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') ## format(pattern, s0, s1, ...) -Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` -- упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранивать двойной скобкой `{{` или `}}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). +Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` -- упрощенная версия шаблона в языке Python. Шаблон содержит "заменяющие поля", которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{` или `}}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). ```sql SELECT format('{1} {0} {1}', 'World', 'Hello') diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 0f86554b552..1a35947811b 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -1,6 +1,6 @@ # Функции поиска в строках -Во всех функциях, поиск регистрозависимый по-умолчанию. Существуют варианты функций для регистронезависимого поиска. +Во всех функциях, поиск регистрозависимый по умолчанию. 
Существуют варианты функций для регистронезависимого поиска. ## position(haystack, needle) Поиск подстроки `needle` в строке `haystack`. @@ -57,6 +57,10 @@ То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения. +## multiMatchAllIndices(haystack, [pattern1, pattern2, ..., patternn]) + +То же, что и `multiMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке. + ## multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, ..., patternn]) То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция также находится в экспериментальном режиме и может быть очень медленной. За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). @@ -65,6 +69,10 @@ То же, что и `multiFuzzyMatchAny`, только возвращает любой индекс подходящего регулярного выражения в пределах константного редакционного расстояния. +## multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, ..., patternn]) + +То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. + !!! note "Примечание" `multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan. @@ -95,7 +103,7 @@ ## ngramDistance(haystack, needle) -Вычисление 4-граммного расстояния между `haystack` и `needle`: считается симметрическая разность между двумя мультимножествами 4-грамм и нормализается на сумму их мощностей. Возвращает число float от 0 до 1 -- чем ближе к нулю, тем больше строки похожи друг на друга. Если константный `needle` или `haystack` больше чем 32КБ, кидается исключение. Если некоторые строки из неконстантного `haystack` или `needle` больше 32КБ, расстояние всегда равно единице. +Вычисление 4-граммного расстояния между `haystack` и `needle`: считается симметрическая разность между двумя мультимножествами 4-грамм и нормализуется на сумму их мощностей. Возвращает число float от 0 до 1 -- чем ближе к нулю, тем больше строки похожи друг на друга. Если константный `needle` или `haystack` больше чем 32КБ, кидается исключение. Если некоторые строки из неконстантного `haystack` или `needle` больше 32КБ, расстояние всегда равно единице. Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8`. diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index 61854066f32..49e2c0692ef 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -527,7 +527,7 @@ FROM Если запрос содержит секцию `WHERE`, ClickHouse пытается пробросить фильтры из этой секции в промежуточный `JOIN`. Если он не может пробросить фильтр в каждый промежуточный `JOIN`, ClickHouse применяет фильтры после того, как все `JOIN` будут выполнены. -Для создания запросов мы рекомендуем использоват синтаксис `JOIN ON` или `JOIN USING`. Например: +Для создания запросов мы рекомендуем использовать синтаксис `JOIN ON` или `JOIN USING`. 
Например: ```sql SELECT * FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.a = t3.a @@ -1082,7 +1082,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c В отличие от MySQL, файл создаётся на стороне клиента. Если файл с таким именем уже существует, это приведёт к ошибке. Функциональность доступна в клиенте командной строки и clickhouse-local (попытка выполнить запрос с INTO OUTFILE через HTTP интерфейс приведёт к ошибке). -Формат вывода по умолчанию - TabSeparated, как и в неинтерактивном режиме клиента командной строки. +Формат вывода по умолчанию - TabSeparated, как и в не интерактивном режиме клиента командной строки. ### Секция FORMAT @@ -1289,10 +1289,10 @@ SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL Это гораздо более оптимально, чем при использовании обычного IN. Но при этом, следует помнить о нескольких вещах: 1. При создании временной таблицы данные не уникализируются. Чтобы уменьшить объём передаваемых по сети данных, укажите в подзапросе DISTINCT (для обычного IN-а этого делать не нужно). -2. Временная таблица будет передана на все удалённые серверы. Передача не учитывает топологию сети. Например, если 10 удалённых серверов расположены в удалённом относительно сервера-инициатора запроса датацентре, то по каналу в удалённый датацентр данные будет переданы 10 раз. Старайтесь не использовать большие множества при использовании GLOBAL IN. +2. Временная таблица будет передана на все удалённые серверы. Передача не учитывает топологию сети. Например, если 10 удалённых серверов расположены в удалённом относительно сервера-инициатора запроса дата-центре, то по каналу в удалённый дата-центр данные будет переданы 10 раз. Старайтесь не использовать большие множества при использовании GLOBAL IN. 3. При передаче данных на удалённые серверы не настраивается ограничение использования сетевой полосы. Вы можете перегрузить сеть. 4. Старайтесь распределять данные по серверам так, чтобы в GLOBAL IN-ах не было частой необходимости. -5. Если в GLOBAL IN есть частая необходимость, то спланируйте размещение кластера ClickHouse таким образом, чтобы в каждом датацентре была хотя бы одна реплика каждого шарда, и среди них была быстрая сеть - чтобы запрос целиком можно было бы выполнить, передавая данные в пределах одного датацентра. +5. Если в GLOBAL IN есть частая необходимость, то спланируйте размещение кластера ClickHouse таким образом, чтобы в каждом дата-центре была хотя бы одна реплика каждого шарда, и среди них была быстрая сеть - чтобы запрос целиком можно было бы выполнить, передавая данные в пределах одного дата-центра. В секции `GLOBAL IN` также имеет смысл указывать локальную таблицу - в случае, если эта локальная таблица есть только на сервере-инициаторе запроса, и вы хотите воспользоваться данными из неё на удалённых серверах. diff --git a/docs/ru/query_language/system.md b/docs/ru/query_language/system.md index 998cf7fc682..474574b0b19 100644 --- a/docs/ru/query_language/system.md +++ b/docs/ru/query_language/system.md @@ -56,7 +56,7 @@ SELECT name, status FROM system.dictionaries; ## Управление распределёнными таблицами {#query_language-system-distributed} -ClickHouse может оперировать [распределёнными](../operations/table_engines/distributed.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. 
Вы пожете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки `insert_distributed_sync`. +ClickHouse может оперировать [распределёнными](../operations/table_engines/distributed.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. Вы можете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки `insert_distributed_sync`. ### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends} diff --git a/docs/ru/query_language/table_functions/jdbc.md b/docs/ru/query_language/table_functions/jdbc.md index 6b18edd13df..50c3bf67292 100644 --- a/docs/ru/query_language/table_functions/jdbc.md +++ b/docs/ru/query_language/table_functions/jdbc.md @@ -3,7 +3,7 @@ `jdbc(jdbc_connection_uri, schema, table)` - возвращает таблицу, соединение с которой происходит через JDBC-драйвер. -Для работы этой табличной функциии требуется отдельно запускать приложение clickhouse-jdbc-bridge. +Для работы этой табличной функции требуется отдельно запускать приложение clickhouse-jdbc-bridge. Данная функция поддерживает Nullable типы (на основании DDL таблицы к которой происходит запрос). diff --git a/docs/ru/query_language/table_functions/remote.md b/docs/ru/query_language/table_functions/remote.md index a19b6ce5cd5..6bf86502b47 100644 --- a/docs/ru/query_language/table_functions/remote.md +++ b/docs/ru/query_language/table_functions/remote.md @@ -72,6 +72,6 @@ example01-{01..02}-{1|2} Если пользователь не задан,то используется `default`. Если пароль не задан, то используется пустой пароль. -`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованому каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440. +`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованному каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/table_functions/remote/) diff --git a/docs/ru/security_changelog.md b/docs/ru/security_changelog.md index 3ba1d1af25c..77408e7ba30 100644 --- a/docs/ru/security_changelog.md +++ b/docs/ru/security_changelog.md @@ -30,7 +30,7 @@ unixODBC позволял указать путь для подключения ### CVE-2018-14670 -Некоррректная конфигурация в deb пакете могла привести к неавторизованному доступу к базе данных. +Некорректная конфигурация в deb пакете могла привести к неавторизованному доступу к базе данных. 
Обнаружено благодаря: the UK's National Cyber Security Centre (NCSC) [Оригинальная статья](https://clickhouse.yandex/docs/ru/security_changelog/) diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 34d2317ffad..46cef8feeeb 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -204,6 +204,10 @@ nav: - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' +- 'Guides': + - 'Overview': 'guides/index.md' + - 'Applying CatBoost Models': 'guides/apply_catboost_model.md' + - 'Development': - 'hidden': 'development/index.md' - 'Overview of ClickHouse Architecture': 'development/architecture.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index fb412f45c9d..afbe8709a47 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -64,6 +64,7 @@ nav: - 'Database Engines': - 'Introduction': 'database_engines/index.md' - 'MySQL': 'database_engines/mysql.md' + - 'Lazy': 'database_engines/lazy.md' - 'Table Engines': - 'Introduction': 'operations/table_engines/index.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 98c7b27a746..ce3f87e92fc 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -142,7 +142,7 @@ nav: - 'Функции для реализации оператора IN.': 'query_language/functions/in_functions.md' - 'Функция arrayJoin': 'query_language/functions/array_join.md' - 'Функции для работы с географическими координатами': 'query_language/functions/geo.md' - - 'Функции c Nullable агрументами': 'query_language/functions/functions_for_nulls.md' + - 'Функции c Nullable аргументами': 'query_language/functions/functions_for_nulls.md' - 'Функции машинного обучения': 'query_language/functions/machine_learning_functions.md' - 'Прочие функции': 'query_language/functions/other_functions.md' - 'Агрегатные функции': @@ -203,6 +203,10 @@ nav: - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - 'clickhouse-local': 'operations/utils/clickhouse-local.md' +- 'Руководства': + - 'Обзор': 'guides/index.md' + - 'Применение CatBoost моделей': 'guides/apply_catboost_model.md' + - 'F.A.Q.': - 'Общие вопросы': 'faq/general.md' diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake index ec0841d1535..6508f1b675f 100644 --- a/libs/libcommon/cmake/find_jemalloc.cmake +++ b/libs/libcommon/cmake/find_jemalloc.cmake @@ -1,11 +1,11 @@ -if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE) +if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_32 AND NOT ARCH_PPC64LE) set(ENABLE_JEMALLOC_DEFAULT ${ENABLE_LIBRARIES}) else () set(ENABLE_JEMALLOC_DEFAULT 0) endif () option (ENABLE_JEMALLOC "Set to TRUE to use jemalloc" ${ENABLE_JEMALLOC_DEFAULT}) -if (OS_LINUX AND NOT ARCH_ARM) +if (OS_LINUX) option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" ${NOT_UNBUNDLED}) else() option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" OFF) @@ -22,7 +22,7 @@ if (ENABLE_JEMALLOC) find_package (JeMalloc) endif () - if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY AND NOT ARCH_ARM) + if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY ) set (JEMALLOC_LIBRARIES "jemalloc") set (JEMALLOC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include" "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include_linux_x86_64") set (USE_INTERNAL_JEMALLOC_LIBRARY 1) diff --git a/libs/libglibc-compatibility/CMakeLists.txt 
b/libs/libglibc-compatibility/CMakeLists.txt index 9ceec6a8dee..2dbec5fa772 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -12,7 +12,15 @@ if (GLIBC_COMPATIBILITY) add_headers_and_sources(glibc_compatibility .) add_headers_and_sources(glibc_compatibility musl) - list(APPEND glibc_compatibility_sources musl/syscall.s musl/longjmp.s) + if (ARCH_ARM) + list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s) + set (musl_arch_include_dir musl/aarch64) + elseif (ARCH_AMD64) + list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s) + set (musl_arch_include_dir musl/x86_64) + else () + message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.") + endif () list(REMOVE_ITEM glibc_compatibility_sources musl/getentropy.c) if(HAVE_SYS_RANDOM_H) @@ -25,7 +33,7 @@ if (GLIBC_COMPATIBILITY) add_library(glibc-compatibility STATIC ${glibc_compatibility_sources}) - target_include_directories(glibc-compatibility PRIVATE libcxxabi) + target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir}) if (NOT USE_STATIC_LIBRARIES AND NOT MAKE_STATIC_LIBRARIES) target_compile_options(glibc-compatibility PRIVATE -fPIC) diff --git a/libs/libglibc-compatibility/musl/aarch64/atomic_arch.h b/libs/libglibc-compatibility/musl/aarch64/atomic_arch.h new file mode 100644 index 00000000000..40fefc25bb1 --- /dev/null +++ b/libs/libglibc-compatibility/musl/aarch64/atomic_arch.h @@ -0,0 +1,82 @@ +#define a_ll a_ll +static inline int a_ll(volatile int *p) +{ + int v; + __asm__ __volatile__ ("ldaxr %w0,%1" : "=r"(v) : "Q"(*p)); + return v; +} + +#define a_sc a_sc +static inline int a_sc(volatile int *p, int v) +{ + int r; + __asm__ __volatile__ ("stlxr %w0,%w2,%1" : "=&r"(r), "=Q"(*p) : "r"(v) : "memory"); + return !r; +} + +#define a_barrier a_barrier +static inline void a_barrier() +{ + __asm__ __volatile__ ("dmb ish" : : : "memory"); +} + +#define a_cas a_cas +static inline int a_cas(volatile int *p, int t, int s) +{ + int old; + do { + old = a_ll(p); + if (old != t) { + a_barrier(); + break; + } + } while (!a_sc(p, s)); + return old; +} + +#define a_ll_p a_ll_p +static inline void *a_ll_p(volatile void *p) +{ + void *v; + __asm__ __volatile__ ("ldaxr %0, %1" : "=r"(v) : "Q"(*(void *volatile *)p)); + return v; +} + +#define a_sc_p a_sc_p +static inline int a_sc_p(volatile int *p, void *v) +{ + int r; + __asm__ __volatile__ ("stlxr %w0,%2,%1" : "=&r"(r), "=Q"(*(void *volatile *)p) : "r"(v) : "memory"); + return !r; +} + +#define a_cas_p a_cas_p +static inline void *a_cas_p(volatile void *p, void *t, void *s) +{ + void *old; + do { + old = a_ll_p(p); + if (old != t) { + a_barrier(); + break; + } + } while (!a_sc_p(p, s)); + return old; +} + +#define a_ctz_64 a_ctz_64 +static inline int a_ctz_64(uint64_t x) +{ + __asm__( + " rbit %0, %1\n" + " clz %0, %0\n" + : "=r"(x) : "r"(x)); + return x; +} + +#define a_clz_64 a_clz_64 +static inline int a_clz_64(uint64_t x) +{ + __asm__("clz %0, %1" : "=r"(x) : "r"(x)); + return x; +} diff --git a/libs/libglibc-compatibility/musl/aarch64/longjmp.s b/libs/libglibc-compatibility/musl/aarch64/longjmp.s new file mode 100644 index 00000000000..19f04e7eebb --- /dev/null +++ b/libs/libglibc-compatibility/musl/aarch64/longjmp.s @@ -0,0 +1,21 @@ +.global musl_glibc_longjmp +.type musl_glibc_longjmp,@function +musl_glibc_longjmp: + // IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers + ldp x19, x20, [x0,#0] + ldp x21, x22, 
[x0,#16] + ldp x23, x24, [x0,#32] + ldp x25, x26, [x0,#48] + ldp x27, x28, [x0,#64] + ldp x29, x30, [x0,#80] + ldr x2, [x0,#104] + mov sp, x2 + ldp d8 , d9, [x0,#112] + ldp d10, d11, [x0,#128] + ldp d12, d13, [x0,#144] + ldp d14, d15, [x0,#160] + + mov x0, x1 + cbnz x1, 1f + mov x0, #1 +1: br x30 diff --git a/libs/libglibc-compatibility/musl/aarch64/syscall.s b/libs/libglibc-compatibility/musl/aarch64/syscall.s new file mode 100644 index 00000000000..845986bf787 --- /dev/null +++ b/libs/libglibc-compatibility/musl/aarch64/syscall.s @@ -0,0 +1,14 @@ +.global __syscall +.hidden __syscall +.type __syscall,%function +__syscall: + uxtw x8,w0 + mov x0,x1 + mov x1,x2 + mov x2,x3 + mov x3,x4 + mov x4,x5 + mov x5,x6 + mov x6,x7 + svc 0 + ret diff --git a/libs/libglibc-compatibility/musl/atomic_arch.h b/libs/libglibc-compatibility/musl/x86_64/atomic_arch.h similarity index 100% rename from libs/libglibc-compatibility/musl/atomic_arch.h rename to libs/libglibc-compatibility/musl/x86_64/atomic_arch.h diff --git a/libs/libglibc-compatibility/musl/longjmp.s b/libs/libglibc-compatibility/musl/x86_64/longjmp.s similarity index 100% rename from libs/libglibc-compatibility/musl/longjmp.s rename to libs/libglibc-compatibility/musl/x86_64/longjmp.s diff --git a/libs/libglibc-compatibility/musl/syscall.s b/libs/libglibc-compatibility/musl/x86_64/syscall.s similarity index 100% rename from libs/libglibc-compatibility/musl/syscall.s rename to libs/libglibc-compatibility/musl/x86_64/syscall.s diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp index fac48aae00d..8c17163778e 100644 --- a/utils/iotest/iotest.cpp +++ b/utils/iotest/iotest.cpp @@ -150,7 +150,7 @@ int mainImpl(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < threads; ++i) - pool.schedule(std::bind(thread, fd, mode, min_offset, max_offset, block_size, count)); + pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, count)); pool.wait(); fsync(fd); diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index 8fb7459fd3b..038bc600ec6 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -175,7 +175,7 @@ int mainImpl(int argc, char ** argv) Stopwatch watch; for (size_t i = 0; i < threads_count; ++i) - pool.schedule(std::bind(thread, fd, mode, min_offset, max_offset, block_size, buffers_count, count)); + pool.scheduleOrThrowOnError(std::bind(thread, fd, mode, min_offset, max_offset, block_size, buffers_count, count)); pool.wait(); watch.stop();