diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/10_question.md similarity index 93% rename from .github/ISSUE_TEMPLATE/question.md rename to .github/ISSUE_TEMPLATE/10_question.md index a5015de8217..6e23fbdc605 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/10_question.md @@ -1,6 +1,6 @@ --- name: Question -about: Ask question about ClickHouse +about: Ask a question about ClickHouse title: '' labels: question assignees: '' diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/20_feature-request.md similarity index 100% rename from .github/ISSUE_TEMPLATE/feature-request.md rename to .github/ISSUE_TEMPLATE/20_feature-request.md diff --git a/.github/ISSUE_TEMPLATE/unexpected-behaviour.md b/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md similarity index 94% rename from .github/ISSUE_TEMPLATE/unexpected-behaviour.md rename to .github/ISSUE_TEMPLATE/30_unexpected-behaviour.md index 27ab217ca33..3630d95ba33 100644 --- a/.github/ISSUE_TEMPLATE/unexpected-behaviour.md +++ b/.github/ISSUE_TEMPLATE/30_unexpected-behaviour.md @@ -1,6 +1,6 @@ --- name: Unexpected behaviour -about: Create a report to help us improve ClickHouse +about: Some feature is working in non-obvious way title: '' labels: unexpected behaviour assignees: '' diff --git a/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md b/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md new file mode 100644 index 00000000000..6a014ce3c29 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/35_incomplete_implementation.md @@ -0,0 +1,30 @@ +--- +name: Incomplete implementation +about: Implementation of existing feature is not finished +title: '' +labels: unfinished code +assignees: '' + +--- + +(you don't have to strictly follow this form) + +**Describe the unexpected behaviour** +A clear and concise description of what works not as it is supposed to. + +**How to reproduce** +* Which ClickHouse server version to use +* Which interface to use, if matters +* Non-default settings, if any +* `CREATE TABLE` statements for all tables involved +* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary +* Queries to run that lead to unexpected result + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Error message and/or stacktrace** +If applicable, add screenshots to help explain your problem. + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/40_bug-report.md similarity index 100% rename from .github/ISSUE_TEMPLATE/bug-report.md rename to .github/ISSUE_TEMPLATE/40_bug-report.md diff --git a/.github/ISSUE_TEMPLATE/usability-issue.md b/.github/ISSUE_TEMPLATE/45_usability-issue.md similarity index 93% rename from .github/ISSUE_TEMPLATE/usability-issue.md rename to .github/ISSUE_TEMPLATE/45_usability-issue.md index 6a084a72619..b03b11606c1 100644 --- a/.github/ISSUE_TEMPLATE/usability-issue.md +++ b/.github/ISSUE_TEMPLATE/45_usability-issue.md @@ -1,6 +1,6 @@ --- name: Usability issue -about: Create a report to help us improve ClickHouse +about: Report something can be made more convenient to use title: '' labels: usability assignees: '' diff --git a/.github/ISSUE_TEMPLATE/build-issue.md b/.github/ISSUE_TEMPLATE/50_build-issue.md similarity index 100% rename from .github/ISSUE_TEMPLATE/build-issue.md rename to .github/ISSUE_TEMPLATE/50_build-issue.md diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.md b/.github/ISSUE_TEMPLATE/60_documentation-issue.md similarity index 100% rename from .github/ISSUE_TEMPLATE/documentation-issue.md rename to .github/ISSUE_TEMPLATE/60_documentation-issue.md diff --git a/.github/ISSUE_TEMPLATE/performance-issue.md b/.github/ISSUE_TEMPLATE/70_performance-issue.md similarity index 100% rename from .github/ISSUE_TEMPLATE/performance-issue.md rename to .github/ISSUE_TEMPLATE/70_performance-issue.md diff --git a/.github/ISSUE_TEMPLATE/backward-compatibility.md b/.github/ISSUE_TEMPLATE/80_backward-compatibility.md similarity index 90% rename from .github/ISSUE_TEMPLATE/backward-compatibility.md rename to .github/ISSUE_TEMPLATE/80_backward-compatibility.md index 8f87197e73d..a13e9508f70 100644 --- a/.github/ISSUE_TEMPLATE/backward-compatibility.md +++ b/.github/ISSUE_TEMPLATE/80_backward-compatibility.md @@ -1,6 +1,6 @@ --- name: Backward compatibility issue -about: Create a report to help us improve ClickHouse +about: Report the case when the behaviour of a new version can break existing use cases title: '' labels: backward compatibility assignees: '' diff --git a/.github/ISSUE_TEMPLATE/90_fuzzing-report.md b/.github/ISSUE_TEMPLATE/90_fuzzing-report.md new file mode 100644 index 00000000000..1d9a8a75d28 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/90_fuzzing-report.md @@ -0,0 +1,16 @@ +--- +name: Assertion found via fuzzing +about: Potential issue has been found via Fuzzer or Stress tests +title: '' +labels: fuzz +assignees: '' + +--- + +(you don't have to strictly follow this form) + +**Describe the bug** +A link to the report + +**How to reproduce** +Try to reproduce the report and copy the tables and queries involved. diff --git a/CMakeLists.txt b/CMakeLists.txt index 7716fe82677..853b2df7aca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,19 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") endif () endif() +# Create BuildID when using lld. For other linkers it is created by default. +if (LINKER_NAME MATCHES "lld$") + # SHA1 is not cryptographically secure but it is the best what lld is offering. + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") +endif () + +# Add a section with the hash of the compiled machine code for integrity checks. +# Only for official builds, because adding a section can be time consuming (rewrite of several GB). 
+# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) +if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) + set (USE_BINARY_HASH 1) +endif () + cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd diff --git a/PreLoad.cmake b/PreLoad.cmake index d66bcd68a10..0a25a55e7bf 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -12,7 +12,7 @@ # https://youtrack.jetbrains.com/issue/CPP-2659 # https://youtrack.jetbrains.com/issue/CPP-870 -if (NOT DEFINED ENV{CLION_IDE}) +if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE}) find_program(NINJA_PATH ninja) if (NINJA_PATH) set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "" FORCE) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 39506186732..830e7857a1f 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -56,6 +56,9 @@ #include #include #include +#include +#include +#include #if !defined(ARCADIA_BUILD) # include @@ -80,16 +83,6 @@ static void call_default_signal_handler(int sig) raise(sig); } -const char * msan_strsignal(int sig) -{ - // Apparently strsignal is not instrumented by MemorySanitizer, so we - // have to unpoison it to avoid msan reports inside fmt library when we - // print it. - const char * signal_name = strsignal(sig); - __msan_unpoison_string(signal_name); - return signal_name; -} - static constexpr size_t max_query_id_size = 127; static const size_t signal_pipe_buf_size = @@ -294,13 +287,13 @@ private: { LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, - thread_num, msan_strsignal(sig), sig); + thread_num, strsignal(sig), sig); } else { LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})", VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, - thread_num, query_id, msan_strsignal(sig), sig); + thread_num, query_id, strsignal(sig), sig); } String error_message; @@ -328,6 +321,32 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); +#if defined(__linux__) + /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. + String calculated_binary_hash = getHashOfLoadedBinaryHex(); + if (daemon.stored_binary_hash.empty()) + { + LOG_FATAL(log, "Calculated checksum of the binary: {}." + " There is no information about the reference checksum.", calculated_binary_hash); + } + else if (calculated_binary_hash == daemon.stored_binary_hash) + { + LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash); + } + else + { + LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond" + " to the reference checksum stored in the binary ({1})." + " It may indicate one of the following:" + " - the file was changed just after startup;" + " - the file is damaged on disk due to faulty hardware;" + " - the loaded executable is damaged in memory due to faulty hardware;" + " - the file was intentionally modified;" + " - logical error in code." + , calculated_binary_hash, daemon.stored_binary_hash); + } +#endif + /// Write crash to system.crash_log table if available. 
if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); @@ -481,8 +500,9 @@ void BaseDaemon::kill() { dumpCoverageReportIfPossible(); pid_file.reset(); - if (::raise(SIGKILL) != 0) - throw Poco::SystemException("cannot kill process"); + /// Exit with the same code as it is usually set by shell when process is terminated by SIGKILL. + /// It's better than doing 'raise' or 'kill', because they have no effect for 'init' process (with pid = 0, usually in Docker). + _exit(128 + SIGKILL); } std::string BaseDaemon::getDefaultCorePath() const @@ -787,6 +807,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #else build_id_info = "no build id"; #endif + +#if defined(__linux__) + std::string executable_path = getExecutablePath(); + + if (!executable_path.empty()) + stored_binary_hash = DB::Elf(executable_path).getBinaryHash(); +#endif } void BaseDaemon::logRevision() const @@ -846,13 +873,13 @@ void BaseDaemon::handleSignal(int signal_id) onInterruptSignals(signal_id); } else - throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0); + throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0); } void BaseDaemon::onInterruptSignals(int signal_id) { is_cancelled = true; - LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id)); + LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id)); if (sigint_signals_counter >= 2) { @@ -998,3 +1025,9 @@ void BaseDaemon::setupWatchdog() #endif } } + + +String BaseDaemon::getStoredBinaryHash() const +{ + return stored_binary_hash; +} diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 090d4997606..42d94629ae9 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -60,7 +60,7 @@ public: static void terminate(); /// Forceful shutdown - void kill(); + [[noreturn]] void kill(); /// Cancellation request has been received. bool isCancelled() const @@ -121,6 +121,9 @@ public: /// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly). void shouldSetupWatchdog(char * argv0_); + /// Hash of the binary for integrity checks. 
+ String getStoredBinaryHash() const; + protected: virtual void logRevision() const; @@ -168,6 +171,7 @@ protected: Poco::Util::AbstractConfiguration * last_configuration = nullptr; String build_id_info; + String stored_binary_hash; std::vector handled_signals; diff --git a/base/glibc-compatibility/musl/lgamma.c b/base/glibc-compatibility/musl/lgamma.c index fbbe1fedc92..fb9d105d0fa 100644 --- a/base/glibc-compatibility/musl/lgamma.c +++ b/base/glibc-compatibility/musl/lgamma.c @@ -258,11 +258,3 @@ double lgamma_r(double x, int *signgamp) r = nadj - r; return r; } - - -int signgam; - -double lgamma(double x) -{ - return lgamma_r(x, &signgam); -} diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index 19db11fba1a..b158748ce1f 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -328,12 +328,3 @@ long double lgammal_r(long double x, int *sg) return lgamma_r(x, sg); } #endif - - -int signgam_lgammal; - -long double lgammal(long double x) -{ - return lgammal_r(x, &signgam_lgammal); -} - diff --git a/base/glibc-compatibility/musl/strsignal.c b/base/glibc-compatibility/musl/strsignal.c new file mode 100644 index 00000000000..fee894e8550 --- /dev/null +++ b/base/glibc-compatibility/musl/strsignal.c @@ -0,0 +1,125 @@ +#include +#include + +#if (SIGHUP == 1) && (SIGINT == 2) && (SIGQUIT == 3) && (SIGILL == 4) \ + && (SIGTRAP == 5) && (SIGABRT == 6) && (SIGBUS == 7) && (SIGFPE == 8) \ + && (SIGKILL == 9) && (SIGUSR1 == 10) && (SIGSEGV == 11) && (SIGUSR2 == 12) \ + && (SIGPIPE == 13) && (SIGALRM == 14) && (SIGTERM == 15) && (SIGSTKFLT == 16) \ + && (SIGCHLD == 17) && (SIGCONT == 18) && (SIGSTOP == 19) && (SIGTSTP == 20) \ + && (SIGTTIN == 21) && (SIGTTOU == 22) && (SIGURG == 23) && (SIGXCPU == 24) \ + && (SIGXFSZ == 25) && (SIGVTALRM == 26) && (SIGPROF == 27) && (SIGWINCH == 28) \ + && (SIGPOLL == 29) && (SIGPWR == 30) && (SIGSYS == 31) + +#define sigmap(x) x + +#else + +static const char map[] = { + [SIGHUP] = 1, + [SIGINT] = 2, + [SIGQUIT] = 3, + [SIGILL] = 4, + [SIGTRAP] = 5, + [SIGABRT] = 6, + [SIGBUS] = 7, + [SIGFPE] = 8, + [SIGKILL] = 9, + [SIGUSR1] = 10, + [SIGSEGV] = 11, + [SIGUSR2] = 12, + [SIGPIPE] = 13, + [SIGALRM] = 14, + [SIGTERM] = 15, +#if defined(SIGSTKFLT) + [SIGSTKFLT] = 16, +#elif defined(SIGEMT) + [SIGEMT] = 16, +#endif + [SIGCHLD] = 17, + [SIGCONT] = 18, + [SIGSTOP] = 19, + [SIGTSTP] = 20, + [SIGTTIN] = 21, + [SIGTTOU] = 22, + [SIGURG] = 23, + [SIGXCPU] = 24, + [SIGXFSZ] = 25, + [SIGVTALRM] = 26, + [SIGPROF] = 27, + [SIGWINCH] = 28, + [SIGPOLL] = 29, + [SIGPWR] = 30, + [SIGSYS] = 31 +}; + +#define sigmap(x) ((x) >= sizeof map ? 
(x) : map[(x)]) + +#endif + +static const char strings[] = + "Unknown signal\0" + "Hangup\0" + "Interrupt\0" + "Quit\0" + "Illegal instruction\0" + "Trace/breakpoint trap\0" + "Aborted\0" + "Bus error\0" + "Arithmetic exception\0" + "Killed\0" + "User defined signal 1\0" + "Segmentation fault\0" + "User defined signal 2\0" + "Broken pipe\0" + "Alarm clock\0" + "Terminated\0" +#if defined(SIGSTKFLT) + "Stack fault\0" +#elif defined(SIGEMT) + "Emulator trap\0" +#else + "Unknown signal\0" +#endif + "Child process status\0" + "Continued\0" + "Stopped (signal)\0" + "Stopped\0" + "Stopped (tty input)\0" + "Stopped (tty output)\0" + "Urgent I/O condition\0" + "CPU time limit exceeded\0" + "File size limit exceeded\0" + "Virtual timer expired\0" + "Profiling timer expired\0" + "Window changed\0" + "I/O possible\0" + "Power failure\0" + "Bad system call\0" + "RT32" + "\0RT33\0RT34\0RT35\0RT36\0RT37\0RT38\0RT39\0RT40" + "\0RT41\0RT42\0RT43\0RT44\0RT45\0RT46\0RT47\0RT48" + "\0RT49\0RT50\0RT51\0RT52\0RT53\0RT54\0RT55\0RT56" + "\0RT57\0RT58\0RT59\0RT60\0RT61\0RT62\0RT63\0RT64" +#if _NSIG > 65 + "\0RT65\0RT66\0RT67\0RT68\0RT69\0RT70\0RT71\0RT72" + "\0RT73\0RT74\0RT75\0RT76\0RT77\0RT78\0RT79\0RT80" + "\0RT81\0RT82\0RT83\0RT84\0RT85\0RT86\0RT87\0RT88" + "\0RT89\0RT90\0RT91\0RT92\0RT93\0RT94\0RT95\0RT96" + "\0RT97\0RT98\0RT99\0RT100\0RT101\0RT102\0RT103\0RT104" + "\0RT105\0RT106\0RT107\0RT108\0RT109\0RT110\0RT111\0RT112" + "\0RT113\0RT114\0RT115\0RT116\0RT117\0RT118\0RT119\0RT120" + "\0RT121\0RT122\0RT123\0RT124\0RT125\0RT126\0RT127\0RT128" +#endif + ""; + +char *strsignal(int signum) +{ + const char *s = strings; + + signum = sigmap(signum); + if (signum - 1U >= _NSIG-1) signum = 0; + + for (; signum--; s++) for (; *s; s++); + + return (char *)s; +} diff --git a/base/harmful/CMakeLists.txt b/base/harmful/CMakeLists.txt new file mode 100644 index 00000000000..399f6ecc625 --- /dev/null +++ b/base/harmful/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(harmful harmful.c) +install(TARGETS harmful EXPORT global ARCHIVE DESTINATION lib) diff --git a/base/harmful/README.md b/base/harmful/README.md new file mode 100644 index 00000000000..7e56c1f62e4 --- /dev/null +++ b/base/harmful/README.md @@ -0,0 +1 @@ +A library that traps whenever harmful functions from libc are called. diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c new file mode 100644 index 00000000000..df625a3e4d6 --- /dev/null +++ b/base/harmful/harmful.c @@ -0,0 +1,244 @@ +/** This library provides runtime instrumentation (hardening) + * that ensures no "harmful" functions from libc are called + * (by terminating the program immediately). + */ + +/// It is only enabled in debug build (its intended use is for CI checks). +#if !defined(NDEBUG) + +#if defined(__clang__) + #pragma clang diagnostic ignored "-Wincompatible-library-redeclaration" +#else + #pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" +#endif + +/// We cannot use libc headers here. +long write(int, const void *, unsigned long); +#define TRAP(func) void func() { write(2, #func "\n", __builtin_strlen(#func) + 1); __builtin_trap(); } + +/// Trap all non thread-safe functions: +/// nm -D /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} | grep -P '_r@?$' | awk '{ print $3 }' | sed -r -e 's/_r//' | grep -vP '^_' + +/// See also https://reviews.llvm.org/D90944 + +/// You can edit this list and even comment out some functions. +/// The only purpose of the library is to force you to pay attention. 
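The `TRAP` macro above packs the whole mechanism into one line, so here is a minimal stand-alone sketch of the same idea (illustrative only, not part of this patch): once a strong definition of a libc symbol is linked in ahead of libc, any caller reaches the stub, which writes the function name to stderr and traps. The choice of `strtok` as the example is mine; as in harmful.c, libc headers are deliberately not included.

```cpp
// Minimal stand-alone sketch (not part of this patch) of the TRAP mechanism from
// harmful.c above: a strong definition of a libc symbol shadows the real one, so
// any call writes the function name to stderr and terminates via __builtin_trap().
// As in harmful.c, no libc headers are included; write() is declared by hand.
extern "C" long write(int, const void *, unsigned long);

// Stand-in for TRAP(strtok): the same pattern, written out without the macro.
extern "C" void strtok()
{
    write(2, "strtok\n", __builtin_strlen("strtok\n"));
    __builtin_trap();   // terminate immediately so the misuse cannot go unnoticed
}

int main()
{
    // Any code path that ends up calling strtok() now aborts the program:
    strtok();
}
```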
+ +TRAP(argp_error) +TRAP(argp_help) +TRAP(argp_parse) +TRAP(argp_state_help) +TRAP(argp_usage) +TRAP(asctime) +TRAP(clearenv) +TRAP(crypt) +TRAP(ctime) +TRAP(cuserid) +TRAP(drand48) +TRAP(ecvt) +TRAP(encrypt) +TRAP(endfsent) +TRAP(endgrent) +TRAP(endhostent) +TRAP(endnetent) +TRAP(endnetgrent) +TRAP(endprotoent) +TRAP(endpwent) +TRAP(endservent) +TRAP(endutent) +TRAP(endutxent) +TRAP(erand48) +TRAP(error_at_line) +///TRAP(exit) +TRAP(fcloseall) +TRAP(fcvt) +TRAP(fgetgrent) +TRAP(fgetpwent) +TRAP(gammal) +TRAP(getchar_unlocked) +TRAP(getdate) +TRAP(getfsent) +TRAP(getfsfile) +TRAP(getfsspec) +TRAP(getgrent) +TRAP(getgrent_r) +TRAP(getgrgid) +TRAP(getgrnam) +TRAP(gethostbyaddr) +TRAP(gethostbyname) +TRAP(gethostbyname2) +TRAP(gethostent) +TRAP(getlogin) +TRAP(getmntent) +TRAP(getnetbyaddr) +TRAP(getnetbyname) +TRAP(getnetent) +TRAP(getnetgrent) +TRAP(getnetgrent_r) +TRAP(getopt) +TRAP(getopt_long) +TRAP(getopt_long_only) +TRAP(getpass) +TRAP(getprotobyname) +TRAP(getprotobynumber) +TRAP(getprotoent) +TRAP(getpwent) +TRAP(getpwent_r) +TRAP(getpwnam) +TRAP(getpwuid) +TRAP(getservbyname) +TRAP(getservbyport) +TRAP(getservent) +TRAP(getutent) +TRAP(getutent_r) +TRAP(getutid) +TRAP(getutid_r) +TRAP(getutline) +TRAP(getutline_r) +TRAP(getutxent) +TRAP(getutxid) +TRAP(getutxline) +TRAP(getwchar_unlocked) +//TRAP(glob) +//TRAP(glob64) +TRAP(gmtime) +TRAP(hcreate) +TRAP(hdestroy) +TRAP(hsearch) +TRAP(innetgr) +TRAP(jrand48) +TRAP(l64a) +TRAP(lcong48) +TRAP(lgammafNx) +TRAP(localeconv) +TRAP(localtime) +TRAP(login) +TRAP(login_tty) +TRAP(logout) +TRAP(logwtmp) +TRAP(lrand48) +TRAP(mallinfo) +TRAP(mallopt) +TRAP(mblen) +TRAP(mbrlen) +TRAP(mbrtowc) +TRAP(mbsnrtowcs) +TRAP(mbsrtowcs) +//TRAP(mbtowc) // Used by Standard C++ library +TRAP(mcheck) +TRAP(mprobe) +TRAP(mrand48) +TRAP(mtrace) +TRAP(muntrace) +TRAP(nrand48) +TRAP(__ppc_get_timebase_freq) +TRAP(ptsname) +TRAP(putchar_unlocked) +TRAP(putenv) +TRAP(pututline) +TRAP(pututxline) +TRAP(putwchar_unlocked) +TRAP(qecvt) +TRAP(qfcvt) +TRAP(register_printf_function) +TRAP(seed48) +//TRAP(setenv) +TRAP(setfsent) +TRAP(setgrent) +TRAP(sethostent) +TRAP(sethostid) +TRAP(setkey) +//TRAP(setlocale) // Used by replxx at startup +TRAP(setlogmask) +TRAP(setnetent) +TRAP(setnetgrent) +TRAP(setprotoent) +TRAP(setpwent) +TRAP(setservent) +TRAP(setutent) +TRAP(setutxent) +TRAP(siginterrupt) +TRAP(sigpause) +//TRAP(sigprocmask) +TRAP(sigsuspend) +TRAP(sleep) +TRAP(srand48) +//TRAP(strerror) // Used by RocksDB and many other libraries, unfortunately. +//TRAP(strsignal) // This function is imported from Musl and is thread safe. +TRAP(strtok) +TRAP(tcflow) +TRAP(tcsendbreak) +TRAP(tmpnam) +TRAP(ttyname) +TRAP(unsetenv) +TRAP(updwtmp) +TRAP(utmpname) +TRAP(utmpxname) +//TRAP(valloc) +TRAP(vlimit) +//TRAP(wcrtomb) // Used by Standard C++ library +TRAP(wcsnrtombs) +TRAP(wcsrtombs) +TRAP(wctomb) +TRAP(wordexp) +TRAP(basename) +TRAP(catgets) +TRAP(dbm_clearerr) +TRAP(dbm_close) +TRAP(dbm_delete) +TRAP(dbm_error) +TRAP(dbm_fetch) +TRAP(dbm_firstkey) +TRAP(dbm_nextkey) +TRAP(dbm_open) +TRAP(dbm_store) +TRAP(dirname) +TRAP(dlerror) +TRAP(ftw) +TRAP(getc_unlocked) +//TRAP(getenv) // Ok at program startup +TRAP(inet_ntoa) +TRAP(lgamma) +TRAP(lgammaf) +TRAP(lgammal) +TRAP(nftw) +TRAP(nl_langinfo) +TRAP(putc_unlocked) +TRAP(rand) +/** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. 
However, in modern + * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams + * are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external + * synchronization is still preferable to the use of the deprecated readdir_r(3) function. It is expected that a future + * version of POSIX.1 will require that readdir() be thread-safe when concurrently employed on different directory streams. + * - man readdir + */ +//TRAP(readdir) +TRAP(system) +TRAP(wcstombs) +TRAP(ether_aton) +TRAP(ether_ntoa) +TRAP(fgetsgent) +TRAP(fgetspent) +TRAP(getaliasbyname) +TRAP(getaliasent) +TRAP(getrpcbyname) +TRAP(getrpcbynumber) +TRAP(getrpcent) +TRAP(getsgent) +TRAP(getsgnam) +TRAP(getspent) +TRAP(getspnam) +TRAP(initstate) +TRAP(random) +TRAP(setstate) +TRAP(sgetsgent) +TRAP(sgetspent) +TRAP(srandom) +TRAP(twalk) +TRAP(lgammaf128) +TRAP(lgammaf32) +TRAP(lgammaf32x) +TRAP(lgammaf64) +TRAP(lgammaf64x) + +#endif diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 87a30c9effc..49516ab3207 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54444) -SET(VERSION_MAJOR 20) -SET(VERSION_MINOR 13) +SET(VERSION_REVISION 54445) +SET(VERSION_MAJOR 21) +SET(VERSION_MINOR 1) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471) -SET(VERSION_DESCRIBE v20.13.1.1-prestable) -SET(VERSION_STRING 20.13.1.1) +SET(VERSION_GITHASH c2a3c6391437db2a05fe815a9d6faf81464d39ec) +SET(VERSION_DESCRIBE v21.1.1-testing) +SET(VERSION_STRING 21.1.1) # end of autochange diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index 2cdfed56de8..d8e9cf9588d 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -32,12 +32,21 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") - # 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash - # of the manifest) and debian will extract these from d/changelog, and - # makes cache of ccache unusable + # debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is + # filled from the debian/changelog or current time. # - # FIXME: once sloppiness will be introduced for this this can be removed. - if (CCACHE_VERSION VERSION_GREATER "4.0") + # - 4.0+ ccache always includes this environment variable into the hash + # of the manifest, which do not allow to use previous cache, + # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness. + # + # So for: + # - 4.2+ time_macros sloppiness is used, + # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable. 
+ if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2") + message(STATUS "Use time_macros sloppiness for ccache") + set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros") + set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros") + elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index b610a32759f..d3a727e9cb8 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -39,6 +39,7 @@ find_package(Threads REQUIRED) if (NOT OS_ANDROID) # Our compatibility layer doesn't build under Android, many errors in musl. add_subdirectory(base/glibc-compatibility) + add_subdirectory(base/harmful) endif () include (cmake/find/unwind.cmake) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index be3d3f86348..12078cea263 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -18,7 +18,11 @@ if (WITH_COVERAGE) set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) separate_arguments(WITHOUT_COVERAGE_LIST) # disable coverage for contib files and build with optimisations - add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) + if (COMPILER_CLANG) + add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) + else() + add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST}) + endif() endif() if (SANITIZE STREQUAL "undefined") diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index e05523ca7c1..21f451d4d31 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit e05523ca7c1fb8d095b612a1b1cfe96e199ffb17 +Subproject commit 21f451d4d3157ffed31ec60a8b76c407190e66bd diff --git a/contrib/replxx b/contrib/replxx index 254be98ae7f..cdb6e3f2ce4 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 254be98ae7f2fd92d6db768f8e11ea5a5226cbf5 +Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc diff --git a/contrib/rocksdb b/contrib/rocksdb index 8b966f0ca29..54a0decabbc 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 8b966f0ca298fc1475bd09d9775f32dff0fdce0a +Subproject commit 54a0decabbcf4c0bb5cf7befa9c597f28289bff5 diff --git a/debian/changelog b/debian/changelog index 5ea6b472e46..3a267a83c69 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (20.13.1.1) unstable; urgency=low +clickhouse (21.1.0) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 23 Nov 2020 10:29:24 +0300 + -- Alexey Milovidov Mon, 11 Jan 2021 03:51:08 +0300 diff --git a/debian/control b/debian/control index 809b5bb5d2a..9b34e982698 100644 --- a/debian/control +++ b/debian/control @@ -40,7 +40,7 @@ Description: Common files for ClickHouse Package: clickhouse-server Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser -Recommends: libcap2-bin, krb5-user +Recommends: libcap2-bin Replaces: clickhouse-server-common, clickhouse-server-base Provides: clickhouse-server-common Description: Server binary for ClickHouse diff --git a/docker/client/Dockerfile 
b/docker/client/Dockerfile index 3ef6b8c8b32..ddfe3cd177b 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.13.1.* +ARG version=21.1.0 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c46e3b23994..85e2e8b0f04 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -31,7 +31,7 @@ find . -name '*.so.*' -print -exec mv '{}' /output \; if [ "performance" == "$COMBINED_OUTPUT" ] then cp -r ../tests/performance /output - cp -r ../tests/config/top_level_domains / + cp -r ../tests/config/top_level_domains /output cp -r ../docker/test/performance-comparison/config /output ||: rm /output/unit_tests_dbms ||: rm /output/clickhouse-odbc-bridge ||: diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index f7e107a2fc9..890aa35fe92 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.13.1.* +ARG version=21.1.0 ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 8e3b5193874..2e17151b31f 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.13.1.* +ARG version=21.1.0 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 356d3212932..542f7a673cd 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -10,6 +10,11 @@ 10 + + + + + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 26db6455fd5..0488cdce155 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -81,12 +81,11 @@ function fuzz echo Server started fuzzer_exit_code=0 - # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. - # They are all alphanumeric. - # shellcheck disable=SC2012 - ./clickhouse-client --query-fuzzer-runs=1000 \ - < <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \ - > >(tail -10000 > fuzzer.log) \ + # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. + # SC2046: Quote this to prevent word splitting. Actually I need word splitting. + # shellcheck disable=SC2012,SC2046 + ./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ + > >(tail -n 10000 > fuzzer.log) \ 2>&1 \ || fuzzer_exit_code=$? 
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 59d7cc98063..9a0d8093a55 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -36,6 +36,22 @@ function wait_for_server # port, pid fi } +function left_or_right() +{ + local from=$1 && shift + local basename=$1 && shift + + if [ -e "$from/$basename" ]; then + echo "$from/$basename" + return + fi + + case "$from" in + left) echo "right/$basename" ;; + right) echo "left/$basename" ;; + esac +} + function configure { # Use the new config for both servers, so that we can change it in a PR. @@ -55,7 +71,7 @@ function configure # server *config* directives overrides --path db0 --user_files_path db0/user_files - --top_level_domains_path /top_level_domains + --top_level_domains_path "$(left_or_right right top_level_domains)" --tcp_port $LEFT_SERVER_PORT ) left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log & @@ -103,7 +119,7 @@ function restart # server *config* directives overrides --path left/db --user_files_path left/db/user_files - --top_level_domains_path /top_level_domains + --top_level_domains_path "$(left_or_right left top_level_domains)" --tcp_port $LEFT_SERVER_PORT ) left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log & @@ -118,7 +134,7 @@ function restart # server *config* directives overrides --path right/db --user_files_path right/db/user_files - --top_level_domains_path /top_level_domains + --top_level_domains_path "$(left_or_right right top_level_domains)" --tcp_port $RIGHT_SERVER_PORT ) right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log & diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 91c040ce6ad..f2fcefd604f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -37,7 +37,15 @@ chmod 777 -R /var/lib/clickhouse clickhouse-client --query "SHOW DATABASES" clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" clickhouse-client --query "CREATE DATABASE test" -service clickhouse-server restart && sleep 5 + +service clickhouse-server restart + +# Wait for server to start accepting connections +for _ in {1..120}; do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 5b096bcc5fa..fb453e55417 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. 
Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and it is available to general public. Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. @@ -84,11 +84,9 @@ If you want to improve performance of ClickHouse in some scenario, and if improv Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing. -You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. These kind of tests are not automated. - ## Miscellaneous Tests {#miscellaneous-tests} -There are tests for external dictionaries located at `tests/external_dictionaries` and for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests. +There are tests for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests. There is separate test for quorum inserts. This test run ClickHouse cluster on separate servers and emulate various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT` , like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts was written and all rejected inserts was not. @@ -169,53 +167,55 @@ Precise query execution timings are not recorded and not compared due to high va ## Build Tests {#build-tests} -Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. Tests are located at `ci` directory. They run build from source inside Docker, Vagrant, and sometimes with `qemu-user-static` inside Docker. These tests are under development and test runs are not automated. +Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well. -Motivation: - -Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples: - -- build on FreeBSD -- build on Debian with libraries from system packages -- build with shared linking of libraries -- build on AArch64 platform -- build on PowerPc platform +Examples: +- cross-compile for Darwin x86_64 (Mac OS X) +- cross-compile for FreeBSD x86_64 +- cross-compile for Linux AArch64 +- build on Ubuntu with libraries from system packages (discouraged) +- build with shared linking of libraries (discouraged) For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. 
But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts. Though we cannot run all tests on all variants of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests. +We also test that there are no translation units that are too long to compile or require too much RAM. + +We also test that there are no overly large stack frames. + ## Testing for Protocol Compatibility {#testing-for-protocol-compatibility} When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages). +We also test some cases automatically with integration tests: +- whether data written by an old version of ClickHouse can be successfully read by the new version; +- whether distributed queries work in a cluster with different ClickHouse versions. + ## Help from the Compiler {#help-from-the-compiler} Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. Although these options are not enabled for third-party libraries. Clang has even more useful warnings - you can look for them with `-Weverything` and pick something to default build. -For production builds, gcc is used (it still generates slightly more efficient code than clang). For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows to catch more errors at runtime. +For production builds, clang is used, but we also test gcc builds. For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that the compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows catching more errors at runtime. ## Sanitizers {#sanitizers} ### Address sanitizer -We run functional and integration tests under ASan on per-commit basis. - -### Valgrind (Memcheck) -We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse). - -### Undefined behaviour sanitizer -We run functional and integration tests under ASan on per-commit basis. +We run functional, integration, stress and unit tests under ASan on per-commit basis. ### Thread sanitizer -We run functional tests under TSan on per-commit basis. We still don’t run integration tests under TSan on per-commit basis. +We run functional, integration, stress and unit tests under TSan on per-commit basis. ### Memory sanitizer -Currently we still don’t use MSan. +We run functional, integration, stress and unit tests under MSan on per-commit basis. -### Debug allocator -Debug version of `jemalloc` is used for debug build. +### Undefined behaviour sanitizer +We run functional, integration, stress and unit tests under UBSan on per-commit basis.
The code of some third-party libraries is not sanitized for UB. + +### Valgrind (Memcheck) +We used to run functional tests under Valgrind overnight, but don't do it anymore. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse). ## Fuzzing {#fuzzing} @@ -233,19 +233,62 @@ Google OSS-Fuzz can be found at `docker/fuzz`. We also use simple fuzz test to generate random SQL queries and to check that the server doesn’t die executing them. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). +We also use a sophisticated AST-based query fuzzer that is able to find a huge number of corner cases. It does random permutations and substitutions in the query AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order. + +## Stress test + +Stress tests are another case of fuzzing. They run all functional tests in parallel in random order with a single server. Results of the tests are not checked. + +It is checked that: +- server does not crash, no debug or sanitizer traps are triggered; +- there are no deadlocks; +- the database structure is consistent; +- server can successfully stop after the test and start again without exceptions. + +There are five variants (Debug, ASan, TSan, MSan, UBSan). + +## Thread Fuzzer + +Thread Fuzzer (not to be confused with Thread Sanitizer) is another kind of fuzzing that allows randomizing the thread order of execution. It helps to find even more special cases. + ## Security Audit {#security-audit} People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint. ## Static Analyzers {#static-analyzers} -We run `PVS-Studio` on per-commit basis. We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/). +We run `clang-tidy` and `PVS-Studio` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks. + +We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/). If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box. +We also use `shellcheck` for static analysis of shell scripts. + ## Hardening {#hardening} -`FORTIFY_SOURCE` is used by default. It is almost useless, but still makes sense in rare cases and we don’t disable it. +In debug builds we use a custom allocator that does ASLR of user-level allocations. + +We also manually protect memory regions that are expected to be readonly after allocation. + +In debug builds we also use a customized libc that ensures that no "harmful" (obsolete, insecure, not thread-safe) functions are called. + +Debug assertions are used extensively. + +In debug builds, if an exception with a "logical error" code (which implies a bug) is thrown, the program is terminated prematurely. This allows using exceptions in release builds while treating them as assertions in debug builds. + +Debug version of jemalloc is used for debug builds. +Debug version of libc++ is used for debug builds.
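The "logical error" rule in the Hardening list above can be illustrated with a small sketch. The names used here (`ErrorCode`, `checkedThrow`) are hypothetical and not the actual ClickHouse exception machinery; the point is only the pattern: the same call site throws in release builds and aborts in debug builds.

```cpp
// Hypothetical sketch of the "logical error terminates in debug builds" idea
// described above; ErrorCode and checkedThrow are illustrative names only.
#include <cstdlib>
#include <stdexcept>
#include <string>

enum class ErrorCode { LOGICAL_ERROR, BAD_ARGUMENTS };

[[noreturn]] void checkedThrow(ErrorCode code, const std::string & message)
{
#ifndef NDEBUG
    /// In debug builds a logical error is a bug, so treat it like a failed assertion.
    if (code == ErrorCode::LOGICAL_ERROR)
        std::abort();
#endif
    /// In release builds it remains an ordinary exception that the caller can handle.
    throw std::runtime_error(message);
}

int main()
{
    try
    {
        checkedThrow(ErrorCode::BAD_ARGUMENTS, "example of a recoverable error");
    }
    catch (const std::exception &)
    {
        /// Recoverable errors behave the same way in both build types.
    }
}
```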
+ +## Runtime Integrity Checks + +Data stored on disk is checksummed. Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas. + +This is required to protect against faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in the presence of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ). + +ClickHouse provides diagnostics that will help ops engineers find faulty hardware. + +\* and it is not slow. ## Code Style {#code-style} @@ -259,6 +302,8 @@ Alternatively you can try `uncrustify` tool to reformat your code. Configuration `CLion` has its own code formatter that has to be tuned for our code style. +We also use `codespell` to find typos in code. It is automated as well. + ## Metrica B2B Tests {#metrica-b2b-tests} Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together. @@ -267,13 +312,25 @@ These tests are automated by separate team. Due to high number of moving parts, ## Test Coverage {#test-coverage} -As of July 2018 we don’t track test coverage. +We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on a daily basis. + +## Tests for Tests + +There is an automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If the test fails at least once, it is considered flaky. + +## Testflows + +[Testflows](https://testflows.com/) is an enterprise-grade testing framework. It is used by Altinity for some of the tests and we run these tests in our CI. + +## Yandex Checks (only for Yandex employees) + +These checks import ClickHouse code into the Yandex internal monorepository, so the ClickHouse codebase can be used as a library by other products at Yandex (YT and YDB). Note that clickhouse-server itself is not built from the internal repo and an unmodified open-source build is used for Yandex applications. ## Test Automation {#test-automation} We run tests with Yandex internal CI and job automation system named “Sandbox”. -Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you. +Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
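Referring back to the "Tests for Tests" section above: the flakiness check is essentially a retry loop. The sketch below is illustrative only; the real check lives in the CI scripts, and the test body here is just a placeholder standing in for one functional test run.

```cpp
// Illustrative sketch of the flakiness check from "Tests for Tests" above:
// run a new test N times and flag it as flaky if any single run fails.
// run_test_once is a placeholder for launching one real functional test run.
#include <cstdio>
#include <functional>

bool isFlaky(const std::function<bool()> & run_test_once, int runs)
{
    for (int i = 0; i < runs; ++i)
    {
        if (!run_test_once())
        {
            std::printf("failed on run %d of %d -> flaky\n", i + 1, runs);
            return true;
        }
    }
    return false;
}

int main()
{
    /// Placeholder test body; in CI this would execute the actual test.
    auto test = [] { return true; };
    return isFlaky(test, 100) ? 1 : 0;   /// 100 runs for functional tests, 10 for integration tests
}
```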
We don’t use Travis CI due to the limit on time and computational power. We don’t use Jenkins. It was used before and now we are happy we are not using Jenkins. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f81a679b8e..80769fe9954 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -98,7 +98,9 @@ For a description of parameters, see the [CREATE query description](../../../sql - `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192. - `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes). - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage). - - `max_parts_in_total` — Maximum number of parts in all partitions. + - `max_parts_in_total` — Maximum number of parts in all partitions. + - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. + - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. **Example of Sections Setting** diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 04a239e0b06..ef34c8d3804 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -38,15 +38,15 @@ Example of setting the addresses of the ZooKeeper cluster: ``` xml - + example1 2181 - + example2 2181 - + example3 2181 @@ -61,21 +61,21 @@ Example of setting the addresses of the auxiliary ZooKeeper cluster: ``` xml - + example_2_1 2181 - + example_2_2 2181 - + example_2_3 2181 - + example_3_1 2181 diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 9f96ca3fe8c..7fffa962480 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -25,10 +25,27 @@ The Distributed engine accepts parameters: - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples +Also it accept the following settings: + +- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk. + +- `fsync_directories` - do the `fsync` for directories. 
Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc). + +!!! note "Note" + + **Durability settings** (`fsync_...`): + + - Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards. + - May significantly decrease the inserts' performance + - Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings` + Example: ``` sql Distributed(logs, default, hits[, sharding_key[, policy_name]]) +SETTINGS + fsync_after_insert=0, + fsync_directories=0; ``` Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster. diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md new file mode 100644 index 00000000000..b5ca23eddb9 --- /dev/null +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -0,0 +1,414 @@ +--- +toc_priority: 20 +toc_title: Brown University Benchmark +--- + +# Brown University Benchmark + +MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/). + +Download the data: +``` +wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz +``` + +Unpack the data: +``` +xz -v -d mgbench{1..3}.csv.xz +``` + +Create tables: +``` +CREATE DATABASE mgbench; + + +CREATE TABLE mgbench.logs1 ( + log_time DateTime, + machine_name LowCardinality(String), + machine_group LowCardinality(String), + cpu_idle Nullable(Float32), + cpu_nice Nullable(Float32), + cpu_system Nullable(Float32), + cpu_user Nullable(Float32), + cpu_wio Nullable(Float32), + disk_free Nullable(Float32), + disk_total Nullable(Float32), + part_max_used Nullable(Float32), + load_fifteen Nullable(Float32), + load_five Nullable(Float32), + load_one Nullable(Float32), + mem_buffers Nullable(Float32), + mem_cached Nullable(Float32), + mem_free Nullable(Float32), + mem_shared Nullable(Float32), + swap_free Nullable(Float32), + bytes_in Nullable(Float32), + bytes_out Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (machine_group, machine_name, log_time); + + +CREATE TABLE mgbench.logs2 ( + log_time DateTime, + client_ip IPv4, + request String, + status_code UInt16, + object_size UInt64 +) +ENGINE = MergeTree() +ORDER BY log_time; + + +CREATE TABLE mgbench.logs3 ( + log_time DateTime64, + device_id FixedString(15), + device_name LowCardinality(String), + device_type LowCardinality(String), + device_floor UInt8, + event_type LowCardinality(String), + event_unit FixedString(1), + event_value Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (event_type, log_time); +``` + +Insert data: + +``` +clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv +clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv +clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv +``` + +Run benchmark queries: +``` +-- Q1.1: What is the CPU/network utilization for each web server since midnight? 
+ +SELECT machine_name, + MIN(cpu) AS cpu_min, + MAX(cpu) AS cpu_max, + AVG(cpu) AS cpu_avg, + MIN(net_in) AS net_in_min, + MAX(net_in) AS net_in_max, + AVG(net_in) AS net_in_avg, + MIN(net_out) AS net_out_min, + MAX(net_out) AS net_out_max, + AVG(net_out) AS net_out_avg +FROM ( + SELECT machine_name, + COALESCE(cpu_user, 0.0) AS cpu, + COALESCE(bytes_in, 0.0) AS net_in, + COALESCE(bytes_out, 0.0) AS net_out + FROM logs1 + WHERE machine_name IN ('anansi','aragog','urd') + AND log_time >= TIMESTAMP '2017-01-11 00:00:00' +) AS r +GROUP BY machine_name; + + +-- Q1.2: Which computer lab machines have been offline in the past day? + +SELECT machine_name, + log_time +FROM logs1 +WHERE (machine_name LIKE 'cslab%' OR + machine_name LIKE 'mslab%') + AND load_one IS NULL + AND log_time >= TIMESTAMP '2017-01-10 00:00:00' +ORDER BY machine_name, + log_time; + + +-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation? + +SELECT dt, + hr, + AVG(load_fifteen) AS load_fifteen_avg, + AVG(load_five) AS load_five_avg, + AVG(load_one) AS load_one_avg, + AVG(mem_free) AS mem_free_avg, + AVG(swap_free) AS swap_free_avg +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + load_fifteen, + load_five, + load_one, + mem_free, + swap_free + FROM logs1 + WHERE machine_name = 'babbage' + AND load_fifteen IS NOT NULL + AND load_five IS NOT NULL + AND load_one IS NOT NULL + AND mem_free IS NOT NULL + AND swap_free IS NOT NULL + AND log_time >= TIMESTAMP '2017-01-01 00:00:00' +) AS r +GROUP BY dt, + hr +ORDER BY dt, + hr; + + +-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O? + +SELECT machine_name, + COUNT(*) AS spikes +FROM logs1 +WHERE machine_group = 'Servers' + AND cpu_wio > 0.99 + AND log_time >= TIMESTAMP '2016-12-01 00:00:00' + AND log_time < TIMESTAMP '2017-01-01 00:00:00' +GROUP BY machine_name +ORDER BY spikes DESC +LIMIT 10; + + +-- Q1.5: Which externally reachable VMs have run low on memory? + +SELECT machine_name, + dt, + MIN(mem_free) AS mem_free_min +FROM ( + SELECT machine_name, + CAST(log_time AS DATE) AS dt, + mem_free + FROM logs1 + WHERE machine_group = 'DMZ' + AND mem_free IS NOT NULL +) AS r +GROUP BY machine_name, + dt +HAVING MIN(mem_free) < 10000 +ORDER BY machine_name, + dt; + + +-- Q1.6: What is the total hourly network traffic across all file servers? + +SELECT dt, + hr, + SUM(net_in) AS net_in_sum, + SUM(net_out) AS net_out_sum, + SUM(net_in) + SUM(net_out) AS both_sum +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, + COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out + FROM logs1 + WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', + 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', + 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', + 'poprocks','razzles','runts','smarties','smuggler','spree','stride', + 'tootsie','trident','wrigley','york') +) AS r +GROUP BY dt, + hr +ORDER BY both_sum DESC +LIMIT 10; + + +-- Q2.1: Which requests have caused server errors within the past 2 weeks? + +SELECT * +FROM logs2 +WHERE status_code >= 500 + AND log_time >= TIMESTAMP '2012-12-18 00:00:00' +ORDER BY log_time; + + +-- Q2.2: During a specific 2-week period, was the user password file leaked? 
+ +SELECT * +FROM logs2 +WHERE status_code >= 200 + AND status_code < 300 + AND request LIKE '%/etc/passwd%' + AND log_time >= TIMESTAMP '2012-05-06 00:00:00' + AND log_time < TIMESTAMP '2012-05-20 00:00:00'; + + +-- Q2.3: What was the average path depth for top-level requests in the past month? + +SELECT top_level, + AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg +FROM ( + SELECT SUBSTRING(request FROM 1 FOR len) AS top_level, + request + FROM ( + SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, + request + FROM logs2 + WHERE status_code >= 200 + AND status_code < 300 + AND log_time >= TIMESTAMP '2012-12-01 00:00:00' + ) AS r + WHERE len > 0 +) AS s +WHERE top_level IN ('/about','/courses','/degrees','/events', + '/grad','/industry','/news','/people', + '/publications','/research','/teaching','/ugrad') +GROUP BY top_level +ORDER BY top_level; + + +-- Q2.4: During the last 3 months, which clients have made an excessive number of requests? + +SELECT client_ip, + COUNT(*) AS num_requests +FROM logs2 +WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' +GROUP BY client_ip +HAVING COUNT(*) >= 100000 +ORDER BY num_requests DESC; + + +-- Q2.5: What are the daily unique visitors? + +SELECT dt, + COUNT(DISTINCT client_ip) +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + client_ip + FROM logs2 +) AS r +GROUP BY dt +ORDER BY dt; + + +-- Q2.6: What are the average and maximum data transfer rates (Gbps)? + +SELECT AVG(transfer) / 125000000.0 AS transfer_avg, + MAX(transfer) / 125000000.0 AS transfer_max +FROM ( + SELECT log_time, + SUM(object_size) AS transfer + FROM logs2 + GROUP BY log_time +) AS r; + + +-- Q3.1: Did the indoor temperature reach freezing over the weekend? + +SELECT * +FROM logs3 +WHERE event_type = 'temperature' + AND event_value <= 32.0 + AND log_time >= '2019-11-29 17:00:00.000'; + + +-- Q3.4: Over the past 6 months, how frequently was each door opened? + +SELECT device_name, + device_floor, + COUNT(*) AS ct +FROM logs3 +WHERE event_type = 'door_open' + AND log_time >= '2019-06-01 00:00:00.000' +GROUP BY device_name, + device_floor +ORDER BY ct DESC; + + +-- Q3.5: Where in the building do large temperature variations occur in winter and summer? + +WITH temperature AS ( + SELECT dt, + device_name, + device_type, + device_floor + FROM ( + SELECT dt, + hr, + device_name, + device_type, + device_floor, + AVG(event_value) AS temperature_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + device_name, + device_type, + device_floor, + event_value + FROM logs3 + WHERE event_type = 'temperature' + ) AS r + GROUP BY dt, + hr, + device_name, + device_type, + device_floor + ) AS s + GROUP BY dt, + device_name, + device_type, + device_floor + HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0 +) +SELECT DISTINCT device_name, + device_type, + device_floor, + 'WINTER' +FROM temperature +WHERE dt >= DATE '2018-12-01' + AND dt < DATE '2019-03-01' +UNION +SELECT DISTINCT device_name, + device_type, + device_floor, + 'SUMMER' +FROM temperature +WHERE dt >= DATE '2019-06-01' + AND dt < DATE '2019-09-01'; + + +-- Q3.6: For each device category, what are the monthly power consumption metrics? 
+ +SELECT yr, + mo, + SUM(coffee_hourly_avg) AS coffee_monthly_sum, + AVG(coffee_hourly_avg) AS coffee_monthly_avg, + SUM(printer_hourly_avg) AS printer_monthly_sum, + AVG(printer_hourly_avg) AS printer_monthly_avg, + SUM(projector_hourly_avg) AS projector_monthly_sum, + AVG(projector_hourly_avg) AS projector_monthly_avg, + SUM(vending_hourly_avg) AS vending_monthly_sum, + AVG(vending_hourly_avg) AS vending_monthly_avg +FROM ( + SELECT dt, + yr, + mo, + hr, + AVG(coffee) AS coffee_hourly_avg, + AVG(printer) AS printer_hourly_avg, + AVG(projector) AS projector_hourly_avg, + AVG(vending) AS vending_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(YEAR FROM log_time) AS yr, + EXTRACT(MONTH FROM log_time) AS mo, + EXTRACT(HOUR FROM log_time) AS hr, + CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee, + CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, + CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, + CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending + FROM logs3 + WHERE device_type = 'meter' + ) AS r + GROUP BY dt, + yr, + mo, + hr +) AS s +GROUP BY yr, + mo +ORDER BY yr, + mo; +``` + +The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). 
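+If the benchmark queries return empty results, a quick sanity check is to count the rows loaded by the insert step (this check is not part of the original MgBench instructions; the exact counts depend on the dataset files you downloaded):
+
+```
+-- Illustrative check only, not part of the original benchmark script.
+SELECT count() FROM mgbench.logs1;
+SELECT count() FROM mgbench.logs2;
+SELECT count() FROM mgbench.logs3;
+```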
diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 00bedd4aa78..72f44d8caf1 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -13,11 +13,12 @@ The list of documented datasets: - [GitHub Events](../../getting-started/example-datasets/github-events.md) - [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) - [Recipes](../../getting-started/example-datasets/recipes.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) +- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md) - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) -- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) -- [OnTime](../../getting-started/example-datasets/ontime.md) +- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index cdbb9b56eeb..159c99b15a0 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -71,4 +71,4 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" [ClickHouse tutorial](../../getting-started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial. -Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hists` and `test.visits` there). +Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there). diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 5e499cafb2a..6e46cddba52 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -398,6 +398,8 @@ ORDER BY c DESC LIMIT 10; ``` +You can also play with the data in Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). + This performance test was created by Vadim Tkachenko. 
See: - https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/ diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index f3a6381aeca..c08eec61b1c 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -13,6 +13,7 @@ toc_title: Client Libraries - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-client](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e5f836e3b5a..82537ba637a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -844,23 +844,27 @@ Higher values will lead to higher memory usage. ## max_compress_block_size {#max-compress-block-size} -The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn’t any reason to change this setting. +The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. + +!!! note "Warning" + This is an expert-level setting, and you shouldn't change it if you're just getting started with Clickhouse. Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table). ## min_compress_block_size {#min-compress-block-size} -For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least ‘min_compress_block_size’. By default, 65,536. +For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536. -The actual size of the block, if the uncompressed data is less than ‘max_compress_block_size’, is no less than this value and no less than the volume of data for one mark. +The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark. -Let’s look at an example. Assume that ‘index_granularity’ was set to 8192 during table creation. +Let’s look at an example. Assume that `index_granularity` was set to 8192 during table creation. We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks. 
We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed.

-There usually isn’t any reason to change this setting.
+!!! note "Warning"
+    This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.

## max_query_size {#settings-max_query_size}

diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index 7a9e386d419..5dc23aee686 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -20,7 +20,33 @@ System tables:

Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.

-Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
+Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in the filesystem by default. If you remove a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data writing. If the system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
+
+System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or by setting corresponding elements in `/etc/clickhouse-server/config.xml`. The following elements can be customized:
+
+- `database`: the database the system log table belongs to. This option is deprecated now; all system log tables are under the `system` database.
+- `table`: the table to insert data into.
+- `partition_by`: specify the [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression.
+- `ttl`: specify the table [TTL](../../sql-reference/statements/alter/ttl.md) expression.
+- `flush_interval_milliseconds`: the interval for flushing data to disk.
+- `engine`: provide a full engine expression (starting with `ENGINE =`) with parameters. This option conflicts with `partition_by` and `ttl`; if they are set together, the server raises an exception and exits.
+
+An example:
+
+```
+<yandex>
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </query_log>
+</yandex>
+``` By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables. diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 4ca1fc657ee..30122cb133e 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -11,6 +11,7 @@ This table contains the following columns (the column type is shown in brackets) - `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. - `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). - `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). Example: @@ -21,11 +22,11 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') ``` ``` text -┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐ -│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ -│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ -│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ -└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘ +┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┬─supports_parallel_insert─┐ +│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ 1 │ +│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ +│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ +└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┴──────────────────────────┘ ``` **See also** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index c4b7156ad95..dc7727bdfd8 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1290,25 +1290,68 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference ## arrayMin(\[func,\] arr1, …) {#array-min} -Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements. +Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements. Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +Examples: +```sql +SELECT arrayMin([1, 2, 4]) AS res +┌─res─┐ +│ 1 │ +└─────┘ + + +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res +┌─res─┐ +│ -4 │ +└─────┘ +``` + ## arrayMax(\[func,\] arr1, …) {#array-max} -Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements. 
+Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements. Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +Examples: +```sql +SELECT arrayMax([1, 2, 4]) AS res +┌─res─┐ +│ 4 │ +└─────┘ + + +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res +┌─res─┐ +│ -1 │ +└─────┘ +``` + ## arraySum(\[func,\] arr1, …) {#array-sum} Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements. Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +Examples: +```sql +SELECT arraySum([2,3]) AS res +┌─res─┐ +│ 5 │ +└─────┘ + + +SELECT arraySum(x -> x*x, [2, 3]) AS res +┌─res─┐ +│ 13 │ +└─────┘ +``` + + ## arrayAvg(\[func,\] arr1, …) {#array-avg} -Returns the sum of the `func` values. If the function is omitted, it just returns the average of the array elements. +Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements. Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2bd4c4c5a80..1bf7b8f0640 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -182,6 +182,14 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. +## byteSize(...) {#function-bytesize} + +Get an estimate of uncompressed byte size of its arguments in memory. +E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length). +The function can take multiple arguments. The typical application is byteSize(*). + +Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis. + ## materialize(x) {#materializex} Turns a constant into a full column containing just one value. 
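To make the new `byteSize` description concrete, here is a minimal query sketch; the expected values in the comments are derived from the sizes stated above (4 bytes for UInt32, string length + 9 for String) rather than captured from a server:

```sql
SELECT
    byteSize(toUInt32(1)) AS uint32_bytes, -- expected: 4 (fixed-width UInt32)
    byteSize('hello')     AS string_bytes; -- expected: 14 = 5 characters + 9 bytes of overhead
```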
diff --git a/docs/es/interfaces/third-party/client-libraries.md b/docs/es/interfaces/third-party/client-libraries.md index 818bdbbc6f0..b61ab1a5d9c 100644 --- a/docs/es/interfaces/third-party/client-libraries.md +++ b/docs/es/interfaces/third-party/client-libraries.md @@ -13,6 +13,7 @@ toc_title: Client Libraries - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-client](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/fr/interfaces/third-party/client-libraries.md b/docs/fr/interfaces/third-party/client-libraries.md index 5a86d12a09c..7949aa1d7cf 100644 --- a/docs/fr/interfaces/third-party/client-libraries.md +++ b/docs/fr/interfaces/third-party/client-libraries.md @@ -15,6 +15,7 @@ toc_title: "Biblioth\xE8ques Clientes" - [clickhouse-chauffeur](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-client](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2 / phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov / clickhouse-PHP-client](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/ja/interfaces/third-party/client-libraries.md b/docs/ja/interfaces/third-party/client-libraries.md index ffe7b641c38..c7bd368bc4c 100644 --- a/docs/ja/interfaces/third-party/client-libraries.md +++ b/docs/ja/interfaces/third-party/client-libraries.md @@ -15,6 +15,7 @@ toc_title: "\u30AF\u30E9\u30A4\u30A2\u30F3\u30C8" - [clickhouse-ドライバ](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-クライアント](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov/clickhouse-php-クライアント](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 501f7732745..c7bd64c4ab1 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -77,17 +77,19 @@ ORDER BY expr - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные): - - `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage). - - `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage). + - `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage). + - `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage). 
- `min_index_granularity_bytes` — минимально допустимый размер гранул данных в байтах. Значение по умолчанию — 1024b. Для обеспечения защиты от случайного создания таблиц с очень низким значением `index_granularity_bytes`. Смотрите [Хранение данных](#mergetree-data-storage). - - `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`. - - `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера". - - `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов. - - `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день). - - `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её. - - `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192. - - `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes). + - `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`. + - `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера". + - `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. 
Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов. + - `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день). + - `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её. + - `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192. + - `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes). - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage). + - `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. + - `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. **Пример задания секций** diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index c07aab5826c..26e05b02509 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -13,6 +13,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-client](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index c4f5cdaf2ca..a3bf13bfed3 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -811,23 +811,27 @@ log_query_threads=1 ## max_compress_block_size {#max-compress-block-size} -Максимальный размер блоков не сжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативки. Как правило, не имеет смысла менять эту настройку. +Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативной памяти. + +!!! 
note "Предупреждение" + Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы). ## min_compress_block_size {#min-compress-block-size} -Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536. +Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше `min_compress_block_size`. По умолчанию - 65 536. -Реальный размер блока, если несжатых данных меньше max_compress_block_size, будет не меньше этого значения и не меньше объёма данных на одну засечку. +Реальный размер блока, если несжатых данных меньше `max_compress_block_size`, будет не меньше этого значения и не меньше объёма данных на одну засечку. -Рассмотрим пример. Пусть index_granularity, указанная при создании таблицы - 8192. +Рассмотрим пример. Пусть `index_granularity`, указанная при создании таблицы - 8192. -Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как min_compress_block_size = 65 536, сжатый блок будет сформирован на каждые две засечки. +Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как `min_compress_block_size` = 65 536, сжатый блок будет сформирован на каждые две засечки. Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 строк, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных. -Как правило, не имеет смысла менять эту настройку. +!!! note "Предупреждение" + Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse. 
## max_query_size {#settings-max_query_size} diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 36e4acb6a24..8f104e3a7d8 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -267,7 +267,7 @@ void executeQuery( **9.** 多行注释的开头和结尾不得有空行(关闭多行注释的行除外)。 -**10.** 要注释掉代码,请使用基本注释,而不是«记录»注释。 +**10.** 要注释掉代码,请使用基本注释,而不是“文档”注释。 **11.** 在提交之前删除代码的无效注释部分。 @@ -335,7 +335,7 @@ template struct ExtractDomain ``` -**7.** 对于抽象类型(接口),用 `I` 前缀。 +**7.** 对于抽象类(接口),用 `I` 前缀。 ``` cpp class IBlockInputStream @@ -349,7 +349,7 @@ class IBlockInputStream bool info_successfully_loaded = false; ``` -**9.** `define` 和全局常量的名称使用带下划线的 `ALL_CAPS`。 +**9.** `define` 和全局常量的名称使用全大写带下划线的形式,如 `ALL_CAPS`。 ``` cpp #define MAX_SRC_TABLE_NAMES_TO_STORE 1000 @@ -357,14 +357,14 @@ bool info_successfully_loaded = false; **10.** 文件名应使用与其内容相同的样式。 -如果文件包含单个类,则以与该类名称相同的方式命名该文件。 +如果文件包含单个类,则以与该类名称相同的方式命名该文件(CamelCase)。 -如果文件包含单个函数,则以与函数名称相同的方式命名文件。 +如果文件包含单个函数,则以与函数名称相同的方式命名文件(camelCase)。 **11.** 如果名称包含缩写,则: - 对于变量名,缩写应使用小写字母 `mysql_connection`(不是 `mySQL_connection` )。 -- 对于类和函数的名称,请将大写字母保留在缩写 `MySQLConnection`(不是 `MySqlConnection` 。 +- 对于类和函数的名称,请将大写字母保留在缩写 `MySQLConnection`(不是 `MySqlConnection`)。 **12.** 仅用于初始化类成员的构造方法参数的命名方式应与类成员相同,但最后使用下划线。 @@ -411,7 +411,7 @@ enum class CompressionMethod 如果缩短版本是常用的,则可以接受不完整的单词。 -如果注释中旁边包含全名,您也可以使用缩写。 +如果旁边有注释包含全名,您也可以使用缩写。 **17.** C++ 源码文件名称必须为 `.cpp` 拓展名。 头文件必须为 `.h` 拓展名。 @@ -441,7 +441,7 @@ enum class CompressionMethod 在离线数据处理应用程序中,通常可以接受不捕获异常。 -在处理用户请求的服务器中,通常足以捕获连接处理程序顶层的异常。 +在处理用户请求的服务器中,捕获连接处理程序顶层的异常通常就足够了。 在线程函数中,你应该在 `join` 之后捕获并保留所有异常以在主线程中重新抛出它们。 @@ -548,7 +548,7 @@ Fork不用于并行化。 **10.** 常量。 -使用 const 引用,指向常量的指针,`const_iterator`和 const 指针。 +使用 const 引用、指针,指向常量、`const_iterator`和 const 方法。 将 `const` 视为默认值,仅在必要时使用非 `const`。 @@ -560,7 +560,7 @@ Fork不用于并行化。 **12.** 数值类型。 -使用 `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, 以及 `Int64`, `size_t`, `ssize_t` 还有 `ptrdiff_t`。 +使用 `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32` 和 `Int64`,同样还有 `size_t`, `ssize_t` 和 `ptrdiff_t`。 不要使用这些类型:`signed / unsigned long`,`long long`,`short`,`signed / unsigned char`,`char`。 @@ -732,11 +732,11 @@ CPU指令集是我们服务器中支持的最小集合。 目前,它是SSE 4.2 **8.** 尽可能经常地进行提交,即使代码只是部分准备好了。 -目的明确的功能,使用分支。 +为了这种目的可以创建分支。 -如果 `master` 分支中的代码尚不可构建,请在 `push` 之前将其从构建中排除。您需要在几天内完成或删除它。 +如果您的代码在 `master` 分支中尚不可构建,在 `push` 之前需要将其从构建中排除。您需要在几天内完成或删除它。 -**9.** 对于不重要的更改,请使用分支并在服务器上发布它们。 +**9.** 对于非一般的更改,请使用分支并在服务器上发布它们。 **10.** 未使用的代码将从 repo 中删除。 diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index e94eb8bcfc0..e2412f2b8de 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -13,6 +13,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试 - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-client](https://github.com/yurial/clickhouse-client) - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) - PHP - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) diff --git a/docs/zh/operations/system-tables/index.md b/docs/zh/operations/system-tables/index.md index fcf6741761b..56067bc5057 100644 --- a/docs/zh/operations/system-tables/index.md +++ b/docs/zh/operations/system-tables/index.md @@ -22,9 +22,35 @@ toc_title: "\u7CFB\u7EDF\u8868" 大多数系统表将数据存储在RAM中。 
ClickHouse服务器在开始时创建此类系统表。

-与其他系统表不同,系统表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表,ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改,则ClickHouse会重命名当前表并创建一个新表。
+与其他系统表不同,系统日志表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log), [part_log](../../operations/system-tables/part_log.md#system.part_log), crash_log 和 text_log 默认采用 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎,并将其数据存储在文件系统中。 如果从文件系统中删除表,ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改,则ClickHouse会重命名当前表并创建一个新表。
-默认情况下,表增长是无限的。 要控制表的大小,可以使用 [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。
+用户可以通过在`/etc/clickhouse-server/config.d/`下创建与系统表同名的配置文件, 或者在`/etc/clickhouse-server/config.xml`中设置相应配置项,来自定义系统日志表的结构。可以自定义的配置项如下:
+
+- `database`: 系统日志表所在的数据库。这个选项目前已经废弃。所有的系统日志表都位于`system`库中。
+- `table`: 系统日志表名。
+- `partition_by`: 指定[PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md)表达式。
+- `ttl`: 指定系统日志表TTL选项。
+- `flush_interval_milliseconds`: 指定系统日志表数据落盘时间。
+- `engine`: 指定完整的表引擎定义。(以`ENGINE = `开始)。 这个选项与`partition_by`以及`ttl`冲突。如果两者一起设置,服务启动时会抛出异常并且退出。
+
+一个配置定义的例子如下:
+
+```
+<yandex>
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </query_log>
+</yandex>
+``` + +默认情况下,表增长是无限的。 要控制表的大小,可以使用 TTL 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。 ## 系统指标的来源 {#system-tables-sources-of-system-metrics} diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 5371f093b10..9adca58b55a 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -216,7 +216,13 @@ if (CLICKHOUSE_SPLIT_BINARY) install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse) else () add_executable (clickhouse main.cpp) - target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils) + + # A library that prevent usage of several functions from libc. + if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID) + set (HARMFUL_LIB harmful) + endif () + + target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (ENABLE_CLICKHOUSE_SERVER) @@ -312,6 +318,10 @@ else () if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM) endif() + + if (USE_BINARY_HASH) + add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) + endif() endif () if (ENABLE_TESTS AND USE_GTEST) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 16e90816443..ca2a3db193f 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -59,7 +59,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -110,6 +112,7 @@ namespace ErrorCodes extern const int INVALID_USAGE_OF_INPUT; extern const int DEADLOCK_AVOIDED; extern const int UNRECOGNIZED_ARGUMENTS; + extern const int SYNTAX_ERROR; } @@ -136,6 +139,9 @@ private: bool stdin_is_a_tty = false; /// stdin is a terminal. bool stdout_is_a_tty = false; /// stdout is a terminal. + /// If not empty, queries will be read from these files + std::vector queries_files; + std::unique_ptr connection; /// Connection to DB. String full_query; /// Current query as it was given to the client. @@ -478,10 +484,10 @@ private: /// - stdin is not a terminal. In this case queries are read from it. /// - -qf (--queries-file) command line option is present. /// The value of the option is used as file with query (or of multiple queries) to execute. - if (!stdin_is_a_tty || config().has("query") || config().has("queries-file")) + if (!stdin_is_a_tty || config().has("query") || !queries_files.empty()) is_interactive = false; - if (config().has("query") && config().has("queries-file")) + if (config().has("query") && !queries_files.empty()) { throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); } @@ -696,14 +702,8 @@ private: auto query_id = config().getString("query_id", ""); if (!query_id.empty()) context.setCurrentQueryId(query_id); - if (query_fuzzer_runs) - { - nonInteractiveWithFuzzing(); - } - else - { - nonInteractive(); - } + + nonInteractive(); /// If exception code isn't zero, we should return non-zero return code anyway. 
if (last_exception_received_from_server) @@ -794,15 +794,22 @@ private: { String text; - if (config().has("queries-file")) + if (!queries_files.empty()) { - ReadBufferFromFile in(config().getString("queries-file")); - readStringUntilEOF(text, in); - processMultiQuery(text); + for (const auto & queries_file : queries_files) + { + connection->setDefaultDatabase(connection_parameters.default_database); + ReadBufferFromFile in(queries_file); + readStringUntilEOF(text, in); + if (!processMultiQuery(text)) + break; + } return; } else if (config().has("query")) + { text = config().getRawString("query"); /// Poco configuration should not process substitutions in form of ${...} inside query. + } else { /// If 'query' parameter is not set, read a query from stdin. @@ -811,113 +818,10 @@ private: readStringUntilEOF(text, in); } - processQueryText(text); - } - - void nonInteractiveWithFuzzing() - { - if (config().has("query")) - { - // Poco configuration should not process substitutions in form of - // ${...} inside query - processWithFuzzing(config().getRawString("query")); - return; - } - - // Try to stream the queries from stdin, without reading all of them - // into memory. The interface of the parser does not support streaming, - // in particular, it can't distinguish the end of partial input buffer - // and the final end of input file. This means we have to try to split - // the input into separate queries here. Two patterns of input are - // especially interesting: - // 1) multiline query: - // select 1 - // from system.numbers; - // - // 2) csv insert with in-place data: - // insert into t format CSV 1;2 - // - // (1) means we can't split on new line, and (2) means we can't split on - // semicolon. Solution: split on ';\n'. This sequence is frequent enough - // in the SQL tests which are our principal input for fuzzing. Now we - // have another interesting case: - // 3) escaped semicolon followed by newline, e.g. - // select '; - // ' - // - // To handle (3), parse until we can, and read more data if the parser - // complains. Hopefully this should be enough... - ReadBufferFromFileDescriptor in(STDIN_FILENO); - std::string text; - while (!in.eof()) - { - // Read until separator. - while (!in.eof()) - { - char * next_separator = find_first_symbols<';'>(in.position(), - in.buffer().end()); - - if (next_separator < in.buffer().end()) - { - next_separator++; - if (next_separator < in.buffer().end() - && *next_separator == '\n') - { - // Found ';\n', append it to the query text and try to - // parse. - next_separator++; - text.append(in.position(), next_separator - in.position()); - in.position() = next_separator; - break; - } - } - - // Didn't find the semicolon and reached the end of buffer. - text.append(in.position(), next_separator - in.position()); - in.position() = next_separator; - - if (text.size() > 1024 * 1024) - { - // We've read a lot of text and still haven't seen a separator. - // Likely some pathological input, just fall through to prevent - // too long loops. - break; - } - } - - // Parse and execute what we've read. - const auto * new_end = processWithFuzzing(text); - - if (new_end > &text[0]) - { - const auto rest_size = text.size() - (new_end - &text[0]); - - memcpy(&text[0], new_end, rest_size); - text.resize(rest_size); - } - else - { - // We didn't read enough text to parse a query. Will read more. - } - - // Ensure that we're still connected to the server. If the server died, - // the reconnect is going to fail with an exception, and the fuzzer - // will exit. 
The ping() would be the best match here, but it's - // private, probably for a good reason that the protocol doesn't allow - // pings at any possible moment. - // Don't forget to reset the default database which might have changed. - connection->setDefaultDatabase(""); - connection->forceConnected(connection_parameters.timeouts); - - if (text.size() > 4 * 1024) - { - // Some pathological situation where the text is larger than 4kB - // and we still cannot parse a single query in it. Abort. - std::cerr << "Read too much text and still can't parse a query." - " Aborting." << std::endl; - exit(1); - } - } + if (query_fuzzer_runs) + processWithFuzzing(text); + else + processQueryText(text); } bool processQueryText(const String & text) @@ -945,7 +849,8 @@ private: { const bool test_mode = config().has("testmode"); - { /// disable logs if expects errors + { + /// disable logs if expects errors TestHint test_hint(test_mode, all_queries_text); if (test_hint.clientError() || test_hint.serverError()) processTextAsSingleQuery("SET send_logs_level = 'fatal'"); @@ -1019,7 +924,7 @@ private: if (hint.clientError() != e.code()) { if (hint.clientError()) - e.addMessage("\nExpected clinet error: " + std::to_string(hint.clientError())); + e.addMessage("\nExpected client error: " + std::to_string(hint.clientError())); throw; } @@ -1078,40 +983,51 @@ private: expected_client_error = test_hint.clientError(); expected_server_error = test_hint.serverError(); - try + if (query_fuzzer_runs) { - processParsedSingleQuery(); - - if (insert_ast && insert_ast->data) - { - // For VALUES format: use the end of inline data as reported - // by the format parser (it is saved in sendData()). This - // allows us to handle queries like: - // insert into t values (1); select 1 - //, where the inline data is delimited by semicolon and not - // by a newline. - this_query_end = parsed_query->as()->end; - } - } - catch (...) - { - last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); - actual_client_error = last_exception_received_from_server->code(); - if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error)) - std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message(); - received_exception_from_server = true; - } - - if (!test_hint.checkActual(actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server)) - connection->forceConnected(connection_parameters.timeouts); - - if (received_exception_from_server && !ignore_error) - { - if (is_interactive) - break; - else + if (!processWithFuzzing(full_query)) return false; } + else + { + try + { + processParsedSingleQuery(); + + if (insert_ast && insert_ast->data) + { + // For VALUES format: use the end of inline data as reported + // by the format parser (it is saved in sendData()). This + // allows us to handle queries like: + // insert into t values (1); select 1 + //, where the inline data is delimited by semicolon and not + // by a newline. + this_query_end = parsed_query->as()->end; + } + } + catch (...) 
+ { + last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); + actual_client_error = last_exception_received_from_server->code(); + if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error)) + std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message(); + received_exception_from_server = true; + } + + if (!test_hint.checkActual( + actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server)) + { + connection->forceConnected(connection_parameters.timeouts); + } + + if (received_exception_from_server && !ignore_error) + { + if (is_interactive) + break; + else + return false; + } + } this_query_begin = this_query_end; } @@ -1120,165 +1036,148 @@ private: } - // Returns the last position we could parse. - const char * processWithFuzzing(const String & text) + /// Returns false when server is not available. + bool processWithFuzzing(const String & text) { - /// Several queries separated by ';'. - /// INSERT data is ended by the end of line, not ';'. + ASTPtr orig_ast; - const char * begin = text.data(); - const char * end = begin + text.size(); - - while (begin < end) + try { - // Skip whitespace before the query - while (isWhitespaceASCII(*begin) || *begin == ';') + const char * begin = text.data(); + orig_ast = parseQuery(begin, begin + text.size(), true); + } + catch (const Exception & e) + { + if (e.code() != ErrorCodes::SYNTAX_ERROR) + throw; + } + + if (!orig_ast) + { + // Can't continue after a parsing error + return true; + } + + // Don't repeat inserts, the tables grow too big. Also don't repeat + // creates because first we run the unmodified query, it will succeed, + // and the subsequent queries will fail. When we run out of fuzzer + // errors, it may be interesting to add fuzzing of create queries that + // wraps columns into LowCardinality or Nullable. Also there are other + // kinds of create queries such as CREATE DICTIONARY, we could fuzz + // them as well. Also there is no point fuzzing DROP queries. + size_t this_query_runs = query_fuzzer_runs; + if (orig_ast->as() || orig_ast->as() || orig_ast->as()) + { + this_query_runs = 1; + } + + ASTPtr fuzz_base = orig_ast; + for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step) + { + fmt::print(stderr, "Fuzzing step {} out of {}\n", + fuzz_step, this_query_runs); + + ASTPtr ast_to_process; + try { - ++begin; + WriteBufferFromOwnString dump_before_fuzz; + fuzz_base->dumpTree(dump_before_fuzz); + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); + + ast_to_process = fuzz_base->clone(); + + WriteBufferFromOwnString dump_of_cloned_ast; + ast_to_process->dumpTree(dump_of_cloned_ast); + + // Run the original query as well. + if (fuzz_step > 0) + { + fuzzer.fuzzMain(ast_to_process); + } + + auto base_after_fuzz = fuzz_base->formatForErrorMessage(); + + // Debug AST cloning errors. + if (base_before_fuzz != base_after_fuzz) + { + fmt::print(stderr, + "Base before fuzz: {}\n" + "Base after fuzz: {}\n", + base_before_fuzz, base_after_fuzz); + fmt::print(stderr, "Dump before fuzz:\n{}\n", dump_before_fuzz.str()); + fmt::print(stderr, "Dump of cloned AST:\n{}\n", dump_of_cloned_ast.str()); + fmt::print(stderr, "Dump after fuzz:\n"); + + WriteBufferFromOStream cerr_buf(std::cerr, 4096); + fuzz_base->dumpTree(cerr_buf); + cerr_buf.next(); + + fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. 
The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly."); + + assert(false); + } + + auto fuzzed_text = ast_to_process->formatForErrorMessage(); + if (fuzz_step > 0 && fuzzed_text == base_before_fuzz) + { + fmt::print(stderr, "Got boring AST\n"); + continue; + } + + parsed_query = ast_to_process; + query_to_send = parsed_query->formatForErrorMessage(); + + processParsedSingleQuery(); + } + catch (...) + { + // Some functions (e.g. protocol parsers) don't throw, but + // set last_exception instead, so we'll also do it here for + // uniformity. + last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); + received_exception_from_server = true; } - const auto * this_query_begin = begin; - ASTPtr orig_ast = parseQuery(begin, end, true); - - if (!orig_ast) + if (received_exception_from_server) { - // Can't continue after a parsing error - return begin; + fmt::print(stderr, "Error on processing query '{}': {}\n", + ast_to_process->formatForErrorMessage(), + last_exception_received_from_server->message()); } - auto * as_insert = orig_ast->as(); - if (as_insert && as_insert->data) + if (!connection->isConnected()) { - // INSERT data is ended by newline - as_insert->end = find_first_symbols<'\n'>(as_insert->data, end); - begin = as_insert->end; + // Probably the server is dead because we found an assertion + // failure. Fail fast. + fmt::print(stderr, "Lost connection to the server\n"); + return false; } - full_query = text.substr(this_query_begin - text.data(), - begin - text.data()); - - // Don't repeat inserts, the tables grow too big. Also don't repeat - // creates because first we run the unmodified query, it will succeed, - // and the subsequent queries will fail. When we run out of fuzzer - // errors, it may be interesting to add fuzzing of create queries that - // wraps columns into LowCardinality or Nullable. Also there are other - // kinds of create queries such as CREATE DICTIONARY, we could fuzz - // them as well. - int this_query_runs = query_fuzzer_runs; - if (as_insert - || orig_ast->as()) + // The server is still alive so we're going to continue fuzzing. + // Determine what we're going to use as the starting AST. + if (received_exception_from_server) { - this_query_runs = 1; + // Query completed with error, keep the previous starting AST. + // Also discard the exception that we now know to be non-fatal, + // so that it doesn't influence the exit code. + last_exception_received_from_server.reset(nullptr); + received_exception_from_server = false; } - - ASTPtr fuzz_base = orig_ast; - for (int fuzz_step = 0; fuzz_step < this_query_runs; fuzz_step++) + else if (ast_to_process->formatForErrorMessage().size() > 500) { - fprintf(stderr, "fuzzing step %d out of %d for query at pos %zd\n", - fuzz_step, this_query_runs, this_query_begin - text.data()); - - ASTPtr ast_to_process; - try - { - WriteBufferFromOwnString dump_before_fuzz; - fuzz_base->dumpTree(dump_before_fuzz); - auto base_before_fuzz = fuzz_base->formatForErrorMessage(); - - ast_to_process = fuzz_base->clone(); - - WriteBufferFromOwnString dump_of_cloned_ast; - ast_to_process->dumpTree(dump_of_cloned_ast); - - // Run the original query as well. - if (fuzz_step > 0) - { - fuzzer.fuzzMain(ast_to_process); - } - - auto base_after_fuzz = fuzz_base->formatForErrorMessage(); - - // Debug AST cloning errors. 
- if (base_before_fuzz != base_after_fuzz) - { - fprintf(stderr, "base before fuzz: %s\n" - "base after fuzz: %s\n", base_before_fuzz.c_str(), - base_after_fuzz.c_str()); - fprintf(stderr, "dump before fuzz:\n%s\n", - dump_before_fuzz.str().c_str()); - fprintf(stderr, "dump of cloned ast:\n%s\n", - dump_of_cloned_ast.str().c_str()); - fprintf(stderr, "dump after fuzz:\n"); - WriteBufferFromOStream cerr_buf(std::cerr, 4096); - fuzz_base->dumpTree(cerr_buf); - cerr_buf.next(); - - fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly."); - - assert(false); - } - - auto fuzzed_text = ast_to_process->formatForErrorMessage(); - if (fuzz_step > 0 && fuzzed_text == base_before_fuzz) - { - fprintf(stderr, "got boring ast\n"); - continue; - } - - parsed_query = ast_to_process; - query_to_send = parsed_query->formatForErrorMessage(); - - processParsedSingleQuery(); - } - catch (...) - { - // Some functions (e.g. protocol parsers) don't throw, but - // set last_exception instead, so we'll also do it here for - // uniformity. - last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); - received_exception_from_server = true; - } - - if (received_exception_from_server) - { - fmt::print(stderr, "Error on processing query '{}': {}\n", - ast_to_process->formatForErrorMessage(), - last_exception_received_from_server->message()); - } - - if (!connection->isConnected()) - { - // Probably the server is dead because we found an assertion - // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server\n"); - return begin; - } - - // The server is still alive so we're going to continue fuzzing. - // Determine what we're going to use as the starting AST. - if (received_exception_from_server) - { - // Query completed with error, keep the previous starting AST. - // Also discard the exception that we now know to be non-fatal, - // so that it doesn't influence the exit code. - last_exception_received_from_server.reset(nullptr); - received_exception_from_server = false; - } - else if (ast_to_process->formatForErrorMessage().size() > 500) - { - // ast too long, start from original ast - fprintf(stderr, "Current AST is too long, discarding it and using the original AST as a start\n"); - fuzz_base = orig_ast; - } - else - { - // fuzz starting from this successful query - fprintf(stderr, "Query succeeded, using this AST as a start\n"); - fuzz_base = ast_to_process; - } + // ast too long, start from original ast + fmt::print(stderr, "Current AST is too long, discarding it and using the original AST as a start\n"); + fuzz_base = orig_ast; + } + else + { + // fuzz starting from this successful query + fmt::print(stderr, "Query succeeded, using this AST as a start\n"); + fuzz_base = ast_to_process; } } - return begin; + return true; } void processTextAsSingleQuery(const String & text_) @@ -1891,6 +1790,13 @@ private: { if (!block_out_stream) { + /// Ignore all results when fuzzing as they can be huge. 
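The loop above also decides what to mutate next: keep the previous base when the query failed, fall back to the original query when the current AST grows too long, and otherwise adopt the successful fuzzed query. A minimal sketch of that policy; QueryState and the 500-character threshold are stand-ins mirroring the logic above, not the real client types:

    #include <string>

    struct QueryState
    {
        std::string original;   // the unmodified query text
        std::string current;    // the last fuzzed variant that was executed
        bool failed = false;    // whether the server returned an error for it
    };

    std::string chooseNextFuzzBase(const QueryState & state, const std::string & previous_base)
    {
        if (state.failed)
            return previous_base;     // error: keep fuzzing from the old base
        if (state.current.size() > 500)
            return state.original;    // too long: restart from the original query
        return state.current;         // success: continue from the fuzzed query
    }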
+ if (query_fuzzer_runs) + { + block_out_stream = std::make_shared(block); + return; + } + WriteBuffer * out_buf = nullptr; String pager = config().getString("pager", ""); if (!pager.empty()) @@ -2348,7 +2254,8 @@ public: "Suggestion limit for how many databases, tables and columns to fetch.") ("multiline,m", "multiline") ("multiquery,n", "multiquery") - ("queries-file", po::value(), "file path with queries to execute") + ("queries-file", po::value>()->multitoken(), + "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("format,f", po::value(), "default output format") ("testmode,T", "enable test hints in comments") ("ignore-error", "do not stop processing in multiquery mode") @@ -2478,12 +2385,11 @@ public: if (options.count("query")) config().setString("query", options["query"].as()); if (options.count("queries-file")) - config().setString("queries-file", options["queries-file"].as()); + queries_files = options["queries-file"].as>(); if (options.count("database")) config().setString("database", options["database"].as()); if (options.count("pager")) config().setString("pager", options["pager"].as()); - if (options.count("port") && !options["port"].defaulted()) config().setInt("port", options["port"].as()); if (options.count("secure")) @@ -2537,7 +2443,6 @@ public: config().setBool("multiquery", true); // Ignore errors in parsing queries. - // TODO stop using parseQuery. config().setBool("ignore-error", true); ignore_error = true; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 2fba335bc66..8404586d394 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -763,7 +764,7 @@ namespace fmt::print("Server started\n"); break; } - ::sleep(1); + sleepForSeconds(1); } if (try_num == num_tries) @@ -875,7 +876,7 @@ namespace fmt::print("Server stopped\n"); break; } - ::sleep(1); + sleepForSeconds(1); } if (try_num == num_tries) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index dbf153eeb81..5a8d35e204d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -273,11 +273,12 @@ try global_context->setCurrentDatabase(default_database); applyCmdOptions(*global_context); - String path = global_context->getPath(); - if (!path.empty()) + if (config().has("path")) { + String path = global_context->getPath(); + /// Lock path directory before read - status.emplace(global_context->getPath() + "status", StatusFile::write_full_info); + status.emplace(path + "status", StatusFile::write_full_info); LOG_DEBUG(log, "Loading metadata from {}", path); Poco::File(path + "data/").createDirectories(); @@ -288,7 +289,7 @@ try DatabaseCatalog::instance().loadDatabases(); LOG_DEBUG(log, "Loaded metadata."); } - else + else if (!config().has("no-system-tables")) { attachSystemTables(*global_context); } @@ -540,6 +541,7 @@ void LocalServer::init(int argc, char ** argv) ("logger.log", po::value(), "Log file name") ("logger.level", po::value(), "Log level") ("ignore-error", "do not stop processing if a query failed") + ("no-system-tables", "do not attach system tables (better startup time)") ("version,V", "print version information and exit") ; @@ -602,6 +604,8 @@ void LocalServer::init(int argc, char ** argv) config().setString("logger.level", options["logger.level"].as()); if (options.count("ignore-error")) config().setBool("ignore-error", true); + if 
(options.count("no-system-tables")) + config().setBool("no-system-tables", true); std::vector arguments; for (int arg_num = 1; arg_num < argc; ++arg_num) diff --git a/programs/main.cpp b/programs/main.cpp index dee02c55832..cbb22b7a87b 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -18,6 +18,7 @@ #endif #include +#include #include #include @@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv); int mainEntryClickHouseRestart(int argc, char ** argv); #endif +int mainEntryClickHouseHashBinary(int, char **) +{ + /// Intentionally without newline. So you can run: + /// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse + std::cout << getHashOfLoadedBinaryHex(); + return 0; +} + #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) namespace @@ -110,6 +119,7 @@ std::pair clickhouse_applications[] = {"status", mainEntryClickHouseStatus}, {"restart", mainEntryClickHouseRestart}, #endif + {"hash-binary", mainEntryClickHouseHashBinary}, }; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 76765c0374c..2f8029fc39c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -65,6 +65,8 @@ #include #include #include +#include +#include #include #include #include @@ -184,6 +186,7 @@ namespace ErrorCodes extern const int FAILED_TO_GETPWUID; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; + extern const int CORRUPTED_DATA; } @@ -436,7 +439,44 @@ int Server::main(const std::vector & /*args*/) #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); - if (executable_path.empty()) + + if (!executable_path.empty()) + { + /// Integrity check based on checksum of the executable code. + /// Note: it is not intended to protect from malicious party, + /// because the reference checksum can be easily modified as well. + /// And we don't involve asymmetric encryption with PKI yet. + /// It's only intended to protect from faulty hardware. + /// Note: it is only based on machine code. + /// But there are other sections of the binary (e.g. exception handling tables) + /// that are interpreted (not executed) but can alter the behaviour of the program as well. + + String calculated_binary_hash = getHashOfLoadedBinaryHex(); + + if (stored_binary_hash.empty()) + { + LOG_WARNING(log, "Calculated checksum of the binary: {}." + " There is no information about the reference checksum.", calculated_binary_hash); + } + else if (calculated_binary_hash == stored_binary_hash) + { + LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash); + } + else + { + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Calculated checksum of the ClickHouse binary ({0}) does not correspond" + " to the reference checksum stored in the binary ({1})." + " It may indicate one of the following:" + " - the file {2} was changed just after startup;" + " - the file {2} is damaged on disk due to faulty hardware;" + " - the loaded executable is damaged in memory due to faulty hardware;" + " - the file {2} was intentionally modified;" + " - logical error in code." + , calculated_binary_hash, stored_binary_hash, executable_path); + } + } + else executable_path = "/usr/bin/clickhouse"; /// It is used for information messages. 
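The startup check above compares a checksum computed over the loaded machine code with a reference embedded in the binary: a missing reference only produces a warning, a match is logged, and a mismatch is fatal. A simplified, standalone sketch of that decision logic; the hash strings are assumed to be produced elsewhere (e.g. by getHashOfLoadedBinaryHex() and the .note.ClickHouse.hash section lookup):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    void checkBinaryIntegrity(const std::string & calculated, const std::string & stored)
    {
        if (stored.empty())
            std::cerr << "No reference checksum stored in the binary, calculated: " << calculated << "\n";
        else if (calculated == stored)
            std::cout << "Integrity check passed: " << calculated << "\n";
        else
            throw std::runtime_error(
                "Checksum of the loaded code (" + calculated + ") does not match the stored reference ("
                + stored + "): the executable may be damaged on disk or in memory");
    }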
/// After full config loaded diff --git a/programs/server/config.xml b/programs/server/config.xml index 2bdcbd47ccf..372315c7922 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -204,7 +204,7 @@ /etc/clickhouse-server/server.key /etc/clickhouse-server/dhparam.pem @@ -432,7 +432,7 @@ - + - + - + @@ -656,7 +676,7 @@ system query_log
- + --> diff --git a/programs/server/embedded.xml b/programs/server/embedded.xml index 1ac568682f9..a66f57d1eb7 100644 --- a/programs/server/embedded.xml +++ b/programs/server/embedded.xml @@ -20,7 +20,7 @@ - + ::/0 diff --git a/programs/server/users.xml b/programs/server/users.xml index 838b46e6938..3223d855651 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -77,7 +77,7 @@ Strongly recommended that regexp is ends with $ All results of DNS requests are cached till server restart. --> - + ::/0 diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 3255ea42edb..e8c4d70ea26 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -188,13 +188,13 @@ public: if (!limit_num_elems) { if (rhs_elems.value.size()) - cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena); + cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena); } else { UInt64 elems_to_insert = std::min(static_cast(max_elems) - cur_elems.value.size(), rhs_elems.value.size()); if (elems_to_insert) - cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.begin() + elems_to_insert, arena); + cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena); } } diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 276abb90920..5e7e3844956 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -53,17 +53,35 @@ class AggregateFunctionIfNullUnary final private: size_t num_arguments; + /// The name of the nested function, including combinators (i.e. *If) + /// + /// getName() from the nested_function cannot be used because in case of *If combinator + /// with Nullable argument nested_function will point to the function w/o combinator. + /// (I.e. sumIf(Nullable, 1) -> sum()), and distributed query processing will fail. + /// + /// And nested_function cannot point to the function with *If since + /// due to optimization in the add() which pass only one column with the result, + /// and so AggregateFunctionIf::add() cannot be called this way + /// (it write to the last argument -- num_arguments-1). 
+ /// + /// And to avoid extra level of indirection, the name of function is cached: + /// + /// AggregateFunctionIfNullUnary::add -> [ AggregateFunctionIf::add -> ] AggregateFunctionSum::add + String name; + using Base = AggregateFunctionNullBase>; public: String getName() const override { - return Base::getName(); + return name; } - AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size()) + AggregateFunctionIfNullUnary(const String & name_, AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : Base(std::move(nested_function_), arguments, params) + , num_arguments(arguments.size()) + , name(name_) { if (num_arguments == 0) throw Exception("Aggregate function " + getName() + " require at least one argument", @@ -174,14 +192,14 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( { if (return_type_is_nullable) { - return std::make_shared>(nested_func, arguments, params); + return std::make_shared>(nested_function->getName(), nested_func, arguments, params); } else { if (serialize_flag) - return std::make_shared>(nested_func, arguments, params); + return std::make_shared>(nested_function->getName(), nested_func, arguments, params); else - return std::make_shared>(nested_func, arguments, params); + return std::make_shared>(nested_function->getName(), nested_func, arguments, params); } } else diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index ebae17a8a07..2d4e6dfaf32 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -66,8 +66,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments) values_types.push_back(array_type->getNestedType()); } - return std::tuple{std::move(keys_type), std::move(values_types), - tuple_argument}; + return std::tuple{std::move(keys_type), std::move(values_types), tuple_argument}; } // This function instantiates a particular overload of the sumMap family of diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index c991474d0e6..6c8155f967c 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -54,6 +54,8 @@ struct AggregateFunctionMapData * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]) * * minMap and maxMap share the same idea, but calculate min and max correspondingly. + * + * NOTE: The implementation of these functions are "amateur grade" - not efficient and low quality. 
*/ template @@ -72,7 +74,8 @@ public: const DataTypes & values_types_, const DataTypes & argument_types_) : Base(argument_types_, {} /* parameters */), keys_type(keys_type_), values_types(values_types_) - {} + { + } DataTypePtr getReturnType() const override { @@ -81,13 +84,26 @@ public: for (const auto & value_type : values_types) { + if constexpr (std::is_same_v) + { + if (!value_type->isSummable()) + throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Values for {} cannot be summed, passed type {}", + getName(), value_type->getName()}; + } + DataTypePtr result_type; if constexpr (overflow) { + if (value_type->onlyNull()) + throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Cannot calculate {} of type {}", + getName(), value_type->getName()}; + // Overflow, meaning that the returned type is the same as - // the input type. - result_type = value_type; + // the input type. Nulls are skipped. + result_type = removeNullable(value_type); } else { @@ -120,9 +136,9 @@ public: } } - void add(AggregateDataPtr place, const IColumn** _columns, const size_t row_num, Arena *) const override + void add(AggregateDataPtr place, const IColumn ** columns_, const size_t row_num, Arena *) const override { - const auto & columns = getArgumentColumns(_columns); + const auto & columns = getArgumentColumns(columns_); // Column 0 contains array of keys of known type const ColumnArray & array_column0 = assert_cast(*columns[0]); @@ -148,16 +164,13 @@ public: // Insert column values for all keys for (size_t i = 0; i < keys_vec_size; ++i) { - auto value = value_column.operator[](values_vec_offset + i); - auto key = key_column.operator[](keys_vec_offset + i).get(); + auto value = value_column[values_vec_offset + i]; + auto key = key_column[keys_vec_offset + i].get(); if (!keepKey(key)) continue; - if (value.isNull()) - continue; - - typename std::decay_t::iterator it; + decltype(merged_maps.begin()) it; if constexpr (IsDecimalNumber) { // FIXME why is storing NearestFieldType not enough, and we @@ -170,17 +183,20 @@ public: if (it != merged_maps.end()) { - applyVisitor(Visitor(value), it->second[col]); + if (!value.isNull()) + { + if (it->second[col].isNull()) + it->second[col] = value; + else + applyVisitor(Visitor(value), it->second[col]); + } } else { // Create a value array for this key Array new_values; - new_values.resize(values_types.size()); - for (size_t k = 0; k < new_values.size(); ++k) - { - new_values[k] = (k == col) ? 
value : values_types[k]->getDefault(); - } + new_values.resize(size); + new_values[col] = value; if constexpr (IsDecimalNumber) { @@ -207,7 +223,8 @@ public: if (it != merged_maps.end()) { for (size_t col = 0; col < values_types.size(); ++col) - applyVisitor(Visitor(elem.second[col]), it->second[col]); + if (!elem.second[col].isNull()) + applyVisitor(Visitor(elem.second[col]), it->second[col]); } else merged_maps[elem.first] = elem.second; @@ -253,6 +270,8 @@ public: void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { + size_t num_columns = values_types.size(); + // Final step does compaction of keys that have zero values, this mutates the state auto & merged_maps = this->data(place).merged_maps; @@ -263,9 +282,9 @@ public: { // Key is not compacted if it has at least one non-zero value bool erase = true; - for (size_t col = 0; col < values_types.size(); ++col) + for (size_t col = 0; col < num_columns; ++col) { - if (it->second[col] != values_types[col]->getDefault()) + if (!it->second[col].isNull() && it->second[col] != values_types[col]->getDefault()) { erase = false; break; @@ -290,7 +309,7 @@ public: to_keys_offsets.push_back(to_keys_offsets.back() + size); to_keys_col.reserve(size); - for (size_t col = 0; col < values_types.size(); ++col) + for (size_t col = 0; col < num_columns; ++col) { auto & to_values_arr = assert_cast(to_tuple.getColumn(col + 1)); auto & to_values_offsets = to_values_arr.getOffsets(); @@ -305,10 +324,13 @@ public: to_keys_col.insert(elem.first); // Write 0..n arrays of values - for (size_t col = 0; col < values_types.size(); ++col) + for (size_t col = 0; col < num_columns; ++col) { auto & to_values_col = assert_cast(to_tuple.getColumn(col + 1)).getData(); - to_values_col.insert(elem.second[col]); + if (elem.second[col].isNull()) + to_values_col.insertDefault(); + else + to_values_col.insert(elem.second[col]); } } } diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 96f0eb183be..2be9d874a05 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -19,12 +19,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct ComparePairFirst final +struct ComparePair final { template bool operator()(const std::pair & lhs, const std::pair & rhs) const { - return lhs.first < rhs.first; + return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first; } }; @@ -33,8 +33,8 @@ template struct AggregateFunctionWindowFunnelData { using TimestampEvent = std::pair; - using TimestampEvents = PODArray; - using Comparator = ComparePairFirst; + using TimestampEvents = PODArrayWithStackMemory; + using Comparator = ComparePair; bool sorted = true; TimestampEvents events_list; @@ -47,8 +47,13 @@ struct AggregateFunctionWindowFunnelData void add(T timestamp, UInt8 event) { // Since most events should have already been sorted by timestamp. 
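windowFunnel appends (timestamp, event) pairs and keeps a flag telling whether the list is still sorted, so the later sort can be skipped; with the fix, ties on the timestamp are resolved by the event number, matching ComparePair. A standalone sketch of that bookkeeping with simplified types:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Simplified model of the events list: sortedness is maintained incrementally,
    // and equal timestamps are ordered by the event number, as in ComparePair.
    struct Events
    {
        std::vector<std::pair<uint64_t, uint8_t>> list;
        bool sorted = true;

        void add(uint64_t timestamp, uint8_t event)
        {
            if (sorted && !list.empty())
            {
                if (list.back().first == timestamp)
                    sorted = list.back().second <= event;
                else
                    sorted = list.back().first <= timestamp;
            }
            list.emplace_back(timestamp, event);
        }
    };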
- if (sorted && events_list.size() > 0 && events_list.back().first > timestamp) - sorted = false; + if (sorted && events_list.size() > 0) + { + if (events_list.back().first == timestamp) + sorted = events_list.back().second <= event; + else + sorted = events_list.back().first <= timestamp; + } events_list.emplace_back(timestamp, event); } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 99b3342f314..51dada0780b 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -393,6 +393,12 @@ size_t ColumnAggregateFunction::byteSize() const + (my_arena ? my_arena->size() : 0); } +size_t ColumnAggregateFunction::byteSizeAt(size_t) const +{ + /// Lower estimate as aggregate function can allocate more data in Arena. + return sizeof(data[0]) + func->sizeOfData(); +} + /// Like in byteSize(), the size is underestimated. size_t ColumnAggregateFunction::allocatedBytes() const { diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 4e5e66542e9..13bc40c56b1 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -163,6 +163,8 @@ public: size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; + size_t allocatedBytes() const override; void protect() override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 251c0bd7921..8c0e06424e7 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -403,6 +403,21 @@ size_t ColumnArray::byteSize() const } +size_t ColumnArray::byteSizeAt(size_t n) const +{ + const auto & offsets_data = getOffsets(); + + size_t pos = offsets_data[n - 1]; + size_t end = offsets_data[n]; + + size_t res = sizeof(offsets_data[0]); + for (; pos < end; ++pos) + res += getData().byteSizeAt(pos); + + return res; +} + + size_t ColumnArray::allocatedBytes() const { return getData().allocatedBytes() + getOffsets().allocated_bytes(); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 8a02af92dce..e81ecbc1ca0 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,6 +84,7 @@ public: void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; size_t allocatedBytes() const override; void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 9392a1cfaff..99c997ab269 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -187,6 +187,11 @@ public: return data->byteSize() + sizeof(s); } + size_t byteSizeAt(size_t) const override + { + return data->byteSizeAt(0); + } + size_t allocatedBytes() const override { return data->allocatedBytes() + sizeof(s); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 6610662c88f..51ad1486520 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -87,6 +87,7 @@ public: size_t size() const override { return data.size(); } size_t byteSize() const override { return data.size() * sizeof(data[0]); } + size_t byteSizeAt(size_t) const override { return sizeof(data[0]); } size_t allocatedBytes() const override { return data.allocated_bytes(); } void protect() override { data.protect(); } 
void reserve(size_t n) override { data.reserve(n); } diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 3bc74d4c2ff..286b3a752dc 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -57,6 +57,11 @@ public: return chars.size() + sizeof(n); } + size_t byteSizeAt(size_t) const override + { + return n; + } + size_t allocatedBytes() const override { return chars.allocated_bytes() + sizeof(n); diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 292e10e8e14..83f668ded75 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -140,6 +140,15 @@ size_t ColumnFunction::byteSize() const return total_size; } +size_t ColumnFunction::byteSizeAt(size_t n) const +{ + size_t total_size = 0; + for (const auto & column : captured_columns) + total_size += column.column->byteSizeAt(n); + + return total_size; +} + size_t ColumnFunction::allocatedBytes() const { size_t total_size = 0; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 45daacf295e..f97f41a8627 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -47,6 +47,7 @@ public: void getExtremes(Field &, Field &) const override {} size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; size_t allocatedBytes() const override; void appendArguments(const ColumnsWithTypeAndName & columns); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 5ace0a5d64f..a497be8847d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -151,6 +151,7 @@ public: void reserve(size_t n) override { idx.reserve(n); } size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); } + size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); } size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); } void forEachSubcolumn(ColumnCallback callback) override diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index c210da2aa11..1cfd7e6c4ef 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -211,6 +211,11 @@ size_t ColumnMap::byteSize() const return nested->byteSize(); } +size_t ColumnMap::byteSizeAt(size_t n) const +{ + return nested->byteSizeAt(n); +} + size_t ColumnMap::allocatedBytes() const { return nested->allocatedBytes(); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 08197c3e1e9..9f1410eefe4 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -77,6 +77,7 @@ public: void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; size_t allocatedBytes() const override; void protect() override; void forEachSubcolumn(ColumnCallback callback) override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 4f2117b1405..35ce005073a 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -495,6 +495,11 @@ size_t ColumnNullable::byteSize() const return getNestedColumn().byteSize() + getNullMapColumn().byteSize(); } +size_t ColumnNullable::byteSizeAt(size_t n) const +{ + return sizeof(getNullMapData()[0]) + getNestedColumn().byteSizeAt(n); +} 
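byteSizeAt() reports the memory attributable to a single row. For variable-length columns the per-row size follows from the offsets array (length of the row's slice plus one offset entry), and wrappers such as Nullable add their own per-row byte on top. A toy model of the offsets arithmetic; the real PODArray is padded so offsets[-1] reads as zero, which the explicit branch below stands in for:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Toy array column: a flat data vector plus cumulative offsets.
    struct ToyArrayColumn
    {
        std::vector<uint8_t> data;       // flattened elements, 1 byte each here
        std::vector<uint64_t> offsets;   // offsets[n] = end of row n in 'data'

        size_t byteSizeAt(size_t n) const
        {
            size_t begin = n == 0 ? 0 : offsets[n - 1];
            size_t end = offsets[n];
            return (end - begin) * sizeof(data[0]) + sizeof(offsets[0]);
        }
    };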
+ size_t ColumnNullable::allocatedBytes() const { return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 47b0103eab4..8a17f657340 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -101,6 +101,7 @@ public: const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; size_t allocatedBytes() const override; void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index b71751dbc4e..c1e76c5e28e 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -71,6 +71,12 @@ public: return chars.size() + offsets.size() * sizeof(offsets[0]); } + size_t byteSizeAt(size_t n) const override + { + assert(n < size()); + return sizeAt(n) + sizeof(offsets[0]); + } + size_t allocatedBytes() const override { return chars.allocated_bytes() + offsets.allocated_bytes(); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index e3b45ee3d5c..fa5a15d0351 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -424,6 +424,14 @@ size_t ColumnTuple::byteSize() const return res; } +size_t ColumnTuple::byteSizeAt(size_t n) const +{ + size_t res = 0; + for (const auto & column : columns) + res += column->byteSizeAt(n); + return res; +} + size_t ColumnTuple::allocatedBytes() const { size_t res = 0; diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 0bee3463f2f..68b502f9705 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -83,6 +83,7 @@ public: void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; size_t allocatedBytes() const override; void protect() override; void forEachSubcolumn(ColumnCallback callback) override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index d87fdd65d15..5d58b2484e0 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -88,6 +88,10 @@ public: bool isNumeric() const override { return column_holder->isNumeric(); } size_t byteSize() const override { return column_holder->byteSize(); } + size_t byteSizeAt(size_t n) const override + { + return getNestedColumn()->byteSizeAt(n); + } void protect() override { column_holder->protect(); } size_t allocatedBytes() const override { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index c6600ca7e31..59fe567fb9c 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -178,6 +178,11 @@ public: return data.size() * sizeof(data[0]); } + size_t byteSizeAt(size_t) const override + { + return sizeof(data[0]); + } + size_t allocatedBytes() const override { if constexpr (is_POD) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 6dbcfacefe9..824b5411744 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -333,6 +333,9 @@ public: /// Size of column data in memory (may be approximate) - for profiling. Zero, if could not be determined. 
virtual size_t byteSize() const = 0; + /// Size of single value in memory (for accounting purposes) + virtual size_t byteSizeAt(size_t /*n*/) const = 0; + /// Size of memory, allocated for column. /// This is greater or equals to byteSize due to memory reservation in containers. /// Zero, if could not be determined. diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index faf74fa7e9d..10ef692dc6a 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -33,6 +33,7 @@ public: void insertDefault() override { ++s; } void popBack(size_t n) override { s -= n; } size_t byteSize() const override { return 0; } + size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override diff --git a/src/Common/DirectorySyncGuard.cpp b/src/Common/DirectorySyncGuard.cpp new file mode 100644 index 00000000000..f279a0d25d5 --- /dev/null +++ b/src/Common/DirectorySyncGuard.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include // O_RDWR + +/// OSX does not have O_DIRECTORY +#ifndef O_DIRECTORY +#define O_DIRECTORY O_RDWR +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_FSYNC; +} + +DirectorySyncGuard::DirectorySyncGuard(const DiskPtr & disk_, const String & path) + : disk(disk_) + , fd(disk_->open(path, O_DIRECTORY)) +{} + +DirectorySyncGuard::~DirectorySyncGuard() +{ + try + { +#if defined(OS_DARWIN) + if (fcntl(fd, F_FULLFSYNC, 0)) + throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC); +#endif + disk->sync(fd); + disk->close(fd); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +} diff --git a/src/Common/FileSyncGuard.h b/src/Common/DirectorySyncGuard.h similarity index 55% rename from src/Common/FileSyncGuard.h rename to src/Common/DirectorySyncGuard.h index 486b02d0f24..062d20324ed 100644 --- a/src/Common/FileSyncGuard.h +++ b/src/Common/DirectorySyncGuard.h @@ -1,36 +1,26 @@ #pragma once -#include +#include +#include namespace DB { +class IDisk; +using DiskPtr = std::shared_ptr; + /// Helper class, that receives file descriptor and does fsync for it in destructor. /// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end. /// Guaranties of sequence 'close-reopen-fsync' may depend on kernel version. /// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496 -class FileSyncGuard +class DirectorySyncGuard { public: /// NOTE: If you have already opened descriptor, it's preferred to use /// this constructor instead of constructor with path. - FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {} - - FileSyncGuard(const DiskPtr & disk_, const String & path) - : disk(disk_), fd(disk_->open(path, O_RDWR)) {} - - ~FileSyncGuard() - { - try - { - disk->sync(fd); - disk->close(fd); - } - catch (...) 
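The renamed DirectorySyncGuard opens the directory and fsyncs it in its destructor so that metadata changes in it (new files, renames) become durable. A minimal POSIX sketch of the same RAII pattern, independent of the IDisk abstraction and without the macOS F_FULLFSYNC special case; error handling is simplified compared to the real guard:

    #include <fcntl.h>
    #include <unistd.h>
    #include <stdexcept>
    #include <string>

    class DirSyncGuard
    {
    public:
        explicit DirSyncGuard(const std::string & path)
            : fd(::open(path.c_str(), O_DIRECTORY | O_RDONLY))
        {
            if (fd < 0)
                throw std::runtime_error("Cannot open directory " + path);
        }

        ~DirSyncGuard()
        {
            ::fsync(fd);   // flush directory metadata (new files, renames) to disk
            ::close(fd);
        }

    private:
        int fd = -1;
    };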
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + DirectorySyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {} + DirectorySyncGuard(const DiskPtr & disk_, const std::string & path); + ~DirectorySyncGuard(); private: DiskPtr disk; diff --git a/src/Common/Elf.cpp b/src/Common/Elf.cpp index 0c2359b3418..ee78c988f69 100644 --- a/src/Common/Elf.cpp +++ b/src/Common/Elf.cpp @@ -151,6 +151,15 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size) } +String Elf::getBinaryHash() const +{ + if (auto section = findSectionByName(".note.ClickHouse.hash")) + return {section->begin(), section->end()}; + else + return {}; +} + + const char * Elf::Section::name() const { if (!elf.section_names) diff --git a/src/Common/Elf.h b/src/Common/Elf.h index 632d7e6f0b1..90783ddc18d 100644 --- a/src/Common/Elf.h +++ b/src/Common/Elf.h @@ -59,6 +59,9 @@ public: String getBuildID() const; static String getBuildID(const char * nhdr_pos, size_t size); + /// Hash of the binary for integrity checks. + String getBinaryHash() const; + private: MMapReadBufferFromFile in; size_t elf_size; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 8b14e17a138..1ceb9df011d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -531,6 +531,7 @@ M(562, TLD_LIST_NOT_FOUND) \ M(563, CANNOT_READ_MAP_FROM_TEXT) \ M(564, INTERSERVER_SCHEME_DOESNT_MATCH) \ + M(565, TOO_MANY_PARTITIONS) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index 6cbec3bda77..e7fcc8cadb7 100644 --- a/src/Common/FileChecker.cpp +++ b/src/Common/FileChecker.cpp @@ -21,18 +21,16 @@ namespace ErrorCodes FileChecker::FileChecker(DiskPtr disk_, const String & file_info_path_) : disk(std::move(disk_)) { setPath(file_info_path_); + load(); } void FileChecker::setPath(const String & file_info_path_) { files_info_path = file_info_path_; - - tmp_files_info_path = parentPath(files_info_path) + "tmp_" + fileName(files_info_path); } void FileChecker::update(const String & full_file_path) { - initialize(); map[fileName(full_file_path)] = disk->getFileSize(full_file_path); } @@ -41,19 +39,19 @@ void FileChecker::setEmpty(const String & full_file_path) map[fileName(full_file_path)] = 0; } +FileChecker::Map FileChecker::getFileSizes() const +{ + return map; +} + CheckResults FileChecker::check() const { - // Read the files again every time you call `check` - so as not to violate the constancy. - // `check` method is rarely called. 
- - CheckResults results; - Map local_map; - load(local_map, files_info_path); - - if (local_map.empty()) + if (map.empty()) return {}; - for (const auto & name_size : local_map) + CheckResults results; + + for (const auto & name_size : map) { const String & name = name_size.first; String path = parentPath(files_info_path) + name; @@ -97,17 +95,10 @@ void FileChecker::repair() } } -void FileChecker::initialize() -{ - if (initialized) - return; - - load(map, files_info_path); - initialized = true; -} - void FileChecker::save() const { + std::string tmp_files_info_path = parentPath(files_info_path) + "tmp_" + fileName(files_info_path); + { std::unique_ptr out = disk->writeFile(tmp_files_info_path); @@ -134,14 +125,14 @@ void FileChecker::save() const disk->replaceFile(tmp_files_info_path, files_info_path); } -void FileChecker::load(Map & local_map, const String & path) const +void FileChecker::load() { - local_map.clear(); + map.clear(); - if (!disk->exists(path)) + if (!disk->exists(files_info_path)) return; - std::unique_ptr in = disk->readFile(path); + std::unique_ptr in = disk->readFile(files_info_path); WriteBufferFromOwnString out; /// The JSON library does not support whitespace. We delete them. Inefficient. @@ -156,7 +147,7 @@ void FileChecker::load(Map & local_map, const String & path) const JSON files = json["yandex"]; for (const JSON file : files) // NOLINT - local_map[unescapeForFileName(file.getName())] = file.getValue()["size"].toUInt(); + map[unescapeForFileName(file.getName())] = file.getValue()["size"].toUInt(); } } diff --git a/src/Common/FileChecker.h b/src/Common/FileChecker.h index 015d4cadb07..73e4470f231 100644 --- a/src/Common/FileChecker.h +++ b/src/Common/FileChecker.h @@ -18,6 +18,7 @@ public: void update(const String & full_file_path); void setEmpty(const String & full_file_path); void save() const; + bool empty() const { return map.empty(); } /// Check the files whose parameters are specified in sizes.json CheckResults check() const; @@ -27,21 +28,18 @@ public: /// The purpose of this function is to rollback a group of unfinished writes. void repair(); -private: /// File name -> size. using Map = std::map; - void initialize(); - void updateImpl(const String & file_path); - void load(Map & local_map, const String & path) const; + Map getFileSizes() const; + +private: + void load(); DiskPtr disk; String files_info_path; - String tmp_files_info_path; - /// The data from the file is read lazily. Map map; - bool initialized = false; Poco::Logger * log = &Poco::Logger::get("FileChecker"); }; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 3269196012f..4b0e1f9cada 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -234,7 +234,12 @@ void MemoryTracker::updatePeak(Int64 will_be) void MemoryTracker::free(Int64 size) { if (BlockerInThread::isBlocked(level)) + { + /// Since the BlockerInThread should respect the level, we should go to the next parent. + if (auto * loaded_next = parent.load(std::memory_order_relaxed)) + loaded_next->free(size); return; + } std::bernoulli_distribution sample(sample_probability); if (unlikely(sample_probability && sample(thread_local_rng))) diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 7bd9550500e..f0cc9df11cd 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -89,8 +89,8 @@ protected: static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE); /// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory. 
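FileChecker::save() above writes the JSON to a temporary file and then atomically replaces the target, so a crash mid-write can never leave a truncated sizes.json behind. A standalone sketch of the write-to-temp-then-rename pattern, with the fsync details of the real code omitted:

    #include <filesystem>
    #include <fstream>
    #include <string>

    // Readers either see the old file or the complete new one, never a partial write.
    void saveAtomically(const std::string & path, const std::string & contents)
    {
        const std::string tmp_path = path + ".tmp";
        {
            std::ofstream out(tmp_path, std::ios::trunc);
            out << contents;
            out.flush();
        }
        std::filesystem::rename(tmp_path, path);   // atomic replace on POSIX filesystems
    }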
static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16); - /// Empty array will point to this static memory as padding. - static constexpr char * null = pad_left ? const_cast(empty_pod_array) + empty_pod_array_size : nullptr; + /// Empty array will point to this static memory as padding and begin/end. + static constexpr char * null = const_cast(empty_pod_array) + pad_left; static_assert(pad_left <= empty_pod_array_size && "Left Padding exceeds empty_pod_array_size. Is the element size too large?"); @@ -268,8 +268,11 @@ public: reserve(required_capacity, std::forward(allocator_params)...); size_t items_byte_size = byte_size(number_of_items); - memcpy(c_end, ptr, items_byte_size); - c_end += items_byte_size; + if (items_byte_size) + { + memcpy(c_end, ptr, items_byte_size); + c_end += items_byte_size; + } } void protect() @@ -289,6 +292,18 @@ public: #endif } + template + inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) + { +#if !defined(NDEBUG) + const char * ptr_begin = reinterpret_cast(&*from_begin); + const char * ptr_end = reinterpret_cast(&*from_end); + + /// Also it's safe if the range is empty. + assert(!((ptr_begin >= c_start && ptr_begin < c_end) || (ptr_end > c_start && ptr_end <= c_end)) || (ptr_begin == ptr_end)); +#endif + } + ~PODArrayBase() { dealloc(); @@ -444,6 +459,7 @@ public: template void insertPrepare(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params) { + this->assertNotIntersects(from_begin, from_end); size_t required_capacity = this->size() + (from_end - from_begin); if (required_capacity > this->capacity()) this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward(allocator_params)...); @@ -457,6 +473,28 @@ public: insert_assume_reserved(from_begin, from_end); } + /// In contrast to 'insert' this method is Ok even for inserting from itself. + /// Because we obtain iterators after reserving memory. + template + void insertByOffsets(Container && rhs, size_t from_begin, size_t from_end, TAllocatorParams &&... allocator_params) + { + static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); + + assert(from_end >= from_begin); + assert(from_end <= rhs.size()); + + size_t required_capacity = this->size() + (from_end - from_begin); + if (required_capacity > this->capacity()) + this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward(allocator_params)...); + + size_t bytes_to_copy = this->byte_size(from_end - from_begin); + if (bytes_to_copy) + { + memcpy(this->c_end, reinterpret_cast(rhs.begin() + from_begin), bytes_to_copy); + this->c_end += bytes_to_copy; + } + } + /// Works under assumption, that it's possible to read up to 15 excessive bytes after `from_end` and this PODArray is padded. template void insertSmallAllowReadWriteOverflow15(It1 from_begin, It2 from_end, TAllocatorParams &&... 
allocator_params) @@ -476,6 +514,9 @@ public: static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); size_t bytes_to_copy = this->byte_size(from_end - from_begin); + if (!bytes_to_copy) + return; + size_t bytes_to_move = this->byte_size(end() - it); insertPrepare(from_begin, from_end); @@ -492,10 +533,14 @@ public: void insert_assume_reserved(It1 from_begin, It2 from_end) { static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); + this->assertNotIntersects(from_begin, from_end); size_t bytes_to_copy = this->byte_size(from_end - from_begin); - memcpy(this->c_end, reinterpret_cast(&*from_begin), bytes_to_copy); - this->c_end += bytes_to_copy; + if (bytes_to_copy) + { + memcpy(this->c_end, reinterpret_cast(&*from_begin), bytes_to_copy); + this->c_end += bytes_to_copy; + } } template @@ -626,15 +671,18 @@ public: void assign(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params) { static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); + this->assertNotIntersects(from_begin, from_end); size_t required_capacity = from_end - from_begin; if (required_capacity > this->capacity()) this->reserve_exact(required_capacity, std::forward(allocator_params)...); size_t bytes_to_copy = this->byte_size(required_capacity); - memcpy(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); - - this->c_end = this->c_start + bytes_to_copy; + if (bytes_to_copy) + { + memcpy(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); + this->c_end = this->c_start + bytes_to_copy; + } } // ISO C++ has strict ambiguity rules, thus we cannot apply TAllocatorParams here. diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0b0604cbe30..c459bf41352 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -45,9 +45,6 @@ M(CreatedReadBufferAIOFailed, "") \ M(CreatedReadBufferMMap, "") \ M(CreatedReadBufferMMapFailed, "") \ - M(CreatedWriteBufferOrdinary, "") \ - M(CreatedWriteBufferAIO, "") \ - M(CreatedWriteBufferAIOFailed, "") \ M(DiskReadElapsedMicroseconds, "Total time spent waiting for read syscall. This include reads from page cache.") \ M(DiskWriteElapsedMicroseconds, "Total time spent waiting for write syscall. 
This include writes to page cache.") \ M(NetworkReceiveElapsedMicroseconds, "") \ diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 3809f84711e..5105fff03b2 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -25,7 +25,7 @@ thread_local ThreadStatus * current_thread = nullptr; thread_local ThreadStatus * main_thread = nullptr; #if !defined(SANITIZER) && !defined(ARCADIA_BUILD) - alignas(4096) static thread_local char alt_stack[4096]; + alignas(4096) static thread_local char alt_stack[std::max(MINSIGSTKSZ, 4096)]; static thread_local bool has_alt_stack = false; #endif diff --git a/src/Common/UnicodeBar.cpp b/src/Common/UnicodeBar.cpp index 8ff5e2052c1..29a9838cd62 100644 --- a/src/Common/UnicodeBar.cpp +++ b/src/Common/UnicodeBar.cpp @@ -5,33 +5,25 @@ #include #include #include +#include - -namespace DB -{ - namespace ErrorCodes - { - extern const int PARAMETER_OUT_OF_BOUND; - } -} +#include namespace UnicodeBar { - double getWidth(Int64 x, Int64 min, Int64 max, double max_width) + double getWidth(double x, double min, double max, double max_width) { + if (isNaN(x)) + return 0; + if (x <= min) return 0; if (x >= max) return max_width; - /// The case when max - min overflows - Int64 max_difference; - if (common::subOverflow(max, min, max_difference)) - throw DB::Exception(DB::ErrorCodes::PARAMETER_OUT_OF_BOUND, "The arguments to render unicode bar will lead to arithmetic overflow"); - - return (x - min) * max_width / max_difference; + return (x - min) / (max - min) * max_width; } size_t getWidthInBytes(double width) diff --git a/src/Common/UnicodeBar.h b/src/Common/UnicodeBar.h index 0c62bd7e8f7..e6f49dde856 100644 --- a/src/Common/UnicodeBar.h +++ b/src/Common/UnicodeBar.h @@ -10,7 +10,7 @@ */ namespace UnicodeBar { - double getWidth(Int64 x, Int64 min, Int64 max, double max_width); + double getWidth(double x, double min, double max, double max_width); size_t getWidthInBytes(double width); /// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero. diff --git a/src/Common/getHashOfLoadedBinary.cpp b/src/Common/getHashOfLoadedBinary.cpp new file mode 100644 index 00000000000..d401b0311be --- /dev/null +++ b/src/Common/getHashOfLoadedBinary.cpp @@ -0,0 +1,58 @@ +#include + +#if defined(__linux__) + +#include +#include +#include + + +static int callback(dl_phdr_info * info, size_t, void * data) +{ + SipHash & hash = *reinterpret_cast(data); + + for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) + { + const auto & phdr = info->dlpi_phdr[header_index]; + + if (phdr.p_type == PT_LOAD && (phdr.p_flags & PF_X)) + { + hash.update(phdr.p_filesz); + hash.update(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_filesz); + } + } + + return 1; /// Do not continue iterating. 
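getHashOfLoadedBinary() walks the program headers of the running binary and feeds every executable PT_LOAD segment into SipHash. A self-contained, Linux-only sketch that enumerates the same segments with dl_iterate_phdr, printing their sizes instead of hashing them; note the real callback returns 1 to stop after the first object, since only the main executable is of interest:

    #include <link.h>
    #include <cstddef>
    #include <cstdio>

    static int visitSegments(dl_phdr_info * info, size_t, void *)
    {
        for (size_t i = 0; i < info->dlpi_phnum; ++i)
        {
            const auto & phdr = info->dlpi_phdr[i];
            if (phdr.p_type == PT_LOAD && (phdr.p_flags & PF_X))
                std::printf("%s: executable segment at %p, %zu bytes\n",
                    info->dlpi_name,
                    reinterpret_cast<void *>(info->dlpi_addr + phdr.p_vaddr),
                    static_cast<size_t>(phdr.p_filesz));
        }
        return 0;   // continue iterating over all loaded objects
    }

    int main()
    {
        dl_iterate_phdr(visitSegments, nullptr);
    }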
+} + + +SipHash getHashOfLoadedBinary() +{ + SipHash hash; + dl_iterate_phdr(callback, &hash); + return hash; +} + + +std::string getHashOfLoadedBinaryHex() +{ + SipHash hash = getHashOfLoadedBinary(); + std::array checksum; + hash.get128(checksum[0], checksum[1]); + return getHexUIntUppercase(checksum); +} + +#else + +SipHash getHashOfLoadedBinary() +{ + return {}; +} + + +std::string getHashOfLoadedBinaryHex() +{ + return {}; +} + +#endif diff --git a/src/Common/getHashOfLoadedBinary.h b/src/Common/getHashOfLoadedBinary.h new file mode 100644 index 00000000000..3a4a22b1bf1 --- /dev/null +++ b/src/Common/getHashOfLoadedBinary.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +/** Calculate hash of the executable loaded segments of the first loaded object. + * It can be used for integrity checks. + * Does not work when ClickHouse is build as multiple shared libraries. + * Note: we don't hash all loaded readonly segments, because some of them are modified by 'strip' + * and we want something that survives 'strip'. + * Note: program behaviour can be affected not only by machine code but also by the data in these segments, + * so the integrity check is going to be incomplete. + */ +SipHash getHashOfLoadedBinary(); +std::string getHashOfLoadedBinaryHex(); diff --git a/src/Common/ya.make b/src/Common/ya.make index 5d2534c3547..5b5da618bbe 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -37,6 +37,7 @@ SRCS( CurrentMetrics.cpp CurrentThread.cpp DNSResolver.cpp + DirectorySyncGuard.cpp Dwarf.cpp Elf.cpp ErrorCodes.cpp @@ -98,6 +99,7 @@ SRCS( formatIPv6.cpp formatReadable.cpp getExecutablePath.cpp + getHashOfLoadedBinary.cpp getMappedArea.cpp getMultipleKeysFromConfig.cpp getNumberOfPhysicalCPUCores.cpp diff --git a/src/Compression/CompressedReadBuffer.h b/src/Compression/CompressedReadBuffer.h index 1e8ea4784c7..3fa7347507c 100644 --- a/src/Compression/CompressedReadBuffer.h +++ b/src/Compression/CompressedReadBuffer.h @@ -16,8 +16,8 @@ private: bool nextImpl() override; public: - CompressedReadBuffer(ReadBuffer & in_) - : CompressedReadBufferBase(&in_), BufferWithOwnMemory(0) + CompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false) + : CompressedReadBufferBase(&in_, allow_different_codecs_), BufferWithOwnMemory(0) { } diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 8b16b68a999..5be31913140 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -67,20 +67,25 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c buf[pos / 8] ^= 1 << pos % 8; }; - /// Check if the difference caused by single bit flip in data. - for (size_t bit_pos = 0; bit_pos < size * 8; ++bit_pos) + /// If size is too huge, then this may be caused by corruption. + /// And anyway this is pretty heavy, so avoid burning too much CPU here. + if (size < (1ULL << 20)) { - flip_bit(data, bit_pos); - - auto checksum_of_data_with_flipped_bit = CityHash_v1_0_2::CityHash128(data, size); - if (expected_checksum == checksum_of_data_with_flipped_bit) + /// Check if the difference caused by single bit flip in data. + for (size_t bit_pos = 0; bit_pos < size * 8; ++bit_pos) { - message << ". The mismatch is caused by single bit flip in data block at byte " << (bit_pos / 8) << ", bit " << (bit_pos % 8) << ". 
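On a checksum mismatch the reader now runs the single-bit-flip diagnosis only for blocks under 1 MiB: it flips each bit, recomputes the checksum and reports the position if one flip makes it match, which points strongly at faulty hardware. A standalone sketch of that search; FNV-1a below is a stand-in for the CityHash128 used in the real code:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    static uint64_t checksum(const std::vector<uint8_t> & data)
    {
        uint64_t hash = 1469598103934665603ULL;
        for (uint8_t byte : data)
        {
            hash ^= byte;
            hash *= 1099511628211ULL;
        }
        return hash;
    }

    // Returns the bit position whose flip makes the checksum match, if any.
    std::optional<size_t> findSingleBitFlip(std::vector<uint8_t> data, uint64_t expected)
    {
        for (size_t bit = 0; bit < data.size() * 8; ++bit)
        {
            data[bit / 8] ^= 1 << (bit % 8);      // flip
            if (checksum(data) == expected)
                return bit;
            data[bit / 8] ^= 1 << (bit % 8);      // restore
        }
        return {};
    }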
" - << message_hardware_failure; - throw Exception(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH); - } + flip_bit(data, bit_pos); - flip_bit(data, bit_pos); /// Restore + auto checksum_of_data_with_flipped_bit = CityHash_v1_0_2::CityHash128(data, size); + if (expected_checksum == checksum_of_data_with_flipped_bit) + { + message << ". The mismatch is caused by single bit flip in data block at byte " << (bit_pos / 8) << ", bit " << (bit_pos % 8) << ". " + << message_hardware_failure; + throw Exception(message.str(), ErrorCodes::CHECKSUM_DOESNT_MATCH); + } + + flip_bit(data, bit_pos); /// Restore + } } /// Check if the difference caused by single bit flip in stored checksum. diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 46d7d7dfcc4..aacf95b1950 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -46,6 +46,9 @@ CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std void CompressionCodecFactory::validateCodec(const String & family_name, std::optional level, bool sanity_check) const { + if (family_name.empty()) + throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + if (level) { auto literal = std::make_shared(static_cast(*level)); diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index 6b568ea937f..b3aedd6c875 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -1,5 +1,4 @@ #include "BackgroundSchedulePool.h" -#include #include #include #include diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index b0f5f8ccae2..e685ad0394d 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -6,7 +6,6 @@ #include #include -#include #include #include diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a4e655bbbda..9107e703e9a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -353,6 +353,7 @@ class IColumn; M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \ \ M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \ + M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \ M(Bool, check_query_single_value_result, true, "Return check query result as single 1/0 value", 0) \ M(Bool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... 
queries", 0) \ \ @@ -404,13 +405,13 @@ class IColumn; M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ + M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ + M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, allow_experimental_map_type, false, "Allow data type Map", 0) \ M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \ - \ - M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated parser", 0) \ - \ + M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated experimental parser", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 1252dd7f4de..35607bcb182 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -14,6 +16,7 @@ #include #include #include +#include namespace DB diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h index 18dc3ffd800..e6fc49b7b46 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -1,14 +1,15 @@ #pragma once -#include #include -#include -#include +#include +#include + namespace DB { class ReplicatedMergeTreeBlockOutputStream; +class Context; /** Writes data to the specified table and to all dependent materialized views. 
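The new max_partitions_to_read setting caps how many partitions a single query may touch, so a mistyped condition fails fast instead of scanning most of the table. An illustrative guard with stand-in types; in the server this is where the TOO_MANY_PARTITIONS error (code 565 added above) would be thrown:

    #include <cstdint>
    #include <set>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // <= 0 means unlimited, matching the setting's default of -1.
    void checkPartitionsToRead(const std::vector<std::string> & selected_partitions, int64_t max_partitions_to_read)
    {
        if (max_partitions_to_read <= 0)
            return;

        std::set<std::string> unique(selected_partitions.begin(), selected_partitions.end());
        if (unique.size() > static_cast<size_t>(max_partitions_to_read))
            throw std::runtime_error(
                "Query touches " + std::to_string(unique.size()) + " partitions, but the limit is "
                + std::to_string(max_partitions_to_read));
    }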
diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 51d229a1126..14e51ffefdf 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -225,7 +225,7 @@ std::variant RemoteQueryExecutor::read(std::unique_ptr if (!read_context->resumeRoutine()) return Block(); - if (read_context->is_read_in_progress) + if (read_context->is_read_in_progress.load(std::memory_order_relaxed)) { read_context->setTimer(); return read_context->epoll_fd; diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/DataStreams/RemoteQueryExecutorReadContext.h index f8c64954b83..6d7099899ae 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.h +++ b/src/DataStreams/RemoteQueryExecutorReadContext.h @@ -22,7 +22,7 @@ class RemoteQueryExecutorReadContext public: using Self = RemoteQueryExecutorReadContext; - bool is_read_in_progress = false; + std::atomic_bool is_read_in_progress = false; Packet packet; std::exception_ptr exception; @@ -162,7 +162,7 @@ public: bool resumeRoutine() { - if (is_read_in_progress && !checkTimeout()) + if (is_read_in_progress.load(std::memory_order_relaxed) && !checkTimeout()) return false; { @@ -226,9 +226,9 @@ public: throw; } - read_context.is_read_in_progress = true; + read_context.is_read_in_progress.store(true, std::memory_order_relaxed); fiber = std::move(fiber).resume(); - read_context.is_read_in_progress = false; + read_context.is_read_in_progress.store(false, std::memory_order_relaxed); } }; diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 1b1c0cd4353..006d0e11434 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -1,10 +1,9 @@ #pragma once #include -#include - #include + namespace DB { diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.h b/src/Databases/MySQL/MaterializeMySQLSyncThread.h index 54f148026ad..26934b87511 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.h @@ -7,7 +7,6 @@ #if USE_MYSQL # include -# include # include # include # include diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 5547e34758f..1cf5946c95c 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -21,6 +21,8 @@ #include #include #include +#include + namespace ProfileEvents { diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index f4041e671ec..59df778e1f2 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 5ac821e5eda..58b117b3596 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -22,6 +22,8 @@ #include #include #include +#include + namespace ProfileEvents { diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index af9a0c0a7ee..4758d62f1df 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 7ce963380d4..652ea9935ed 100644 --- 
a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -146,7 +146,7 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate auto tmp_path = path + ".tmp"; { auto src_buffer = DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold); - auto dst_buffer = cache_disk->writeFile(tmp_path, buf_size, WriteMode::Rewrite, estimated_size, aio_threshold); + auto dst_buffer = cache_disk->writeFile(tmp_path, buf_size, WriteMode::Rewrite); copyData(*src_buffer, *dst_buffer); } cache_disk->moveFile(tmp_path, path); @@ -175,10 +175,10 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate } std::unique_ptr -DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) +DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode) { if (!cache_file_predicate(path)) - return DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold); + return DiskDecorator::writeFile(path, buf_size, mode); LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write file {} to cache", backQuote(path)); @@ -187,12 +187,12 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode cache_disk->createDirectories(dir_path); return std::make_unique( - cache_disk->writeFile(path, buf_size, mode, estimated_size, aio_threshold), - [this, path, buf_size, mode, estimated_size, aio_threshold]() + cache_disk->writeFile(path, buf_size, mode), + [this, path, buf_size, mode]() { /// Copy file from cache to actual disk when cached buffer is finalized. - auto src_buffer = cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, 0); - auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold); + auto src_buffer = cache_disk->readFile(path, buf_size, 0, 0, 0); + auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode); copyData(*src_buffer, *dst_buffer); dst_buffer->finalize(); }, diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h index b0b373d900c..9e2b3c3b592 100644 --- a/src/Disks/DiskCacheWrapper.h +++ b/src/Disks/DiskCacheWrapper.h @@ -36,7 +36,7 @@ public: std::unique_ptr readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const override; std::unique_ptr - writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) override; + writeFile(const String & path, size_t buf_size, WriteMode mode) override; void remove(const String & path) override; void removeRecursive(const String & path) override; void createHardLink(const String & src_path, const String & dst_path) override; diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index aaa54005f6f..e62f2737ec5 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -125,9 +125,9 @@ DiskDecorator::readFile(const String & path, size_t buf_size, size_t estimated_s } std::unique_ptr -DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) +DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode) { - return delegate->writeFile(path, buf_size, mode, estimated_size, aio_threshold); + return delegate->writeFile(path, buf_size, mode); } void DiskDecorator::remove(const String & path) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 
1ce3c3ea773..61b7ee9ecee 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -38,7 +38,7 @@ public: std::unique_ptr readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const override; std::unique_ptr - writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) override; + writeFile(const String & path, size_t buf_size, WriteMode mode) override; void remove(const String & path) override; void removeRecursive(const String & path) override; void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index cde9b3c5a41..e2d6a5410a3 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include @@ -232,10 +231,10 @@ DiskLocal::readFile(const String & path, size_t buf_size, size_t estimated_size, } std::unique_ptr -DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) +DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode) { int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1; - return createWriteBufferFromFileBase(disk_path + path, estimated_size, aio_threshold, buf_size, flags); + return std::make_unique(disk_path + path, buf_size, flags); } void DiskLocal::remove(const String & path) diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 762a8502faa..d862f72d4f8 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -83,9 +83,7 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode, - size_t estimated_size, - size_t aio_threshold) override; + WriteMode mode) override; void remove(const String & path) override; diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index d185263d48c..aecdbf02f50 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -330,7 +330,7 @@ std::unique_ptr DiskMemory::readFile(const String & path return std::make_unique(path, iter->second.data); } -std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t, size_t) +std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode) { std::lock_guard lock(mutex); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index 4d4b947098b..e24f3c9c184 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -74,9 +74,7 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode, - size_t estimated_size, - size_t aio_threshold) override; + WriteMode mode) override; void remove(const String & path) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ac0f5a2ae8f..437718924db 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -148,9 +148,7 @@ public: virtual std::unique_ptr writeFile( const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite, - size_t estimated_size = 0, - size_t aio_threshold = 0) = 0; + WriteMode mode = WriteMode::Rewrite) = 0; /// Remove file or directory. Throws exception if file doesn't exists or if directory is not empty. 
virtual void remove(const String & path) = 0; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 4786c05f8b0..d9b2eaefa81 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -665,7 +665,7 @@ std::unique_ptr DiskS3::readFile(const String & path, si return std::make_unique(std::move(reader), min_bytes_for_seek); } -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t, size_t) +std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode) { bool exist = exists(path); if (exist && readMeta(path).read_only) diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index f62c603adda..900378f46cd 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -88,9 +88,7 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode, - size_t estimated_size, - size_t aio_threshold) override; + WriteMode mode) override; void remove(const String & path) override; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 6669a369a9d..91a67799115 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -192,6 +192,10 @@ struct ConvertImpl else throw Exception("Unexpected UInt128 to big int conversion", ErrorCodes::NOT_IMPLEMENTED); } + else if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception("Conversion between numeric types and UUID is not supported", ErrorCodes::NOT_IMPLEMENTED); + } else { if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) diff --git a/src/Functions/FunctionsStringHash.h b/src/Functions/FunctionsStringHash.h index c09abc33319..72fb6e4c76d 100644 --- a/src/Functions/FunctionsStringHash.h +++ b/src/Functions/FunctionsStringHash.h @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int ARGUMENT_OUT_OF_BOUND; } // FunctionStringHash @@ -30,6 +31,8 @@ public: static constexpr auto name = Name::name; static constexpr size_t default_shingle_size = 3; static constexpr size_t default_num_hashes = 6; + static constexpr size_t max_shingle_size = 25; + static constexpr size_t max_num_hashes = 25; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -100,10 +103,14 @@ public: } if (shingle_size == 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (shingle size) of function {} cannot be zero", getName()); - + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be zero", getName()); if (num_hashes == 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument (num hashes) of function {} cannot be zero", getName()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be zero", getName()); + + if (shingle_size > max_shingle_size) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be greater then {}", getName(), max_shingle_size); + if (num_hashes > max_num_hashes) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be greater then {}", getName(), max_num_hashes); auto type = std::make_shared(); if constexpr (is_simhash) diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index 2eddf23af66..7364311a1be 100644 --- a/src/Functions/bar.cpp +++ 
b/src/Functions/bar.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -57,23 +56,30 @@ public: + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isNativeNumber(arguments[0]) || !isNativeNumber(arguments[1]) || !isNativeNumber(arguments[2]) - || (arguments.size() == 4 && !isNativeNumber(arguments[3]))) + if (!isNumber(arguments[0]) || !isNumber(arguments[1]) || !isNumber(arguments[2]) + || (arguments.size() == 4 && !isNumber(arguments[3]))) throw Exception("All arguments for function " + getName() + " must be numeric.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {3}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - Int64 min = extractConstant(arguments, 1, "Second"); /// The level at which the line has zero length. - Int64 max = extractConstant(arguments, 2, "Third"); /// The level at which the line has the maximum length. + /// The maximum width of the bar in characters. + Float64 max_width = 80; /// Motivated by old-school terminal size. - /// The maximum width of the bar in characters, by default. - Float64 max_width = arguments.size() == 4 ? extractConstant(arguments, 3, "Fourth") : 80; + if (arguments.size() == 4) + { + const auto & max_width_column = *arguments[3].column; + + if (!isColumnConst(max_width_column)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Fourth argument for function {} must be constant", getName()); + + max_width = max_width_column.getFloat64(0); + } if (isNaN(max_width)) throw Exception("Argument 'max_width' must not be NaN", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -86,83 +92,32 @@ public: const auto & src = *arguments[0].column; - auto res_column = ColumnString::create(); - - if (executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width) - || executeNumber(src, *res_column, min, max, max_width)) - { - return res_column; - } - else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - -private: - template - T extractConstant(const ColumnsWithTypeAndName & arguments, size_t argument_pos, const char * which_argument) const - { - const auto & column = *arguments[argument_pos].column; - - if (!isColumnConst(column)) - throw Exception( - which_argument + String(" argument for function ") + getName() + " must be constant.", ErrorCodes::ILLEGAL_COLUMN); - - return applyVisitor(FieldVisitorConvertToNumber(), column[0]); - } - - template - static void fill(const PaddedPODArray & src, - ColumnString::Chars & dst_chars, - ColumnString::Offsets & dst_offsets, - Int64 min, - Int64 max, - Float64 max_width) - 
{ - size_t size = src.size(); size_t current_offset = 0; - dst_offsets.resize(size); - dst_chars.reserve(size * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// lines 0-terminated. + auto res_column = ColumnString::create(); - for (size_t i = 0; i < size; ++i) + ColumnString::Chars & dst_chars = res_column->getChars(); + ColumnString::Offsets & dst_offsets = res_column->getOffsets(); + + dst_offsets.resize(input_rows_count); + dst_chars.reserve(input_rows_count * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// strings are 0-terminated. + + for (size_t i = 0; i < input_rows_count; ++i) { - Float64 width = UnicodeBar::getWidth(src[i], min, max, max_width); + Float64 width = UnicodeBar::getWidth( + src.getFloat64(i), + arguments[1].column->getFloat64(i), + arguments[2].column->getFloat64(i), + max_width); + size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset])); current_offset = next_size; dst_offsets[i] = current_offset; } - } - template - static void fill(T src, String & dst_chars, Int64 min, Int64 max, Float64 max_width) - { - Float64 width = UnicodeBar::getWidth(src, min, max, max_width); - dst_chars.resize(UnicodeBar::getWidthInBytes(width)); - UnicodeBar::render(width, dst_chars.data()); - } - - template - static bool executeNumber(const IColumn & src, ColumnString & dst, Int64 min, Int64 max, Float64 max_width) - { - if (const ColumnVector * col = checkAndGetColumn>(&src)) - { - fill(col->getData(), dst.getChars(), dst.getOffsets(), min, max, max_width); - return true; - } - else - return false; + return res_column; } }; diff --git a/src/Functions/byteSize.cpp b/src/Functions/byteSize.cpp new file mode 100644 index 00000000000..ea1a9c83581 --- /dev/null +++ b/src/Functions/byteSize.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +/** byteSize() - get the value size in number of bytes for accounting purposes. + */ +class FunctionByteSize : public IFunction +{ +public: + static constexpr auto name = "byteSize"; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override { return name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + size_t num_args = arguments.size(); + + /// If the resulting size is constant, return constant column. 
+ + bool all_constant = true; + UInt64 constant_size = 0; + for (size_t arg_num = 0; arg_num < num_args; ++arg_num) + { + if (arguments[arg_num].type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) + { + constant_size += arguments[arg_num].type->getSizeOfValueInMemory(); + } + else + { + all_constant = false; + break; + } + } + + if (all_constant) + return result_type->createColumnConst(input_rows_count, constant_size); + + auto result_col = ColumnUInt64::create(input_rows_count); + auto & vec_res = result_col->getData(); + for (size_t arg_num = 0; arg_num < num_args; ++arg_num) + { + const IColumn * column = arguments[arg_num].column.get(); + + if (arg_num == 0) + for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + vec_res[row_num] = column->byteSizeAt(row_num); + else + for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + vec_res[row_num] += column->byteSizeAt(row_num); + } + + return result_col; + } +}; + +} + +void registerFunctionByteSize(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 2c0baa7bba4..653922bbced 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -66,6 +66,7 @@ void registerFunctionHasThreadFuzzer(FunctionFactory &); void registerFunctionInitializeAggregation(FunctionFactory &); void registerFunctionErrorCodeToName(FunctionFactory &); void registerFunctionTcpPort(FunctionFactory &); +void registerFunctionByteSize(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -132,6 +133,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionInitializeAggregation(factory); registerFunctionErrorCodeToName(factory); registerFunctionTcpPort(factory); + registerFunctionByteSize(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 38cf53948e6..61ec52a1d07 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -140,6 +140,11 @@ public: { executeImpl(block, result_type, input_rows_count); } + else if (mode == "harmful function") + { + double res = drand48(); + (void)res; + } else if (mode == "mmap many") { std::vector maps; diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 7e64deef64d..b97a4a187e9 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -204,6 +204,7 @@ SRCS( blockSerializedSize.cpp blockSize.cpp buildId.cpp + byteSize.cpp caseWithExpression.cpp cbrt.cpp coalesce.cpp diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index fa6b1fc2d8a..56c795324e3 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -561,7 +561,7 @@ template inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) { /// Optimistic path, when whole value is in buffer. 
- if (buf.position() + 10 <= buf.buffer().end()) + if (!buf.eof() && buf.position() + 10 <= buf.buffer().end()) { UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0'); buf.position() += 5; diff --git a/src/IO/WriteBufferAIO.cpp b/src/IO/WriteBufferAIO.cpp deleted file mode 100644 index 8e0224669f2..00000000000 --- a/src/IO/WriteBufferAIO.cpp +++ /dev/null @@ -1,441 +0,0 @@ -#if defined(OS_LINUX) || defined(__FreeBSD__) - -#include -#include -#include - -#include -#include -#include -#include - - -namespace ProfileEvents -{ - extern const Event FileOpen; - extern const Event WriteBufferAIOWrite; - extern const Event WriteBufferAIOWriteBytes; -} - -namespace CurrentMetrics -{ - extern const Metric Write; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int AIO_READ_ERROR; - extern const int AIO_WRITE_ERROR; - extern const int CANNOT_IO_SUBMIT; - extern const int CANNOT_IO_GETEVENTS; - extern const int CANNOT_TRUNCATE_FILE; - extern const int CANNOT_FSYNC; -} - - -/// Note: an additional page is allocated that will contain data that -/// do not fit into the main buffer. -WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, mode_t mode_, - char * existing_memory_) - : WriteBufferFromFileBase(buffer_size_ + DEFAULT_AIO_FILE_BLOCK_SIZE, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE), - flush_buffer(BufferWithOwnMemory(this->memory.size(), nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)), - filename(filename_) -{ - ProfileEvents::increment(ProfileEvents::FileOpen); - - /// Correct the buffer size information so that additional pages do not touch the base class `BufferBase`. - this->buffer().resize(this->buffer().size() - DEFAULT_AIO_FILE_BLOCK_SIZE); - this->internalBuffer().resize(this->internalBuffer().size() - DEFAULT_AIO_FILE_BLOCK_SIZE); - flush_buffer.buffer().resize(this->buffer().size() - DEFAULT_AIO_FILE_BLOCK_SIZE); - flush_buffer.internalBuffer().resize(this->internalBuffer().size() - DEFAULT_AIO_FILE_BLOCK_SIZE); - - int open_flags = (flags_ == -1) ? (O_RDWR | O_TRUNC | O_CREAT) : flags_; - open_flags |= O_DIRECT; - open_flags |= O_CLOEXEC; - - fd = ::open(filename.c_str(), open_flags, mode_); - if (fd == -1) - { - auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); - } -} - -WriteBufferAIO::~WriteBufferAIO() -{ - if (!aio_failed) - { - try - { - flush(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (fd != -1) - ::close(fd); -} - -off_t WriteBufferAIO::getPositionInFile() -{ - return seek(0, SEEK_CUR); -} - -void WriteBufferAIO::sync() -{ - flush(); - - /// Ask OS to flush data to disk. - int res = ::fsync(fd); - if (res == -1) - throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); -} - -void WriteBufferAIO::nextImpl() -{ - if (!offset()) - return; - - if (waitForAIOCompletion()) - finalize(); - - /// Create a request for asynchronous write. 
- prepare(); - -#if defined(__FreeBSD__) - request.aio.aio_lio_opcode = LIO_WRITE; - request.aio.aio_fildes = fd; - request.aio.aio_buf = reinterpret_cast(buffer_begin); - request.aio.aio_nbytes = region_aligned_size; - request.aio.aio_offset = region_aligned_begin; -#else - request.aio_lio_opcode = IOCB_CMD_PWRITE; - request.aio_fildes = fd; - request.aio_buf = reinterpret_cast(buffer_begin); - request.aio_nbytes = region_aligned_size; - request.aio_offset = region_aligned_begin; -#endif - - /// Send the request. - while (io_submit(aio_context.ctx, 1, &request_ptr) < 0) - { - if (errno != EINTR) - { - aio_failed = true; - throw Exception("Cannot submit request for asynchronous IO on file " + filename, ErrorCodes::CANNOT_IO_SUBMIT); - } - } - - is_pending_write = true; -} - -off_t WriteBufferAIO::seek(off_t off, int whence) -{ - flush(); - - if (whence == SEEK_SET) - { - if (off < 0) - throw Exception("SEEK_SET underflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - pos_in_file = off; - } - else if (whence == SEEK_CUR) - { - if (off >= 0) - { - if (off > (std::numeric_limits::max() - pos_in_file)) - throw Exception("SEEK_CUR overflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - else if (off < -pos_in_file) - throw Exception("SEEK_CUR underflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - pos_in_file += off; - } - else - throw Exception("WriteBufferAIO::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - if (pos_in_file > max_pos_in_file) - max_pos_in_file = pos_in_file; - - return pos_in_file; -} - -void WriteBufferAIO::truncate(off_t length) -{ - flush(); - - int res = ::ftruncate(fd, length); - if (res == -1) - throwFromErrnoWithPath("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); -} - -void WriteBufferAIO::flush() -{ - next(); - if (waitForAIOCompletion()) - finalize(); -} - -bool WriteBufferAIO::waitForAIOCompletion() -{ - if (!is_pending_write) - return false; - - CurrentMetrics::Increment metric_increment_write{CurrentMetrics::Write}; - - io_event event; - while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0) - { - if (errno != EINTR) - { - aio_failed = true; - throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::CANNOT_IO_GETEVENTS); - } - } - - // Unpoison the memory returned from an uninstrumented system function. - __msan_unpoison(&event, sizeof(event)); - - is_pending_write = false; -#if defined(__FreeBSD__) - bytes_written = aio_return(reinterpret_cast(event.udata)); -#else - bytes_written = event.res; -#endif - - ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); - ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); - - return true; -} - -void WriteBufferAIO::prepare() -{ - /// Swap the main and duplicate buffers. 
- swap(flush_buffer); - - truncation_count = 0; - - /* - A page on disk or in memory - - start address (starting position in case of disk) is a multiply of DEFAULT_AIO_FILE_BLOCK_SIZE - : - : - +---------------+ - | | - | | - | | - | | - | | - | | - +---------------+ - <---------------> - : - : - DEFAULT_AIO_FILE_BLOCK_SIZE - - */ - - /* - Representation of data on a disk - - XXX : the data you want to write - ZZZ : data that is already on disk or zeros, if there is no data - - region_aligned_begin region_aligned_end - : region_begin region_end : - : : : : - : : : : - +---:-----------+---------------+---------------+---------------+--:------------+ - | : | | | | : | - | +-----------+---------------+---------------+---------------+--+ | - |ZZZ|XXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XX|ZZZZZZZZZZZZ| - |ZZZ|XXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XX|ZZZZZZZZZZZZ| - | +-----------+---------------+---------------+---------------+--+ | - | | | | | | - +---------------+---------------+---------------+---------------+---------------+ - - <--><--------------------------------------------------------------><-----------> - : : : - : : : - region_left_padding region_size region_right_padding - - <-------------------------------------------------------------------------------> - : - : - region_aligned_size - */ - - /// Region of the disk in which we want to write data. - const off_t region_begin = pos_in_file; - - if ((flush_buffer.offset() > static_cast(std::numeric_limits::max())) || - (pos_in_file > (std::numeric_limits::max() - static_cast(flush_buffer.offset())))) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - - const off_t region_end = pos_in_file + flush_buffer.offset(); - const size_t region_size = region_end - region_begin; - - /// The aligned region of the disk into which we want to write the data. - const size_t region_left_padding = region_begin % DEFAULT_AIO_FILE_BLOCK_SIZE; - const size_t region_right_padding = (DEFAULT_AIO_FILE_BLOCK_SIZE - (region_end % DEFAULT_AIO_FILE_BLOCK_SIZE)) % DEFAULT_AIO_FILE_BLOCK_SIZE; - - region_aligned_begin = region_begin - region_left_padding; - - if (region_end > (std::numeric_limits::max() - static_cast(region_right_padding))) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - - const off_t region_aligned_end = region_end + region_right_padding; - region_aligned_size = region_aligned_end - region_aligned_begin; - - bytes_to_write = region_aligned_size; - - /* - Representing data in the buffer before processing - - XXX : the data you want to write - - buffer_begin buffer_end - : : - : : - +---------------+---------------+---------------+-------------:-+ - | | | | : | - +---------------+---------------+---------------+-------------+ | - |XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXX| | - |XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXX| | - +---------------+---------------+---------------+-------------+ | - | | | | | - +---------------+---------------+---------------+---------------+ - - <-------------------------------------------------------------> - : - : - buffer_size - */ - - /// The buffer of data that we want to write to the disk. - buffer_begin = flush_buffer.buffer().begin(); - Position buffer_end = buffer_begin + region_size; - size_t buffer_size = buffer_end - buffer_begin; - - /// Process the buffer so that it reflects the structure of the disk region. 
- - /* - Representation of data in the buffer after processing - - XXX : the data you want to write - ZZZ : data from disk or zeros, if there is no data - - `buffer_begin` `buffer_end` extra page - : : : - : : : - +---:-----------+---------------+---------------+---------------+--:------------+ - | | | | | : | - | +-----------+---------------+---------------+---------------+--+ | - |ZZZ|XXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XX|ZZZZZZZZZZZZ| - |ZZZ|XXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|XX|ZZZZZZZZZZZZ| - | +-----------+---------------+---------------+---------------+--+ | - | | | | | | - +---------------+---------------+---------------+---------------+---------------+ - - <--><--------------------------------------------------------------><-----------> - : : : - : : : - region_left_padding region_size region_right_padding - - <-------------------------------------------------------------------------------> - : - : - region_aligned_size - */ - - if ((region_left_padding > 0) || (region_right_padding > 0)) - { - char memory_page[DEFAULT_AIO_FILE_BLOCK_SIZE] __attribute__ ((aligned (DEFAULT_AIO_FILE_BLOCK_SIZE))); - - if (region_left_padding > 0) - { - /// Move the buffer data to the right. Complete the beginning of the buffer with data from the disk. - buffer_size += region_left_padding; - buffer_end = buffer_begin + buffer_size; - - ::memmove(buffer_begin + region_left_padding, buffer_begin, (buffer_size - region_left_padding) * sizeof(*buffer_begin)); - - ssize_t read_count = ::pread(fd, memory_page, DEFAULT_AIO_FILE_BLOCK_SIZE, region_aligned_begin); - if (read_count < 0) - throw Exception("Read error", ErrorCodes::AIO_READ_ERROR); - - size_t to_copy = std::min(static_cast(read_count), region_left_padding); - ::memcpy(buffer_begin, memory_page, to_copy * sizeof(*buffer_begin)); - ::memset(buffer_begin + to_copy, 0, (region_left_padding - to_copy) * sizeof(*buffer_begin)); - } - - if (region_right_padding > 0) - { - /// Add the end of the buffer with data from the disk. - ssize_t read_count = ::pread(fd, memory_page, DEFAULT_AIO_FILE_BLOCK_SIZE, region_aligned_end - DEFAULT_AIO_FILE_BLOCK_SIZE); - if (read_count < 0) - throw Exception("Read error", ErrorCodes::AIO_READ_ERROR); - - Position truncation_begin; - off_t offset = DEFAULT_AIO_FILE_BLOCK_SIZE - region_right_padding; - if (read_count > offset) - { - ::memcpy(buffer_end, memory_page + offset, (read_count - offset) * sizeof(*buffer_end)); - truncation_begin = buffer_end + (read_count - offset); - truncation_count = DEFAULT_AIO_FILE_BLOCK_SIZE - read_count; - } - else - { - truncation_begin = buffer_end; - truncation_count = region_right_padding; - } - - ::memset(truncation_begin, 0, truncation_count * sizeof(*truncation_begin)); - } - } -} - -void WriteBufferAIO::finalize() -{ - if (bytes_written < bytes_to_write) - throw Exception("Asynchronous write error on file " + filename, ErrorCodes::AIO_WRITE_ERROR); - - bytes_written -= truncation_count; - -#if defined(__FreeBSD__) - off_t aio_offset = request.aio.aio_offset; -#else - off_t aio_offset = request.aio_offset; -#endif - off_t pos_offset = bytes_written - (pos_in_file - aio_offset); - - if (pos_in_file > (std::numeric_limits::max() - pos_offset)) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - pos_in_file += pos_offset; - - if (pos_in_file > max_pos_in_file) - max_pos_in_file = pos_in_file; - - if (truncation_count > 0) - { - /// Truncate the file to remove unnecessary zeros from it. 
- int res = ::ftruncate(fd, max_pos_in_file); - if (res == -1) - throwFromErrnoWithPath("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); - } -} - -} - -#endif diff --git a/src/IO/WriteBufferAIO.h b/src/IO/WriteBufferAIO.h deleted file mode 100644 index f514acab359..00000000000 --- a/src/IO/WriteBufferAIO.h +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once - -#if defined(OS_LINUX) || defined(__FreeBSD__) - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace CurrentMetrics -{ - extern const Metric OpenFileForWrite; -} - -namespace DB -{ - -/** Class for asynchronous data writing. - */ -class WriteBufferAIO final : public WriteBufferFromFileBase -{ -public: - WriteBufferAIO(const std::string & filename_, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags_ = -1, mode_t mode_ = 0666, - char * existing_memory_ = nullptr); - ~WriteBufferAIO() override; - - WriteBufferAIO(const WriteBufferAIO &) = delete; - WriteBufferAIO & operator=(const WriteBufferAIO &) = delete; - - off_t getPositionInFile(); - off_t seek(off_t off, int whence); - void truncate(off_t length); - void sync() override; - std::string getFileName() const override { return filename; } - int getFD() const { return fd; } - -private: - void nextImpl() override; - - /// If there's still data in the buffer, we'll write them. - void flush(); - /// Wait for the end of the current asynchronous task. - bool waitForAIOCompletion(); - /// Prepare an asynchronous request. - void prepare(); - /// - void finalize() override; - -private: - /// Buffer for asynchronous data writes. - BufferWithOwnMemory flush_buffer; - - /// Description of the asynchronous write request. - iocb request{}; - iocb * request_ptr{&request}; - - AIOContext aio_context{1}; - - const std::string filename; - - /// The number of bytes to be written to the disk. - off_t bytes_to_write = 0; - /// Number of bytes written with the last request. - off_t bytes_written = 0; - /// The number of zero bytes to be cut from the end of the file - /// after the data write operation completes. - off_t truncation_count = 0; - - /// The current position in the file. - off_t pos_in_file = 0; - /// The maximum position reached in the file. - off_t max_pos_in_file = 0; - - /// The starting position of the aligned region of the disk to which the data is written. - off_t region_aligned_begin = 0; - /// The size of the aligned region of the disk. - size_t region_aligned_size = 0; - - /// The file descriptor for writing. - int fd = -1; - - /// The data buffer that we want to write to the disk. - Position buffer_begin = nullptr; - - /// Is the asynchronous write operation still in progress? - bool is_pending_write = false; - /// Did the asynchronous operation fail? - bool aio_failed = false; - - CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; -}; - -} - -#endif diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 77530c323d2..b7d58638113 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -50,11 +50,6 @@ public: /// Close file before destruction of object. 
void close(); - void finalize() override - { - close(); - } - std::string getFileName() const override { return file_name; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index f89422f9715..624a6c3496a 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -730,7 +730,7 @@ static const char digits100[201] = template inline void writeDateText(const LocalDate & date, WriteBuffer & buf) { - if (buf.position() + 10 <= buf.buffer().end()) + if (reinterpret_cast(buf.position()) + 10 <= reinterpret_cast(buf.buffer().end())) { memcpy(buf.position(), &digits100[date.year() / 100 * 2], 2); buf.position() += 2; @@ -767,7 +767,7 @@ inline void writeDateText(DayNum date, WriteBuffer & buf) template inline void writeDateTimeText(const LocalDateTime & datetime, WriteBuffer & buf) { - if (buf.position() + 19 <= buf.buffer().end()) + if (reinterpret_cast(buf.position()) + 19 <= reinterpret_cast(buf.buffer().end())) { memcpy(buf.position(), &digits100[datetime.year() / 100 * 2], 2); buf.position() += 2; diff --git a/src/IO/WriteIntText.h b/src/IO/WriteIntText.h index 15276bba63f..93444e7df73 100644 --- a/src/IO/WriteIntText.h +++ b/src/IO/WriteIntText.h @@ -24,7 +24,7 @@ namespace detail template void writeIntText(T x, WriteBuffer & buf) { - if (likely(buf.position() + WRITE_HELPERS_MAX_INT_WIDTH < buf.buffer().end())) + if (likely(reinterpret_cast(buf.position()) + WRITE_HELPERS_MAX_INT_WIDTH < reinterpret_cast(buf.buffer().end()))) buf.position() = itoa(x, buf.position()); else detail::writeUIntTextFallback(x, buf); diff --git a/src/IO/createWriteBufferFromFileBase.cpp b/src/IO/createWriteBufferFromFileBase.cpp deleted file mode 100644 index 6022457f32e..00000000000 --- a/src/IO/createWriteBufferFromFileBase.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#if defined(OS_LINUX) || defined(__FreeBSD__) -#include -#endif -#include - - -namespace ProfileEvents -{ - extern const Event CreatedWriteBufferOrdinary; - extern const Event CreatedWriteBufferAIO; - extern const Event CreatedWriteBufferAIOFailed; -} - -namespace DB -{ - -std::unique_ptr createWriteBufferFromFileBase(const std::string & filename_, size_t estimated_size, - size_t aio_threshold, size_t buffer_size_, int flags_, mode_t mode, char * existing_memory_, - size_t alignment) -{ -#if defined(OS_LINUX) || defined(__FreeBSD__) - if (aio_threshold && estimated_size >= aio_threshold) - { - /// Attempt to open a file with O_DIRECT - try - { - auto res = std::make_unique(filename_, buffer_size_, flags_, mode, existing_memory_); - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferAIO); - return res; - } - catch (const ErrnoException &) - { - /// Fallback to cached IO if O_DIRECT is not supported. - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferAIOFailed); - } - } -#else - (void)aio_threshold; - (void)estimated_size; -#endif - - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); - return std::make_unique(filename_, buffer_size_, flags_, mode, existing_memory_, alignment); -} - -} diff --git a/src/IO/createWriteBufferFromFileBase.h b/src/IO/createWriteBufferFromFileBase.h deleted file mode 100644 index 42cad88303b..00000000000 --- a/src/IO/createWriteBufferFromFileBase.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - -/** Create an object to write data to a file. 
- * estimated_size - number of bytes to write - * aio_threshold - the minimum number of bytes for asynchronous writes - * - * If aio_threshold = 0 or estimated_size < aio_threshold, the write operations are executed synchronously. - * Otherwise, write operations are performed asynchronously. - */ -std::unique_ptr createWriteBufferFromFileBase( - const std::string & filename_, - size_t estimated_size, - size_t aio_threshold, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - int flags_ = -1, - mode_t mode = 0666, - char * existing_memory_ = nullptr, - size_t alignment = 0); - -} diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index eac6183e332..8fed7feeef3 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -152,7 +152,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) /// Fast path (avoid copying) if the buffer have at least MAX_LENGTH bytes. static constexpr int MAX_LENGTH = 316; - if (likely(buf.position() + MAX_LENGTH <= buf.buffer().end())) + if (likely(!buf.eof() && buf.position() + MAX_LENGTH <= buf.buffer().end())) { auto initial_position = buf.position(); auto res = fast_float::from_chars(initial_position, buf.buffer().end(), x); diff --git a/src/IO/tests/CMakeLists.txt b/src/IO/tests/CMakeLists.txt index da4d330f0a9..fcd59d94cb0 100644 --- a/src/IO/tests/CMakeLists.txt +++ b/src/IO/tests/CMakeLists.txt @@ -55,9 +55,6 @@ add_executable (write_int write_int.cpp) target_link_libraries (write_int PRIVATE clickhouse_common_io) if (OS_LINUX OR OS_FREEBSD) - add_executable(write_buffer_aio write_buffer_aio.cpp) - target_link_libraries (write_buffer_aio PRIVATE clickhouse_common_io) - add_executable(read_buffer_aio read_buffer_aio.cpp) target_link_libraries (read_buffer_aio PRIVATE clickhouse_common_io) endif () diff --git a/src/IO/tests/write_buffer_aio.cpp b/src/IO/tests/write_buffer_aio.cpp deleted file mode 100644 index 9274e5abee5..00000000000 --- a/src/IO/tests/write_buffer_aio.cpp +++ /dev/null @@ -1,498 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace -{ - -namespace fs = std::filesystem; - -void run(); -[[noreturn]] void die(const std::string & msg); -void runTest(unsigned int num, const std::function & func); -std::string createTmpFile(); -std::string generateString(size_t n); - -bool test1(); -bool test2(); -bool test3(); -bool test4(); -bool test5(); -bool test6(); -bool test7(); -bool test8(); -bool test9(); -bool test10(); - -void run() -{ - const std::vector> tests = - { - test1, - test2, - test3, - test4, - test5, - test6, - test7, - test8, - test9, - test10 - }; - - unsigned int num = 0; - for (const auto & test : tests) - { - ++num; - runTest(num, test); - } -} - -void die(const std::string & msg) -{ - std::cout << msg; - ::exit(EXIT_FAILURE); -} - -void runTest(unsigned int num, const std::function & func) -{ - bool ok; - - try - { - ok = func(); - } - catch (const DB::Exception & ex) - { - ok = false; - std::cout << "Caught exception " << ex.displayText() << "\n"; - } - catch (const std::exception & ex) - { - ok = false; - std::cout << "Caught exception " << ex.what() << "\n"; - } - - if (ok) - std::cout << "Test " << num << " passed\n"; - else - std::cout << "Test " << num << " failed\n"; -} - -std::string createTmpFile() -{ - char pattern[] = "/tmp/fileXXXXXX"; - char * dir = ::mkdtemp(pattern); - if (dir == nullptr) - die("Could not create directory"); - - return std::string(dir) + "/foo"; -} - -std::string generateString(size_t n) -{ - static const std::string symbols 
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - - std::string buf; - buf.reserve(n); - - for (size_t i = 0; i < n; ++i) - buf += symbols[i % symbols.length()]; - - return buf; -} - -bool test1() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.write(buf.data(), buf.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return (received == buf); -} - -bool test2() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.write(buf.data(), buf.length() / 2); - out.seek(DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR); - out.write(&buf[buf.length() / 2], buf.length() / 2); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - if (received.substr(0, buf.length() / 2) != buf.substr(0, buf.length() / 2)) - return false; - if (received.substr(buf.length() / 2, DEFAULT_AIO_FILE_BLOCK_SIZE) != std::string(DEFAULT_AIO_FILE_BLOCK_SIZE, '\0')) - return false; - if (received.substr(buf.length() / 2 + DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(buf.length() / 2)) - return false; - - return true; -} - -bool test3() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.write(buf.data(), buf.length()); - - off_t pos1 = out.getPositionInFile(); - - out.truncate(buf.length() / 2); - - off_t pos2 = out.getPositionInFile(); - - if (pos1 != pos2) - return false; - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return (received == buf.substr(0, buf.length() / 2)); -} - -bool test4() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.write(buf.data(), buf.length()); - - off_t pos1 = out.getPositionInFile(); - - out.truncate(3 * buf.length() / 2); - - off_t pos2 = out.getPositionInFile(); - - if (pos1 != pos2) - return false; - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); 
- - if (received.substr(0, buf.length()) != buf) - return false; - - if (received.substr(buf.length()) != std::string(buf.length() / 2, '\0')) - return false; - - return true; -} - -bool test5() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.seek(1, SEEK_SET); - out.write(buf.data(), buf.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return received.substr(1) == buf; -} - -bool test6() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - std::string buf2 = "1111111111"; - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.seek(3, SEEK_SET); - out.write(buf.data(), buf.length()); - out.seek(-2 * DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR); - out.write(buf2.data(), buf2.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - if (received.substr(3, 8 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(0, 8 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - - if (received.substr(3 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE, 10) != buf2) - return false; - - if (received.substr(13 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(10 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - - return true; -} - -bool test7() -{ - std::string filename = createTmpFile(); - - std::string buf2 = "11111111112222222222"; - - { - DB::WriteBufferAIO out(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.seek(DEFAULT_AIO_FILE_BLOCK_SIZE - (buf2.length() / 2), SEEK_SET); - out.write(buf2.data(), buf2.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - if (received.length() != 4106) - return false; - if (received.substr(0, 4086) != std::string(4086, '\0')) - return false; - if (received.substr(4086, 20) != buf2) - return false; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return true; -} - -bool test8() -{ - std::string filename = createTmpFile(); - - std::string buf2 = "11111111112222222222"; - - { - DB::WriteBufferAIO out(filename, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.seek(2 * DEFAULT_AIO_FILE_BLOCK_SIZE - (buf2.length() / 2), SEEK_SET); - out.write(buf2.data(), buf2.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - if (received.length() != 8202) - return false; - if (received.substr(0, 8182) != std::string(8182, 
'\0')) - return false; - if (received.substr(8182, 20) != buf2) - return false; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return true; -} - -bool test9() -{ - std::string filename = createTmpFile(); - - size_t n = 3 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - std::string buf = generateString(n); - - std::string buf2(DEFAULT_AIO_FILE_BLOCK_SIZE + 10, '1'); - - { - DB::WriteBufferAIO out(filename, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.seek(3, SEEK_SET); - out.write(buf.data(), buf.length()); - out.seek(-DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR); - out.write(buf2.data(), buf2.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - if (received.substr(3, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(0, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - - if (received.substr(3 + 2 * DEFAULT_AIO_FILE_BLOCK_SIZE, DEFAULT_AIO_FILE_BLOCK_SIZE + 10) != buf2) - return false; - - return true; -} - -bool test10() -{ - std::string filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE + 3; - - std::string buf = generateString(n); - - { - DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (out.getFileName() != filename) - return false; - if (out.getFD() == -1) - return false; - - out.write(buf.data(), buf.length()); - } - - std::ifstream in(filename.c_str()); - if (!in.is_open()) - die("Could not open file"); - - std::string received{ std::istreambuf_iterator(in), std::istreambuf_iterator() }; - - in.close(); - fs::remove_all(fs::path(filename).parent_path().string()); - - return (received == buf); -} - -} - -int main() -{ - run(); - return 0; -} diff --git a/src/IO/ya.make b/src/IO/ya.make index 4dc3afb2f11..2ef8bd0a986 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -51,7 +51,6 @@ SRCS( ReadHelpers.cpp SeekAvoidingReadBuffer.cpp UseSSL.cpp - WriteBufferAIO.cpp WriteBufferFromFile.cpp WriteBufferFromFileBase.cpp WriteBufferFromFileDescriptor.cpp @@ -69,7 +68,6 @@ SRCS( ZstdInflatingReadBuffer.cpp copyData.cpp createReadBufferFromFileBase.cpp - createWriteBufferFromFileBase.cpp parseDateTimeBestEffort.cpp readFloatText.cpp diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 48273e32209..3b3fdaa65cb 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -127,6 +127,12 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr getIdentifiers(left_ast, left_identifiers); getIdentifiers(right_ast, right_identifiers); + if (left_identifiers.empty() || right_identifiers.empty()) + { + throw Exception("Not equi-join ON expression: " + queryToString(expr) + ". 
No columns in one of equality side.", + ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + } + size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index cce62b1a6c4..e0b5591a708 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -847,6 +847,13 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const Storage if (!ast_ddl->as() && !ast_ddl->as() && !ast_ddl->as()) return false; + if (auto * alter = ast_ddl->as()) + { + // Setting alters should be executed on all replicas + if (alter->isSettingsAlter()) + return false; + } + return storage->supportsReplication(); } diff --git a/src/Interpreters/DNSCacheUpdater.cpp b/src/Interpreters/DNSCacheUpdater.cpp index 248c0ffa4dd..fb0298f480f 100644 --- a/src/Interpreters/DNSCacheUpdater.cpp +++ b/src/Interpreters/DNSCacheUpdater.cpp @@ -1,7 +1,7 @@ #include "DNSCacheUpdater.h" #include #include -#include + namespace DB { diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h index 07a41c12983..b889509c264 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/src/Interpreters/DatabaseAndTableWithAlias.h @@ -49,7 +49,9 @@ struct TableWithColumnNamesAndTypes { DatabaseAndTableWithAlias table; NamesAndTypesList columns; - NamesAndTypesList hidden_columns; /// Not general columns like MATERIALIZED and ALIAS. They are omitted in * and t.* results. + NamesAndTypesList hidden_columns; /// Not general columns like MATERIALIZED, ALIAS, VIRTUAL. They are omitted in * and t.* results by default. + NamesAndTypesList alias_columns; + NamesAndTypesList materialized_columns; TableWithColumnNamesAndTypes(const DatabaseAndTableWithAlias & table_, const NamesAndTypesList & columns_) : table(table_) @@ -63,11 +65,28 @@ struct TableWithColumnNamesAndTypes void addHiddenColumns(const NamesAndTypesList & addition) { - hidden_columns.insert(hidden_columns.end(), addition.begin(), addition.end()); + addAdditionalColumns(hidden_columns, addition); + } + + void addAliasColumns(const NamesAndTypesList & addition) + { + addAdditionalColumns(alias_columns, addition); + } + + void addMaterializedColumns(const NamesAndTypesList & addition) + { + addAdditionalColumns(alias_columns, addition); + } + +private: + void addAdditionalColumns(NamesAndTypesList & target, const NamesAndTypesList & addition) + { + target.insert(target.end(), addition.begin(), addition.end()); for (auto & col : addition) names.insert(col.name); } + private: NameSet names; }; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 8f74a7be493..7c472a27d88 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -357,10 +357,14 @@ void HashJoin::init(Type type_) joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { map.create(data->type); }); } -size_t HashJoin::getTotalRowCount() const +bool HashJoin::overDictionary() const { - std::shared_lock lock(data->rwlock); - return getTotalRowCountLocked(); + return data->type == Type::DICT; +} + +bool HashJoin::empty() const +{ + return data->type == Type::EMPTY; } size_t HashJoin::getTotalByteCount() const @@ -369,6 +373,18 @@ size_t HashJoin::getTotalByteCount() const return getTotalByteCountLocked(); } +size_t HashJoin::getTotalRowCount() const +{ + std::shared_lock lock(data->rwlock); + return getTotalRowCountLocked(); +} 
+ +bool HashJoin::alwaysReturnsEmptySet() const +{ + std::shared_lock lock(data->rwlock); + return isInnerOrRight(getKind()) && data->empty && !overDictionary(); +} + size_t HashJoin::getTotalRowCountLocked() const { size_t res = 0; diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 37bc9a9d345..c14ad4e5a99 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -150,9 +150,6 @@ class HashJoin : public IJoin public: HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); - bool empty() const { return data->type == Type::EMPTY; } - bool overDictionary() const { return data->type == Type::DICT; } - /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ @@ -188,7 +185,7 @@ public: /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools. size_t getTotalByteCount() const final; - bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && data->empty && !overDictionary(); } + bool alwaysReturnsEmptySet() const final; ASTTableJoin::Kind getKind() const { return kind; } ASTTableJoin::Strictness getStrictness() const { return strictness; } @@ -397,6 +394,9 @@ private: /// Call with already locked rwlock. size_t getTotalRowCountLocked() const; size_t getTotalByteCountLocked() const; + + bool empty() const; + bool overDictionary() const; }; } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2a193dc7249..742c9f6736f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 0a5a8f141cd..b5313473252 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -544,7 +544,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.prewhere_info) { ExpressionActions(analysis_result.prewhere_info->prewhere_actions).execute(header); - header = materializeBlock(header); if (analysis_result.prewhere_info->remove_prewhere_column) header.erase(analysis_result.prewhere_info->prewhere_column_name); } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index fd36f3a6fd6..7a74164cfd0 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -221,21 +222,43 @@ BlockIO InterpreterSystemQuery::execute() switch (query.type) { case Type::SHUTDOWN: + { context.checkAccess(AccessType::SYSTEM_SHUTDOWN); if (kill(0, SIGTERM)) throwFromErrno("System call kill(0, SIGTERM) failed", ErrorCodes::CANNOT_KILL); break; + } case Type::KILL: + { context.checkAccess(AccessType::SYSTEM_SHUTDOWN); - if (kill(0, SIGKILL)) - throwFromErrno("System call kill(0, SIGKILL) failed", ErrorCodes::CANNOT_KILL); + /// Exit with the same code as it is usually set by shell when process is terminated by SIGKILL. + /// It's better than doing 'raise' or 'kill', because they have no effect for 'init' process (with pid = 0, usually in Docker). 
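The comment above, together with the `_exit(128 + SIGKILL)` call that follows, relies on the shell convention that a process killed by signal N is reported with exit status 128 + N, so exiting with that code looks to a supervisor exactly like a real SIGKILL. A minimal POSIX illustration (not ClickHouse code):

```cpp
#include <csignal>
#include <unistd.h>

int main()
{
    /// Shells report a process killed by SIGKILL as exit status 137 (128 + 9).
    /// Exiting with the same code mimics that even when the signal cannot take
    /// effect, e.g. for the init process of a container.
    _exit(128 + SIGKILL);
}
```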
+ LOG_INFO(log, "Exit immediately as the SYSTEM KILL command has been issued."); + _exit(128 + SIGKILL); + // break; /// unreachable + } + case Type::SUSPEND: + { + auto command = fmt::format("kill -STOP {0} && sleep {1} && kill -CONT {0}", getpid(), query.seconds); + LOG_DEBUG(log, "Will run {}", command); + auto res = ShellCommand::execute(command); + res->in.close(); + WriteBufferFromOwnString out; + copyData(res->out, out); + copyData(res->err, out); + if (!out.str().empty()) + LOG_DEBUG(log, "The command returned output: {}", command, out.str()); + res->wait(); break; + } case Type::DROP_DNS_CACHE: + { context.checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE); DNSResolver::instance().dropCache(); /// Reinitialize clusters to update their resolved_addresses system_context.reloadClusterConfig(); break; + } case Type::DROP_MARK_CACHE: context.checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); system_context.dropMarkCache(); @@ -251,12 +274,15 @@ BlockIO InterpreterSystemQuery::execute() break; #endif case Type::RELOAD_DICTIONARY: + { context.checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); system_context.getExternalDictionariesLoader().loadOrReload( DatabaseCatalog::instance().resolveDictionaryName(query.target_dictionary)); ExternalDictionariesLoader::resetAll(); break; + } case Type::RELOAD_DICTIONARIES: + { context.checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); executeCommandsAndThrowIfError( [&] () { system_context.getExternalDictionariesLoader().reloadAllTriedToLoad(); }, @@ -264,6 +290,7 @@ BlockIO InterpreterSystemQuery::execute() ); ExternalDictionariesLoader::resetAll(); break; + } case Type::RELOAD_EMBEDDED_DICTIONARIES: context.checkAccess(AccessType::SYSTEM_RELOAD_EMBEDDED_DICTIONARIES); system_context.getEmbeddedDictionaries().reload(); @@ -273,6 +300,7 @@ BlockIO InterpreterSystemQuery::execute() system_context.reloadConfig(); break; case Type::RELOAD_SYMBOLS: + { #if defined(__ELF__) && !defined(__FreeBSD__) context.checkAccess(AccessType::SYSTEM_RELOAD_SYMBOLS); (void)SymbolIndex::instance(true); @@ -280,6 +308,7 @@ BlockIO InterpreterSystemQuery::execute() #else throw Exception("SYSTEM RELOAD SYMBOLS is not supported on current platform", ErrorCodes::NOT_IMPLEMENTED); #endif + } case Type::STOP_MERGES: startStopAction(ActionLocks::PartsMerge, false); break; @@ -340,6 +369,7 @@ BlockIO InterpreterSystemQuery::execute() ErrorCodes::BAD_ARGUMENTS); break; case Type::FLUSH_LOGS: + { context.checkAccess(AccessType::SYSTEM_FLUSH_LOGS); executeCommandsAndThrowIfError( [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(true); }, @@ -352,6 +382,7 @@ BlockIO InterpreterSystemQuery::execute() [&] () { if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); } ); break; + } case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: throw Exception(String(ASTSystemQuery::typeToString(query.type)) + " is not supported yet", ErrorCodes::NOT_IMPLEMENTED); @@ -586,7 +617,8 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() switch (query.type) { case Type::SHUTDOWN: [[fallthrough]]; - case Type::KILL: + case Type::KILL: [[fallthrough]]; + case Type::SUSPEND: { required_access.emplace_back(AccessType::SYSTEM_SHUTDOWN); break; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 0129e6bdce9..bf3bbf22b8c 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ 
b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -224,11 +224,14 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt bool first_table = true; for (const auto & table : tables_with_columns) { - for (const auto & column : table.columns) + for (const auto * cols : {&table.columns, &table.alias_columns, &table.materialized_columns}) { - if (first_table || !data.join_using_columns.count(column.name)) + for (const auto & column : *cols) { - addIdentifier(columns, table.table, column.name); + if (first_table || !data.join_using_columns.count(column.name)) + { + addIdentifier(columns, table.table, column.name); + } } } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ae35e009bce..5928da156f3 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -351,7 +351,7 @@ static std::tuple executeQueryImpl( #if !defined(ARCADIA_BUILD) if (settings.use_antlr_parser) { - ast = parseQuery(begin, end, max_query_size, settings.max_parser_depth); + ast = parseQuery(begin, end, max_query_size, settings.max_parser_depth, context.getCurrentDatabase()); } else { diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 56ca614dc2d..9234aaa831a 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -124,6 +124,8 @@ TablesWithColumns getDatabaseAndTablesWithColumns(const std::vectorchildren.empty()) + return false; + for (const auto & child : command_list->children) + { + const auto & command = child->as(); + if (command.type != ASTAlterCommand::MODIFY_SETTING) + return false; + } + return true; + } + return false; +} /** Get the text that identifies this element. */ String ASTAlterQuery::getID(char delim) const diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 91c80867738..f53a987905e 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -187,6 +187,8 @@ public: ASTExpressionList * command_list = nullptr; + bool isSettingsAlter() const; + String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 9ec50a09cf1..024c9c3b767 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -14,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int CANNOT_COMPILE_REGEXP; } void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes) @@ -86,6 +88,9 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo (*it)->formatImpl(settings, state, frame); } + if (!original_pattern.empty()) + settings.ostr << quoteString(original_pattern); + if (children.size() > 1) settings.ostr << ")"; } @@ -93,24 +98,40 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo void ASTColumnsExceptTransformer::transform(ASTs & nodes) const { std::set expected_columns; - for (const auto & child : children) - expected_columns.insert(child->as().name()); - - for (auto it = nodes.begin(); it != nodes.end();) + if (original_pattern.empty()) { - if (const auto * id = it->get()->as()) + for (const auto & child : children) + expected_columns.insert(child->as().name()); + + for (auto it = nodes.begin(); it != nodes.end();) { - auto 
expected_column = expected_columns.find(id->shortName()); - if (expected_column != expected_columns.end()) + if (const auto * id = it->get()->as()) { - expected_columns.erase(expected_column); - it = nodes.erase(it); + auto expected_column = expected_columns.find(id->shortName()); + if (expected_column != expected_columns.end()) + { + expected_columns.erase(expected_column); + it = nodes.erase(it); + continue; + } } - else - ++it; - } - else ++it; + } + } + else + { + for (auto it = nodes.begin(); it != nodes.end();) + { + if (const auto * id = it->get()->as()) + { + if (isColumnMatching(id->shortName())) + { + it = nodes.erase(it); + continue; + } + } + ++it; + } } if (is_strict && !expected_columns.empty()) @@ -125,6 +146,21 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const } } +void ASTColumnsExceptTransformer::setPattern(String pattern) +{ + original_pattern = std::move(pattern); + column_matcher = std::make_shared(original_pattern, RE2::Quiet); + if (!column_matcher->ok()) + throw DB::Exception( + "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); +} + +bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const +{ + return RE2::PartialMatch(column_name, *column_matcher); +} + void ASTColumnsReplaceTransformer::Replacement::formatImpl( const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { diff --git a/src/Parsers/ASTColumnsTransformers.h b/src/Parsers/ASTColumnsTransformers.h index ec1fd3eee0a..49d29222f02 100644 --- a/src/Parsers/ASTColumnsTransformers.h +++ b/src/Parsers/ASTColumnsTransformers.h @@ -2,6 +2,11 @@ #include +namespace re2 +{ + class RE2; +} + namespace DB { class IASTColumnsTransformer : public IAST @@ -43,9 +48,13 @@ public: return clone; } void transform(ASTs & nodes) const override; + void setPattern(String pattern); + bool isColumnMatching(const String & column_name) const; protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + std::shared_ptr column_matcher; + String original_pattern; }; class ASTColumnsReplaceTransformer : public IASTColumnsTransformer diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 0d6e15a3d8c..f3a43d7f3fd 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -22,6 +22,8 @@ const char * ASTSystemQuery::typeToString(Type type) return "SHUTDOWN"; case Type::KILL: return "KILL"; + case Type::SUSPEND: + return "SUSPEND"; case Type::DROP_DNS_CACHE: return "DROP DNS CACHE"; case Type::DROP_MARK_CACHE: @@ -146,7 +148,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, auto print_on_volume = [&] { - settings.ostr << " ON VOLUME " + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON VOLUME " << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(storage_policy) << (settings.hilite ? hilite_none : "") << "." @@ -182,9 +184,20 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, print_database_table(); } else if (type == Type::RELOAD_DICTIONARY) + { print_database_dictionary(); + } else if (type == Type::DROP_REPLICA) + { print_drop_replica(); + } + else if (type == Type::SUSPEND) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " + << (settings.hilite ? hilite_none : "") << seconds + << (settings.hilite ? hilite_keyword : "") << " SECOND" + << (settings.hilite ? 
hilite_none : ""); + } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 756b5b52600..ad7eb664659 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -20,6 +20,7 @@ public: UNKNOWN, SHUTDOWN, KILL, + SUSPEND, DROP_DNS_CACHE, DROP_MARK_CACHE, DROP_UNCOMPRESSED_CACHE, @@ -65,9 +66,10 @@ public: String table; String replica; String replica_zk_path; - bool is_drop_whole_replica; + bool is_drop_whole_replica{}; String storage_policy; String volume; + UInt64 seconds{}; String getID(char) const override { return "SYSTEM query"; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 7c82c4aca1e..649be7e8fa7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -343,6 +343,26 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" , ErrorCodes::SYNTAX_ERROR); } + else if (Poco::toLower(getIdentifierName(identifier)) == "position") + { + /// POSITION(needle IN haystack) is equivalent to function position(haystack, needle) + if (const auto * list = expr_list_args->as()) + { + if (list->children.size() == 1) + { + if (const auto * in_func = list->children[0]->as()) + { + if (in_func->name == "in") + { + // switch the two arguments + const auto & arg_list = in_func->arguments->as(); + if (arg_list.children.size() == 2) + expr_list_args->children = {arg_list.children[1], arg_list.children[0]}; + } + } + } + } + } /// The parametric aggregate function has two lists (parameters and arguments) in parentheses. Example: quantile(0.9)(x). if (allow_function_parameters && pos->type == TokenType::OpeningRoundBracket) @@ -1427,6 +1447,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e is_strict = true; ASTs identifiers; + ASTPtr regex_node; + ParserStringLiteral regex; auto parse_id = [&identifiers, &pos, &expected] { ASTPtr identifier; @@ -1441,7 +1463,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { // support one or more parameter ++pos; - if (!ParserList::parseUtil(pos, expected, parse_id, false)) + if (!ParserList::parseUtil(pos, expected, parse_id, false) && !regex.parse(pos, regex_node, expected)) return false; if (pos->type != TokenType::ClosingRoundBracket) @@ -1451,12 +1473,15 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e else { // only one parameter - if (!parse_id()) + if (!parse_id() && !regex.parse(pos, regex_node, expected)) return false; } auto res = std::make_shared(); - res->children = std::move(identifiers); + if (regex_node) + res->setPattern(regex_node->as().value.get()); + else + res->children = std::move(identifiers); res->is_strict = is_strict; node = std::move(res); return true; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index f50cf71f54d..afe85f069c7 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -18,6 +18,8 @@ const char * ParserMultiplicativeExpression::operators[] = "*", "multiply", "/", "divide", "%", "modulo", + "MOD", "modulo", + "DIV", "intDiv", nullptr }; diff --git a/src/Parsers/New/ParseTreeVisitor.cpp b/src/Parsers/New/ParseTreeVisitor.cpp index 2179e44d78b..a7c7a2758eb 100644 --- a/src/Parsers/New/ParseTreeVisitor.cpp +++ b/src/Parsers/New/ParseTreeVisitor.cpp 
@@ -34,7 +34,6 @@ // Include last, because antlr-runtime undefines EOF macros, which is required in boost multiprecision numbers. #include - namespace DB { @@ -114,15 +113,17 @@ antlrcpp::Any ParseTreeVisitor::visitShowTablesStmt(ClickHouseParser::ShowTables auto and_args = PtrTo(new ColumnExprList{ColumnExpr::createLiteral(Literal::createNumber("1"))}); + auto current_database = ColumnExpr::createLiteral(Literal::createString(current_database_name)); if (ctx->databaseIdentifier()) { - auto database = std::make_shared(nullptr, std::make_shared("database")); - auto args = PtrTo(new ColumnExprList{ - ColumnExpr::createIdentifier(database), - ColumnExpr::createLiteral(Literal::createString(visit(ctx->databaseIdentifier()).as>()->getName())) - }); - and_args->push(ColumnExpr::createFunction(std::make_shared("equals"), nullptr, args)); + current_database = ColumnExpr::createLiteral(Literal::createString(visit(ctx->databaseIdentifier()).as>()->getName())); } + auto database = std::make_shared(nullptr, std::make_shared("database")); + auto equals_args = PtrTo(new ColumnExprList{ + ColumnExpr::createIdentifier(database), + current_database + }); + and_args->push(ColumnExpr::createFunction(std::make_shared("equals"), nullptr, equals_args)); if (ctx->LIKE()) { diff --git a/src/Parsers/New/ParseTreeVisitor.h b/src/Parsers/New/ParseTreeVisitor.h index 4f6d93f0327..8301cb5c5b8 100644 --- a/src/Parsers/New/ParseTreeVisitor.h +++ b/src/Parsers/New/ParseTreeVisitor.h @@ -7,7 +7,9 @@ namespace DB { class ParseTreeVisitor : public ClickHouseParserVisitor { + const String & current_database_name; public: + explicit ParseTreeVisitor(const String & database_name) : ClickHouseParserVisitor(), current_database_name(database_name) {} virtual ~ParseTreeVisitor() override = default; // Top-level statements diff --git a/src/Parsers/New/parseQuery.cpp b/src/Parsers/New/parseQuery.cpp index af334717392..c66772385ca 100644 --- a/src/Parsers/New/parseQuery.cpp +++ b/src/Parsers/New/parseQuery.cpp @@ -12,7 +12,6 @@ #include - namespace DB { @@ -20,7 +19,7 @@ using namespace antlr4; using namespace AST; // For testing only -PtrTo parseQuery(const String & query) +PtrTo parseQuery(const String & query, const String & current_database) { ANTLRInputStream input(query); ClickHouseLexer lexer(&input); @@ -34,12 +33,12 @@ PtrTo parseQuery(const String & query) lexer.addErrorListener(&lexer_error_listener); parser.addErrorListener(&parser_error_listener); - ParseTreeVisitor visitor; + ParseTreeVisitor visitor { current_database }; return visitor.visit(parser.queryStmt()); } -ASTPtr parseQuery(const char * begin, const char * end, size_t, size_t) +ASTPtr parseQuery(const char * begin, const char * end, size_t, size_t, const String & current_database) { // TODO: do not ignore |max_parser_depth|. 
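The visitor above now receives the session's current database at construction time instead of resolving it later, so statements such as SHOW TABLES can be rewritten against the right database during parsing. A rough sketch of the idea; the class name and the generated query are illustrative only:

```cpp
#include <string>
#include <utility>

class ShowTablesRewriter
{
public:
    explicit ShowTablesRewriter(std::string current_database_) : current_database(std::move(current_database_)) {}

    /// SHOW TABLES [FROM db] becomes a SELECT filtered by the explicit database
    /// if one was given, otherwise by the session's current database.
    std::string rewrite(const std::string & explicit_database) const
    {
        const std::string & db = explicit_database.empty() ? current_database : explicit_database;
        return "SELECT name FROM system.tables WHERE database = '" + db + "'";
    }

private:
    std::string current_database;
};
```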
@@ -60,7 +59,7 @@ ASTPtr parseQuery(const char * begin, const char * end, size_t, size_t) lexer.addErrorListener(&lexer_error_listener); parser.addErrorListener(&parser_error_listener); - ParseTreeVisitor visitor; + ParseTreeVisitor visitor { current_database }; PtrTo new_ast = visitor.visit(parser.queryStmt()); auto old_ast = new_ast->convertToOld(); diff --git a/src/Parsers/New/parseQuery.h b/src/Parsers/New/parseQuery.h index ae86dc32eda..8d9c8efd337 100644 --- a/src/Parsers/New/parseQuery.h +++ b/src/Parsers/New/parseQuery.h @@ -2,14 +2,13 @@ #include #include - +#include namespace DB { // Compatibility interface - -AST::PtrTo parseQuery(const std::string & query); -ASTPtr parseQuery(const char * begin, const char * end, size_t max_query_size, size_t max_parser_depth); +AST::PtrTo parseQuery(const std::string & query, const String & current_database); +ASTPtr parseQuery(const char * begin, const char * end, size_t max_query_size, size_t max_parser_depth, const String & current_database); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 8e6040169c9..552ea9df320 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -228,7 +228,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E { if (primary_key) { - throw Exception("Multiple primary keys are not allowed.", ErrorCodes::BAD_ARGUMENTS); + /// Multiple primary keys are not allowed. + return false; } primary_key = elem; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index b6a90b348a0..491037da9a9 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -169,6 +169,20 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & parseDatabaseAndTableName(pos, expected, res->database, res->table); break; + case Type::SUSPEND: + { + ASTPtr seconds; + if (!(ParserKeyword{"FOR"}.ignore(pos, expected) + && ParserUnsignedInteger().parse(pos, seconds, expected) + && ParserKeyword{"SECOND"}.ignore(pos, expected))) /// SECOND, not SECONDS to be consistent with INTERVAL parsing in SQL + { + return false; + } + + res->seconds = seconds->as()->value.get(); + break; + } + default: /// There are no [db.table] after COMMAND NAME break; diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 2f0ef603022..88649d9ca25 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -5,11 +5,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_) : IProcessor({header_, header_, header_}, {}), out(out_) { @@ -35,7 +30,7 @@ IOutputFormat::Status IOutputFormat::prepare() if (!input.hasData()) return Status::NeedData; - current_chunk = input.pullData(true); + current_chunk = input.pull(true); current_block_kind = kind; has_input = true; return Status::Ready; @@ -49,31 +44,23 @@ IOutputFormat::Status IOutputFormat::prepare() return Status::Finished; } -static Port::Data prepareTotals(Port::Data data) +static Chunk prepareTotals(Chunk chunk) { - if (data.exception) - return data; - - if (!data.chunk.hasRows()) + if (!chunk.hasRows()) return {}; - if (data.chunk.getNumRows() > 1) + if (chunk.getNumRows() > 1) { /// This may happen if something like ARRAY JOIN was executed on totals. /// Skip rows except the first one. 
- auto columns = data.chunk.detachColumns(); + auto columns = chunk.detachColumns(); for (auto & column : columns) column = column->cut(0, 1); - data.chunk.setColumns(std::move(columns), 1); + chunk.setColumns(std::move(columns), 1); } - return data; -} - -void IOutputFormat::consume(Chunk) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method consume s not implemented for {}", getName()); + return chunk; } void IOutputFormat::work() @@ -97,24 +84,17 @@ void IOutputFormat::work() switch (current_block_kind) { case Main: - { - result_rows += current_chunk.chunk.getNumRows(); - result_bytes += current_chunk.chunk.allocatedBytes(); + result_rows += current_chunk.getNumRows(); + result_bytes += current_chunk.allocatedBytes(); consume(std::move(current_chunk)); break; - } case Totals: - { - auto totals = prepareTotals(std::move(current_chunk)); - if (totals.exception || totals.chunk) + if (auto totals = prepareTotals(std::move(current_chunk))) consumeTotals(std::move(totals)); break; - } case Extremes: - { consumeExtremes(std::move(current_chunk)); break; - } } if (auto_flush) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index c1a326ee16e..4c2b3f30070 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -28,7 +28,7 @@ public: protected: WriteBuffer & out; - Port::Data current_chunk; + Chunk current_chunk; PortKind current_block_kind = PortKind::Main; bool has_input = false; bool finished = false; @@ -41,14 +41,9 @@ protected: friend class ParallelFormattingOutputFormat; - virtual void consume(Chunk); + virtual void consume(Chunk) = 0; virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} - - virtual void consume(Port::Data data) { consume(data.getChunkOrTrow()); } - virtual void consumeTotals(Port::Data data) { consumeTotals(data.getChunkOrTrow()); } - virtual void consumeExtremes(Port::Data data) { consumeExtremes(data.getChunkOrTrow()); } - virtual void finalize() {} public: @@ -84,19 +79,8 @@ public: virtual void doWritePrefix() {} virtual void doWriteSuffix() { finalize(); } - void setTotals(const Block & totals) - { - Port::Data data; - data.chunk = Chunk(totals.getColumns(), totals.rows()); - consumeTotals(std::move(data)); - } - - void setExtremes(const Block & extremes) - { - Port::Data data; - data.chunk = Chunk(extremes.getColumns(), extremes.rows()); - consumeExtremes(std::move(data)); - } + void setTotals(const Block & totals) { consumeTotals(Chunk(totals.getColumns(), totals.rows())); } + void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } size_t getResultRows() const { return result_rows; } size_t getResultBytes() const { return result_bytes; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8a707ae6554..720b606be4f 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -96,7 +96,7 @@ StringRef JSONEachRowRowInputFormat::readColumnName(ReadBuffer & buf) { // This is just an optimization: try to avoid copying the name into current_column_name - if (nested_prefix_length == 0 && buf.position() + 1 < buf.buffer().end()) + if (nested_prefix_length == 0 && !buf.eof() && buf.position() + 1 < buf.buffer().end()) { char * next_pos = find_first_symbols<'\\', '"'>(buf.position() + 1, buf.buffer().end()); diff --git 
a/src/Processors/Formats/LazyOutputFormat.cpp b/src/Processors/Formats/LazyOutputFormat.cpp index 72996de9593..46287d1cce9 100644 --- a/src/Processors/Formats/LazyOutputFormat.cpp +++ b/src/Processors/Formats/LazyOutputFormat.cpp @@ -15,24 +15,24 @@ Chunk LazyOutputFormat::getChunk(UInt64 milliseconds) return {}; } - Port::Data data; - if (!queue.tryPop(data, milliseconds)) + Chunk chunk; + if (!queue.tryPop(chunk, milliseconds)) return {}; - if (!data.exception) - info.update(data.chunk.getNumRows(), data.chunk.allocatedBytes()); + if (chunk) + info.update(chunk.getNumRows(), chunk.allocatedBytes()); - return data.getChunkOrTrow(); + return chunk; } Chunk LazyOutputFormat::getTotals() { - return totals.getChunkOrTrow(); + return std::move(totals); } Chunk LazyOutputFormat::getExtremes() { - return extremes.getChunkOrTrow(); + return std::move(extremes); } void LazyOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 9f24e54735c..06ec116f3dd 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -37,28 +37,28 @@ public: } protected: - void consume(Port::Data data) override + void consume(Chunk chunk) override { if (!finished_processing) - queue.emplace(std::move(data)); + queue.emplace(std::move(chunk)); } - void consumeTotals(Port::Data data) override { totals = std::move(data); } - void consumeExtremes(Port::Data data) override { extremes = std::move(data); } + void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } + void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } void finalize() override { finished_processing = true; /// In case we are waiting for result. - queue.emplace(Port::Data{}); + queue.emplace(Chunk()); } private: - ConcurrentBoundedQueue queue; - Port::Data totals; - Port::Data extremes; + ConcurrentBoundedQueue queue; + Chunk totals; + Chunk extremes; /// Is not used. static WriteBuffer out; diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index e680304ccee..ec1bdc59683 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include diff --git a/src/Processors/Port.h b/src/Processors/Port.h index c7401a18afe..ac71c394518 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -60,14 +60,6 @@ protected: /// Note: std::variant can be used. But move constructor for it can't be inlined. 
Chunk chunk; std::exception_ptr exception; - - Chunk getChunkOrTrow() - { - if (exception) - std::rethrow_exception(std::move(exception)); - - return std::move(chunk); - } }; private: @@ -311,7 +303,12 @@ public: Chunk ALWAYS_INLINE pull(bool set_not_needed = false) { - return pullData(set_not_needed).getChunkOrTrow(); + auto data_ = pullData(set_not_needed); + + if (data_.exception) + std::rethrow_exception(data_.exception); + + return std::move(data_.chunk); } bool ALWAYS_INLINE isFinished() const diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 6493302a807..12d1a0249b7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1181,7 +1181,7 @@ void TCPHandler::receiveUnexpectedData() std::shared_ptr maybe_compressed_in; if (last_block_in.compression == Protocol::Compression::Enable) - maybe_compressed_in = std::make_shared(*in); + maybe_compressed_in = std::make_shared(*in, /* allow_different_codecs */ true); else maybe_compressed_in = in; @@ -1198,8 +1198,11 @@ void TCPHandler::initBlockInput() { if (!state.block_in) { + /// 'allow_different_codecs' is set to true, because some parts of compressed data can be precompressed in advance + /// with another codec that the rest of the data. Example: data sent by Distributed tables. + if (state.compression == Protocol::Compression::Enable) - state.maybe_compressed_in = std::make_shared(*in); + state.maybe_compressed_in = std::make_shared(*in, /* allow_different_codecs */ true); else state.maybe_compressed_in = in; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 5d089eb9f80..ade75506a38 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -6,8 +6,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include @@ -80,18 +82,26 @@ namespace StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( - StorageDistributed & storage_, std::string path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, BackgroundSchedulePool & bg_pool) + StorageDistributed & storage_, + const DiskPtr & disk_, + const std::string & relative_path_, + ConnectionPoolPtr pool_, + ActionBlocker & monitor_blocker_, + BackgroundSchedulePool & bg_pool) : storage(storage_) , pool(std::move(pool_)) - , path{path_ + '/'} + , disk(disk_) + , relative_path(relative_path_) + , path(disk->getPath() + relative_path + '/') , should_batch_inserts(storage.global_context.getSettingsRef().distributed_directory_monitor_batch_inserts) + , dir_fsync(storage.getDistributedSettingsRef().fsync_directories) , min_batched_block_size_rows(storage.global_context.getSettingsRef().min_insert_block_size_rows) , min_batched_block_size_bytes(storage.global_context.getSettingsRef().min_insert_block_size_bytes) - , current_batch_file_path{path + "current_batch.txt"} - , default_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()} - , sleep_time{default_sleep_time} - , max_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()} - , log{&Poco::Logger::get(getLoggerName())} + , current_batch_file_path(path + "current_batch.txt") + , default_sleep_time(storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()) + , sleep_time(default_sleep_time) + , 
max_sleep_time(storage.global_context.getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()) + , log(&Poco::Logger::get(getLoggerName())) , monitor_blocker(monitor_blocker_) , metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0) { @@ -134,6 +144,10 @@ void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() task_handle->deactivate(); } + std::optional dir_sync_guard; + if (dir_fsync) + dir_sync_guard.emplace(disk, relative_path); + Poco::File(path).remove(true); } @@ -337,6 +351,10 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa throw; } + std::optional dir_sync_guard; + if (dir_fsync) + dir_sync_guard.emplace(disk, relative_path); + Poco::File{file_path}.remove(); metric_pending_files.sub(); @@ -444,10 +462,16 @@ struct StorageDistributedDirectoryMonitor::Batch StorageDistributedDirectoryMonitor & parent; const std::map & file_index_to_path; + bool fsync = false; + bool dir_fsync = false; + Batch( StorageDistributedDirectoryMonitor & parent_, const std::map & file_index_to_path_) - : parent(parent_), file_index_to_path(file_index_to_path_) + : parent(parent_) + , file_index_to_path(file_index_to_path_) + , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) + , dir_fsync(parent.dir_fsync) {} bool isEnoughSize() const @@ -474,12 +498,20 @@ struct StorageDistributedDirectoryMonitor::Batch /// Temporary file is required for atomicity. String tmp_file{parent.current_batch_file_path + ".tmp"}; + std::optional dir_sync_guard; + if (dir_fsync) + dir_sync_guard.emplace(parent.disk, parent.relative_path); + if (Poco::File{tmp_file}.exists()) LOG_ERROR(parent.log, "Temporary file {} exists. Unclean shutdown?", backQuote(tmp_file)); { WriteBufferFromFile out{tmp_file, O_WRONLY | O_TRUNC | O_CREAT}; writeText(out); + + out.finalize(); + if (fsync) + out.sync(); } Poco::File{tmp_file}.renameTo(parent.current_batch_file_path); @@ -537,6 +569,10 @@ struct StorageDistributedDirectoryMonitor::Batch { LOG_TRACE(parent.log, "Sent a batch of {} files.", file_indices.size()); + std::optional dir_sync_guard; + if (dir_fsync) + dir_sync_guard.emplace(parent.disk, parent.relative_path); + for (UInt64 file_index : file_indices) Poco::File{file_index_to_path.at(file_index)}.remove(); } @@ -734,10 +770,16 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map metric_pending_files.sub(batch.file_indices.size()); } - /// current_batch.txt will not exist if there was no send - /// (this is the case when all batches that was pending has been marked as pending) - if (Poco::File{current_batch_file_path}.exists()) - Poco::File{current_batch_file_path}.remove(); + { + std::optional dir_sync_guard; + if (dir_fsync) + dir_sync_guard.emplace(disk, relative_path); + + /// current_batch.txt will not exist if there was no send + /// (this is the case when all batches that was pending has been marked as pending) + if (Poco::File{current_batch_file_path}.exists()) + Poco::File{current_batch_file_path}.remove(); + } } bool StorageDistributedDirectoryMonitor::isFileBrokenErrorCode(int code) @@ -759,6 +801,15 @@ void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_p const auto & broken_file_path = broken_path + file_name; Poco::File{broken_path}.createDirectory(); + + std::optional dir_sync_guard; + std::optional broken_dir_sync_guard; + if (dir_fsync) + { + broken_dir_sync_guard.emplace(disk, relative_path + "/broken/"); + dir_sync_guard.emplace(disk, relative_path); + } + 
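The dir_sync_guard objects in this hunk make directory-level changes (newly created files, the rename into broken/) durable before the scope ends. A guard of this kind typically opens the directory and fsyncs it on destruction; the RAII sketch below is an assumption about the mechanism, not ClickHouse's DirectorySyncGuard:

```cpp
#include <fcntl.h>
#include <string>
#include <unistd.h>

class DirSyncGuard
{
public:
    explicit DirSyncGuard(const std::string & path)
        : fd(::open(path.c_str(), O_DIRECTORY | O_RDONLY)) {}

    ~DirSyncGuard()
    {
        if (fd >= 0)
        {
            ::fsync(fd);   // flush directory metadata: creations, renames, unlinks
            ::close(fd);
        }
    }

private:
    int fd = -1;
};
```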
Poco::File{file_path}.renameTo(broken_file_path); LOG_ERROR(log, "Renamed `{}` to `{}`", file_path, broken_file_path); @@ -781,14 +832,15 @@ std::string StorageDistributedDirectoryMonitor::getLoggerName() const return storage.getStorageID().getFullTableName() + ".DirectoryMonitor"; } -void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_path) +void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_relative_path) { task_handle->deactivate(); std::lock_guard lock{mutex}; { std::unique_lock metrics_lock(metrics_mutex); - path = new_path; + relative_path = new_relative_path; + path = disk->getPath() + relative_path + '/'; } current_batch_file_path = path + "current_batch.txt"; diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 1d34357b3b1..a6175b44d7b 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -14,6 +14,9 @@ namespace CurrentMetrics { class Increment; } namespace DB { +class IDisk; +using DiskPtr = std::shared_ptr; + class StorageDistributed; class ActionBlocker; class BackgroundSchedulePool; @@ -25,13 +28,18 @@ class StorageDistributedDirectoryMonitor { public: StorageDistributedDirectoryMonitor( - StorageDistributed & storage_, std::string path_, ConnectionPoolPtr pool_, ActionBlocker & monitor_blocker_, BackgroundSchedulePool & bg_pool); + StorageDistributed & storage_, + const DiskPtr & disk_, + const std::string & relative_path_, + ConnectionPoolPtr pool_, + ActionBlocker & monitor_blocker_, + BackgroundSchedulePool & bg_pool); ~StorageDistributedDirectoryMonitor(); static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage); - void updatePath(const std::string & new_path); + void updatePath(const std::string & new_relative_path); void flushAllData(); @@ -70,9 +78,13 @@ private: StorageDistributed & storage; const ConnectionPoolPtr pool; + + DiskPtr disk; + std::string relative_path; std::string path; const bool should_batch_inserts = false; + const bool dir_fsync = false; const size_t min_batched_block_size_rows = 0; const size_t min_batched_block_size_bytes = 0; String current_batch_file_path; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 040f33ea02e..7299953c88e 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -299,6 +300,10 @@ DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobRep const Block & shard_block = (num_shards > 1) ? job.current_shard_block : current_block; const Settings & settings = context.getSettingsRef(); + /// Do not initiate INSERT for empty block. 
+ if (shard_block.rows() == 0) + return; + if (!job.is_local_job || !settings.prefer_localhost_replica) { if (!job.stream) @@ -368,7 +373,8 @@ void DistributedBlockOutputStream::writeSync(const Block & block) const Settings & settings = context.getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; - size_t start = 0, end = shards_info.size(); + size_t start = 0; + size_t end = shards_info.size(); if (random_shard_insert) { start = storage.getRandomShardIndex(shards_info); @@ -582,19 +588,46 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_ void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector & dir_names) { + const auto & settings = context.getSettingsRef(); + const auto & distributed_settings = storage.getDistributedSettingsRef(); + + bool fsync = distributed_settings.fsync_after_insert; + bool dir_fsync = distributed_settings.fsync_directories; + + std::string compression_method = Poco::toUpper(settings.network_compression_method.toString()); + std::optional compression_level; + + if (compression_method == "ZSTD") + compression_level = settings.network_zstd_compression_level; + + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs); + CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); + /// tmp directory is used to ensure atomicity of transactions /// and keep monitor thread out from reading incomplete data std::string first_file_tmp_path{}; auto reservation = storage.getStoragePolicy()->reserveAndCheck(block.bytes()); - auto disk = reservation->getDisk()->getPath(); + const auto disk = reservation->getDisk(); + auto disk_path = disk->getPath(); auto data_path = storage.getRelativeDataPath(); + auto make_directory_sync_guard = [&](const std::string & current_path) + { + std::unique_ptr guard; + if (dir_fsync) + { + const std::string relative_path(data_path + current_path); + guard = std::make_unique(disk, relative_path); + } + return guard; + }; + auto it = dir_names.begin(); /// on first iteration write block to a temporary directory for subsequent /// hardlinking to ensure the inode is not freed until we're done { - const std::string path(disk + data_path + *it); + const std::string path(disk_path + data_path + *it); Poco::File(path).createDirectory(); const std::string tmp_path(path + "/tmp/"); @@ -606,8 +639,10 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: /// Write batch to temporary location { + auto tmp_dir_sync_guard = make_directory_sync_guard(*it + "/tmp/"); + WriteBufferFromFile out{first_file_tmp_path}; - CompressedWriteBuffer compress{out}; + CompressedWriteBuffer compress{out, compression_codec}; NativeBlockOutputStream stream{compress, DBMS_TCP_PROTOCOL_VERSION, block.cloneEmpty()}; /// Prepare the header. 
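The writeToShard path in this hunk stores the block once in a tmp directory and then, below, hardlinks it into every destination directory, so the payload exists a single time on disk and its inode stays alive until the last directory monitor removes its link. A condensed POSIX sketch of that fan-out (illustrative, unrelated to ClickHouse's helpers):

```cpp
#include <string>
#include <unistd.h>
#include <vector>

/// Link one temporary file into several directories, then drop the temporary name;
/// the data remains reachable through the remaining hardlinks.
bool fanOutByHardLinks(const std::string & tmp_file, const std::vector<std::string> & dirs, const std::string & file_name)
{
    for (const auto & dir : dirs)
        if (::link(tmp_file.c_str(), (dir + "/" + file_name).c_str()) != 0)
            return false;

    return ::unlink(tmp_file.c_str()) == 0;
}
```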
@@ -631,22 +666,28 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: stream.writePrefix(); stream.write(block); stream.writeSuffix(); + + out.finalize(); + if (fsync) + out.sync(); } // Create hardlink here to reuse increment number const std::string block_file_path(path + '/' + file_name); createHardLink(first_file_tmp_path, block_file_path); + auto dir_sync_guard = make_directory_sync_guard(*it); } ++it; /// Make hardlinks for (; it != dir_names.end(); ++it) { - const std::string path(disk + data_path + *it); + const std::string path(disk_path + data_path + *it); Poco::File(path).createDirectory(); - const std::string block_file_path(path + '/' + toString(storage.file_names_increment.get()) + ".bin"); + const std::string block_file_path(path + '/' + toString(storage.file_names_increment.get()) + ".bin"); createHardLink(first_file_tmp_path, block_file_path); + auto dir_sync_guard = make_directory_sync_guard(*it); } /// remove the temporary file, enabling the OS to reclaim inode after all threads diff --git a/src/Storages/Distributed/DistributedSettings.cpp b/src/Storages/Distributed/DistributedSettings.cpp new file mode 100644 index 00000000000..555aeba7c58 --- /dev/null +++ b/src/Storages/Distributed/DistributedSettings.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) + +void DistributedSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} + diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h new file mode 100644 index 00000000000..9df787428df --- /dev/null +++ b/src/Storages/Distributed/DistributedSettings.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + + +namespace Poco::Util +{ + class AbstractConfiguration; +} + + +namespace DB +{ +class ASTStorage; + +#define LIST_OF_DISTRIBUTED_SETTINGS(M) \ + M(Bool, fsync_after_insert, false, "Do fsync for every inserted. Will decreases performance of inserts (only for async INSERT, i.e. insert_distributed_sync=false)", 0) \ + M(Bool, fsync_directories, false, "Do fsync for temporary directory (that is used for async INSERT only) after all part operations (writes, renames, etc.).", 0) \ + +DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) + + +/** Settings for the Distributed family of engines. + */ +struct DistributedSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 42ece547e1c..cfb4c4e9646 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -120,9 +120,6 @@ public: /// Returns true if the storage supports deduplication of inserted data blocks. virtual bool supportsDeduplication() const { return false; } - /// Returns true if the storage supports settings. - virtual bool supportsSettings() const { return false; } - /// Returns true if the blocks shouldn't be pushed to associated views on insert. 
virtual bool noPushingToViews() const { return false; } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 8ec8e718011..53871990810 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -36,7 +36,6 @@ class StorageKafka final : public ext::shared_ptr_helper, public I public: std::string getName() const override { return "Kafka"; } - bool supportsSettings() const override { return true; } bool noPushingToViews() const override { return true; } void startup() override; diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f85f9501fcf..5d50f29756c 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -398,7 +398,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( disk->createDirectories(part_download_path); - std::optional sync_guard; + std::optional sync_guard; if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, part_download_path); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 104eedf060e..5d0f79f4679 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -835,12 +835,8 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_ String from = getFullRelativePath(); String to = storage.relative_data_path + new_relative_path + "/"; - std::optional sync_guard; - if (storage.getSettings()->fsync_part_directory) - sync_guard.emplace(volume->getDisk(), to); - if (!volume->getDisk()->exists(from)) - throw Exception("Part directory " + fullPath(volume->getDisk(), from) + " doesn't exist. Most likely it is logical error.", ErrorCodes::FILE_DOESNT_EXIST); + throw Exception("Part directory " + fullPath(volume->getDisk(), from) + " doesn't exist. Most likely it is a logical error.", ErrorCodes::FILE_DOESNT_EXIST); if (volume->getDisk()->exists(to)) { @@ -862,6 +858,10 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_ volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); volume->getDisk()->moveFile(from, to); relative_path = new_relative_path; + + std::optional sync_guard; + if (storage.getSettings()->fsync_part_directory) + sync_guard.emplace(volume->getDisk(), to); } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 2a52d7d53f0..dd293bf2502 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -1,5 +1,4 @@ #include -#include #include #include diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index a5f7ebce84f..7cdfd8e566d 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -112,13 +112,12 @@ private: String value = zookeeper.get(path + "/" + children.front()); -#if !defined(ARCADIA_BUILD) /// C++20; Replicated tables are unused in Arcadia. 
if (value.ends_with(suffix)) { handler(); return; } -#endif + if (my_node_it == children.begin()) throw Poco::Exception("Assertion failed in LeaderElection"); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e5ffe8c025b..52c0b61b977 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -357,7 +357,6 @@ public: || merging_params.mode == MergingParams::VersionedCollapsing; } - bool supportsSettings() const override { return true; } NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 64a0e7329ee..a735b939cd5 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -780,7 +780,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor gathering_column_names.clear(); } - std::optional sync_guard; + std::optional sync_guard; if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, new_part_tmp_path); @@ -910,7 +910,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (metadata_snapshot->hasSecondaryIndices()) { const auto & indices = metadata_snapshot->getSecondaryIndices(); - merged_stream = std::make_shared(merged_stream, indices.getSingleExpressionForIndices(metadata_snapshot->getColumns(), data.global_context)); + merged_stream = std::make_shared( + merged_stream, indices.getSingleExpressionForIndices(metadata_snapshot->getColumns(), data.global_context)); merged_stream = std::make_shared(merged_stream); } @@ -921,7 +922,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merging_columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, - data_settings->min_merge_bytes_to_use_direct_io, blocks_are_granules_size}; merged_stream->readPrefix(); @@ -1182,7 +1182,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor disk->createDirectories(new_part_tmp_path); - std::optional sync_guard; + std::optional sync_guard; if (data.getSettings()->fsync_part_directory) sync_guard.emplace(disk, new_part_tmp_path); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index d5e7009efd6..ef3b5eb7d24 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -24,9 +24,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( , plain_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, - WriteMode::Rewrite, - settings.estimated_size, - settings.aio_threshold)) + WriteMode::Rewrite)) , plain_hashing(*plain_file) , marks_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 8e6ffe9ee68..fd3338c8a70 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -39,13 +39,11 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream( const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_, - size_t estimated_size_, - size_t aio_threshold_) : + size_t max_compress_block_size_) : escaped_column_name(escaped_column_name_), data_file_extension{data_file_extension_}, marks_file_extension{marks_file_extension_}, - plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite, estimated_size_, aio_threshold_)), + plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite)), plain_hashing(*plain_file), compressed_buf(plain_hashing, compression_codec_), compressed(compressed_buf), marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite)), marks(*marks_file) { @@ -164,8 +162,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() data_part->volume->getDisk(), part_path + stream_name, INDEX_FILE_EXTENSION, part_path + stream_name, marks_file_extension, - default_codec, settings.max_compress_block_size, - 0, settings.aio_threshold)); + default_codec, settings.max_compress_block_size)); skip_indices_aggregators.push_back(index_helper->createIndexAggregator()); skip_index_accumulated_marks.push_back(0); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index a7b84c95e0a..704b38ba6d5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -56,9 +56,7 @@ public: const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_, - size_t estimated_size_, - size_t aio_threshold_); + size_t max_compress_block_size_); String escaped_column_name; std::string data_file_extension; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index be735104e99..bb3b16d51e8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -80,14 +80,13 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( { const auto & columns = metadata_snapshot->getColumns(); for (const auto & it : columns_list) - addStreams(it.name, *it.type, columns.getCodecDescOrDefault(it.name, default_codec), settings.estimated_size); + addStreams(it.name, *it.type, columns.getCodecDescOrDefault(it.name, default_codec)); } void MergeTreeDataPartWriterWide::addStreams( const String & name, const IDataType & type, - const ASTPtr & effective_codec_desc, - size_t estimated_size) + const ASTPtr & effective_codec_desc) { IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type) { @@ -109,9 +108,7 @@ void MergeTreeDataPartWriterWide::addStreams( part_path + stream_name, DATA_FILE_EXTENSION, part_path + stream_name, marks_file_extension, compression_codec, - settings.max_compress_block_size, - estimated_size, - settings.aio_threshold); + settings.max_compress_block_size); }; IDataType::SubstreamPath stream_path; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 8c76c10abef..d897503a033 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -85,8 +85,7 @@ private: void addStreams( const String & name, const IDataType & type, - const ASTPtr & effective_codec_desc, - size_t estimated_size); + const ASTPtr & effective_codec_desc); /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 98a08abab65..4e1f307137a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -59,6 +59,7 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ROWS; extern const int CANNOT_PARSE_TEXT; + extern const int TOO_MANY_PARTITIONS; } @@ -706,6 +707,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( if (parts_with_ranges.empty()) return std::make_unique(); + auto max_partitions_to_read + = settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data.getSettings()->max_partitions_to_read; + if (max_partitions_to_read > 0) + { + std::set partitions; + for (auto & part_with_ranges : parts_with_ranges) + partitions.insert(part_with_ranges.data_part->info.partition_id); + if (partitions.size() > size_t(max_partitions_to_read)) + throw Exception( + ErrorCodes::TOO_MANY_PARTITIONS, + "Too many partitions to read. Current {}, max {}", + partitions.size(), + max_partitions_to_read); + } + ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c93d4bceba0..de4d70d5e3e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include @@ -362,7 +362,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; - std::optional sync_guard; + std::optional sync_guard; if (new_data_part->isStoredOnDisk()) { /// The name could be non-unique in case of stale files from previous runs. diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 2dec16e7d10..9e315c08681 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -24,7 +24,6 @@ struct MergeTreeWriterSettings const Settings & global_settings, const MergeTreeSettingsPtr & storage_settings, bool can_use_adaptive_granularity_, - size_t aio_threshold_, bool rewrite_primary_key_, bool blocks_are_granules_size_ = false) : min_compress_block_size( @@ -32,7 +31,6 @@ struct MergeTreeWriterSettings , max_compress_block_size( storage_settings->max_compress_block_size ? 
storage_settings->max_compress_block_size : global_settings.max_compress_block_size) - , aio_threshold(aio_threshold_) , can_use_adaptive_granularity(can_use_adaptive_granularity_) , rewrite_primary_key(rewrite_primary_key_) , blocks_are_granules_size(blocks_are_granules_size_) @@ -41,14 +39,9 @@ struct MergeTreeWriterSettings size_t min_compress_block_size; size_t max_compress_block_size; - size_t aio_threshold; bool can_use_adaptive_granularity; bool rewrite_primary_key; bool blocks_are_granules_size; - - /// Used for AIO threshold comparison - /// FIXME currently doesn't work because WriteBufferAIO contain obscure bug(s) - size_t estimated_size = 0; }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 9b344d19f8b..3e811336fc6 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -110,6 +110,7 @@ struct Settings; M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \ M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ + M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \ \ /** Obsolete settings. Kept for backward compatibility only. */ \ M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index feaf46194d1..1605ec693cb 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -21,25 +21,6 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, bool blocks_are_granules_size) - : MergedBlockOutputStream( - data_part, - metadata_snapshot_, - columns_list_, - skip_indices, - default_codec_, - data_part->storage.global_context.getSettings().min_bytes_to_use_direct_io, - blocks_are_granules_size) -{ -} - -MergedBlockOutputStream::MergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part, - const StorageMetadataPtr & metadata_snapshot_, - const NamesAndTypesList & columns_list_, - const MergeTreeIndices & skip_indices, - CompressionCodecPtr default_codec_, - size_t aio_threshold, - bool blocks_are_granules_size) : IMergedBlockOutputStream(data_part, metadata_snapshot_) , columns_list(columns_list_) , default_codec(default_codec_) @@ -48,7 +29,6 @@ MergedBlockOutputStream::MergedBlockOutputStream( storage.global_context.getSettings(), storage.getSettings(), data_part->index_granularity_info.is_adaptive, - aio_threshold, /* rewrite_primary_key = */ true, blocks_are_granules_size); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 3db0e45d207..d04df598218 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -21,15 +21,6 @@ public: CompressionCodecPtr default_codec_, bool blocks_are_granules_size = false); - MergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part, - const StorageMetadataPtr & metadata_snapshot_, - const 
NamesAndTypesList & columns_list_, - const MergeTreeIndices & skip_indices, - CompressionCodecPtr default_codec_, - size_t aio_threshold, - bool blocks_are_granules_size = false); - Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } /// If the data is pre-sorted. diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 87f67ee92a5..41479f104f3 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -28,7 +28,6 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( global_settings, storage_settings, index_granularity_info ? index_granularity_info->is_adaptive : data_part->storage.canUseAdaptiveGranularity(), - global_settings.min_bytes_to_use_direct_io, /* rewrite_primary_key = */false); writer = data_part->getWriter( diff --git a/src/Storages/MergeTree/ReplicatedFetchList.h b/src/Storages/MergeTree/ReplicatedFetchList.h index 81d538abf9c..0ab631e53b4 100644 --- a/src/Storages/MergeTree/ReplicatedFetchList.h +++ b/src/Storages/MergeTree/ReplicatedFetchList.h @@ -3,9 +3,9 @@ #include #include #include -#include #include + namespace CurrentMetrics { extern const Metric ReplicatedFetch; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 6d5fab744a5..00ef3ee7292 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -12,7 +12,6 @@ #include #include -#include namespace DB diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index a2429cead3d..0854cc3653c 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -756,6 +756,7 @@ void registerStorageMergeTree(StorageFactory & factory) .supports_skipping_indices = true, .supports_sort_order = true, .supports_ttl = true, + .supports_parallel_insert = true, }; factory.registerStorage("MergeTree", create, features); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index a46da6072af..893c5167a97 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -29,7 +29,6 @@ class StorageRabbitMQ final: public ext::shared_ptr_helper, pub public: std::string getName() const override { return "RabbitMQ"; } - bool supportsSettings() const override { return true; } bool noPushingToViews() const override { return true; } void startup() override; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 80b25793806..249026d1011 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -367,6 +367,7 @@ void registerStorageEmbeddedRocksDB(StorageFactory & factory) { StorageFactory::StorageFeatures features{ .supports_sort_order = true, + .supports_parallel_insert = true, }; factory.registerStorage("EmbeddedRocksDB", create, features); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index ba95530a9a6..13bab7a00d9 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -996,6 +996,9 @@ void registerStorageBuffer(StorageFactory & factory) StorageBuffer::Thresholds{max_time, max_rows, max_bytes}, destination_id, 
static_cast(args.local_context.getSettingsRef().insert_allow_materialized_columns)); + }, + { + .supports_parallel_insert = true, }); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4ce7efb60b4..afd7d6b876e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -363,6 +363,7 @@ StorageDistributed::StorageDistributed( const ASTPtr & sharding_key_, const String & storage_policy_name_, const String & relative_data_path_, + const DistributedSettings & distributed_settings_, bool attach_, ClusterPtr owned_cluster_) : IStorage(id_) @@ -374,6 +375,7 @@ StorageDistributed::StorageDistributed( , cluster_name(global_context.getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) , relative_data_path(relative_data_path_) + , distributed_settings(distributed_settings_) , rng(randomSeed()) { StorageInMemoryMetadata storage_metadata; @@ -417,9 +419,10 @@ StorageDistributed::StorageDistributed( const ASTPtr & sharding_key_, const String & storage_policy_name_, const String & relative_data_path_, + const DistributedSettings & distributed_settings_, bool attach, ClusterPtr owned_cluster_) - : StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_, storage_policy_name_, relative_data_path_, attach, std::move(owned_cluster_)) + : StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_, storage_policy_name_, relative_data_path_, distributed_settings_, attach, std::move(owned_cluster_)) { remote_table_function_ptr = std::move(remote_table_function_ptr_); } @@ -540,7 +543,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta /// Ban an attempt to make async insert into the table belonging to DatabaseMemory if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync) { - throw Exception("Storage " + getName() + " must has own data directory to enable asynchronous inserts", + throw Exception("Storage " + getName() + " must have own data directory to enable asynchronous inserts", ErrorCodes::BAD_ARGUMENTS); } @@ -558,8 +561,10 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster return std::make_shared( - context, *this, metadata_snapshot, createInsertToRemoteTableQuery(remote_database, remote_table, metadata_snapshot->getSampleBlockNonMaterialized()), cluster, - insert_sync, timeout); + context, *this, metadata_snapshot, + createInsertToRemoteTableQuery( + remote_database, remote_table, metadata_snapshot->getSampleBlockNonMaterialized()), + cluster, insert_sync, timeout); } @@ -599,7 +604,7 @@ void StorageDistributed::startup() return; for (const DiskPtr & disk : data_volume->getDisks()) - createDirectoryMonitors(disk->getPath()); + createDirectoryMonitors(disk); for (const String & path : getDataPaths()) { @@ -679,9 +684,9 @@ StoragePolicyPtr StorageDistributed::getStoragePolicy() const return storage_policy; } -void StorageDistributed::createDirectoryMonitors(const std::string & disk) +void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk) { - const std::string path(disk + relative_data_path); + const std::string path(disk->getPath() + relative_data_path); Poco::File{path}.createDirectories(); std::filesystem::directory_iterator begin(path); @@ -712,10 
+717,10 @@ void StorageDistributed::createDirectoryMonitors(const std::string & disk) } -StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const std::string & disk, const std::string & name) +StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name) { - const std::string path(disk + relative_data_path + name); - const std::string key(disk + name); + const std::string & disk_path = disk->getPath(); + const std::string key(disk_path + name); std::lock_guard lock(cluster_nodes_mutex); auto & node_data = cluster_nodes_data[key]; @@ -723,7 +728,10 @@ StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor( { node_data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this); node_data.directory_monitor = std::make_unique( - *this, path, node_data.connection_pool, monitors_blocker, global_context.getDistributedSchedulePool()); + *this, disk, relative_data_path + name, + node_data.connection_pool, + monitors_blocker, + global_context.getDistributedSchedulePool()); } return *node_data.directory_monitor; } @@ -930,7 +938,7 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) std::lock_guard lock(cluster_nodes_mutex); for (auto & node : cluster_nodes_data) - node.second.directory_monitor->updatePath(new_path); + node.second.directory_monitor->updatePath(new_path_to_table_data); } relative_data_path = new_path_to_table_data; @@ -952,6 +960,8 @@ void registerStorageDistributed(StorageFactory & factory) * - constant expression with string result, like currentDatabase(); * -- string literal as specific case; * - empty string means 'use default database from cluster'. + * + * Distributed engine also supports SETTINGS clause. */ ASTs & engine_args = args.engine_args; @@ -993,6 +1003,13 @@ void registerStorageDistributed(StorageFactory & factory) ", but should be one of integer type", ErrorCodes::TYPE_MISMATCH); } + /// TODO: move some arguments from the arguments to the SETTINGS. 
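Aside on the max_partitions_to_read guard added to MergeTreeDataSelectExecutor::readFromParts in the hunk above: it counts the distinct partition IDs of the parts that survived pruning and refuses to run the query once the limit is exceeded, with the query-level setting (when explicitly changed) overriding the per-table MergeTree setting. A condensed, self-contained sketch of that check; PartWithRanges, checkPartitionsLimit and std::runtime_error are placeholders for the real ClickHouse types (RangesInDataPart, Exception with ErrorCodes::TOO_MANY_PARTITIONS):

#include <cstdint>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

struct PartWithRanges { std::string partition_id; };   // placeholder for RangesInDataPart

// Sketch of the guard: limit <= 0 means "unlimited", mirroring the new MergeTree
// setting max_partitions_to_read (default -1) and its query-level override.
void checkPartitionsLimit(const std::vector<PartWithRanges> & parts_with_ranges, int64_t max_partitions_to_read)
{
    if (max_partitions_to_read <= 0)
        return;

    std::set<std::string> partitions;
    for (const auto & part : parts_with_ranges)
        partitions.insert(part.partition_id);

    if (partitions.size() > static_cast<size_t>(max_partitions_to_read))
        throw std::runtime_error("Too many partitions to read. Current "
            + std::to_string(partitions.size()) + ", max " + std::to_string(max_partitions_to_read));
}
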
+ DistributedSettings distributed_settings; + if (args.storage_def->settings) + { + distributed_settings.loadFromQuery(*args.storage_def); + } + return StorageDistributed::create( args.table_id, args.columns, args.constraints, remote_database, remote_table, cluster_name, @@ -1000,9 +1017,12 @@ void registerStorageDistributed(StorageFactory & factory) sharding_key, storage_policy, args.relative_data_path, + distributed_settings, args.attach); }, { + .supports_settings = true, + .supports_parallel_insert = true, .source_access_type = AccessType::REMOTE, }); } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index ce7e48c85a9..585efafddfb 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -23,6 +24,9 @@ class Context; class IVolume; using VolumePtr = std::shared_ptr; +class IDisk; +using DiskPtr = std::shared_ptr; + class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; @@ -103,9 +107,9 @@ public: std::string getClusterName() const { return cluster_name; } /// Returns empty string if tables is used by TableFunctionRemote /// create directory monitors for each existing subdirectory - void createDirectoryMonitors(const std::string & disk); + void createDirectoryMonitors(const DiskPtr & disk); /// ensure directory monitor thread and connectoin pool creation by disk and subdirectory name - StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & disk, const std::string & name); + StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name); /// Return list of metrics for all created monitors /// (note that monitors are created lazily, i.e. until at least one INSERT executed) std::vector getDirectoryMonitorsStatuses() const; @@ -127,6 +131,8 @@ public: size_t getRandomShardIndex(const Cluster::ShardsInfo & shards); + const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; } + String remote_database; String remote_table; ASTPtr remote_table_function_ptr; @@ -162,6 +168,7 @@ protected: const ASTPtr & sharding_key_, const String & storage_policy_name_, const String & relative_data_path_, + const DistributedSettings & distributed_settings_, bool attach_, ClusterPtr owned_cluster_ = {}); @@ -175,6 +182,7 @@ protected: const ASTPtr & sharding_key_, const String & storage_policy_name_, const String & relative_data_path_, + const DistributedSettings & distributed_settings_, bool attach, ClusterPtr owned_cluster_ = {}); @@ -188,6 +196,8 @@ protected: /// Other volumes will be ignored. It's needed to allow using the same multi-volume policy both for Distributed and other engines. 
VolumePtr data_volume; + DistributedSettings distributed_settings; + struct ClusterNodeData { std::unique_ptr directory_monitor; diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index de9060769cb..18dd24e10db 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -47,14 +47,20 @@ public: bool has_force_restore_data_flag; }; + /// Analog of the IStorage::supports*() helpers + /// (But the former cannot be replaced with StorageFeatures due to nesting) struct StorageFeatures { bool supports_settings = false; bool supports_skipping_indices = false; bool supports_sort_order = false; bool supports_ttl = false; + /// See also IStorage::supportsReplication() bool supports_replication = false; + /// See also IStorage::supportsDeduplication() bool supports_deduplication = false; + /// See also IStorage::supportsParallelInsert() + bool supports_parallel_insert = false; AccessType source_access_type = AccessType::NONE; }; @@ -85,6 +91,7 @@ public: .supports_ttl = false, .supports_replication = false, .supports_deduplication = false, + .supports_parallel_insert = false, .source_access_type = AccessType::NONE, }); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 85888ee4b6a..a5935ba3bf4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -52,6 +52,7 @@ namespace ErrorCodes extern const int UNKNOWN_IDENTIFIER; extern const int INCORRECT_FILE_NAME; extern const int FILE_DOESNT_EXIST; + extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; } @@ -215,6 +216,17 @@ StorageFile::StorageFile(CommonArguments args) setInMemoryMetadata(storage_metadata); } + +static std::chrono::seconds getLockTimeout(const Context & context) +{ + const Settings & settings = context.getSettingsRef(); + Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); + if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) + lock_timeout = settings.max_execution_time.totalSeconds(); + return std::chrono::seconds{lock_timeout}; +} + + class StorageFileSource : public SourceWithProgress { public: @@ -261,7 +273,9 @@ public: { if (storage->use_table_fd) { - unique_lock = std::unique_lock(storage->rwlock); + unique_lock = std::unique_lock(storage->rwlock, getLockTimeout(context)); + if (!unique_lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); /// We could use common ReadBuffer and WriteBuffer in storage to leverage cache /// and add ability to seek unseekable files, but cache sync isn't supported. 
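The getLockTimeout helper and the timed-lock pattern introduced here recur in the StorageFile, StorageLog, StorageStripeLog and StorageTinyLog hunks that follow: the table rwlock becomes a std::shared_timed_mutex, the wait is bounded by the smaller of lock_acquire_timeout and a non-zero max_execution_time, and a lock that cannot be taken in time turns into a TIMEOUT_EXCEEDED error instead of an indefinite hang. A stripped-down, standard-library-only sketch of the idiom; the Settings struct and std::runtime_error stand in for the ClickHouse Settings and Exception types:

#include <chrono>
#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>

struct Settings { int64_t lock_acquire_timeout_sec = 120; int64_t max_execution_time_sec = 0; };

// Effective timeout: lock_acquire_timeout, capped by max_execution_time when the latter is set.
static std::chrono::seconds getLockTimeout(const Settings & settings)
{
    int64_t lock_timeout = settings.lock_acquire_timeout_sec;
    if (settings.max_execution_time_sec != 0 && settings.max_execution_time_sec < lock_timeout)
        lock_timeout = settings.max_execution_time_sec;
    return std::chrono::seconds{lock_timeout};
}

struct Storage
{
    mutable std::shared_timed_mutex rwlock;   // supports try_lock_for / try_lock_shared_for

    void read(const Settings & settings) const
    {
        std::shared_lock<std::shared_timed_mutex> lock(rwlock, getLockTimeout(settings));
        if (!lock)   // owns_lock() == false: the timeout expired
            throw std::runtime_error("Lock timeout exceeded");
        // ... read under the shared lock ...
    }

    void write(const Settings & settings)
    {
        std::unique_lock<std::shared_timed_mutex> lock(rwlock, getLockTimeout(settings));
        if (!lock)
            throw std::runtime_error("Lock timeout exceeded");
        // ... the real code moves this lock into the block output stream so that
        // the write keeps the table locked for the stream's whole lifetime ...
    }
};
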
@@ -280,7 +294,9 @@ public: } else { - shared_lock = std::shared_lock(storage->rwlock); + shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(context)); + if (!shared_lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); } } @@ -391,8 +407,8 @@ private: bool finished_generate = false; - std::shared_lock shared_lock; - std::unique_lock unique_lock; + std::shared_lock shared_lock; + std::unique_lock unique_lock; }; @@ -450,13 +466,17 @@ public: explicit StorageFileBlockOutputStream( StorageFile & storage_, const StorageMetadataPtr & metadata_snapshot_, + std::unique_lock && lock_, const CompressionMethod compression_method, const Context & context, const std::optional & format_settings) : storage(storage_) , metadata_snapshot(metadata_snapshot_) - , lock(storage.rwlock) + , lock(std::move(lock_)) { + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + std::unique_ptr naked_buffer = nullptr; if (storage.use_table_fd) { @@ -512,7 +532,7 @@ public: private: StorageFile & storage; StorageMetadataPtr metadata_snapshot; - std::unique_lock lock; + std::unique_lock lock; std::unique_ptr write_buf; BlockOutputStreamPtr writer; bool prefix_written{false}; @@ -533,8 +553,12 @@ BlockOutputStreamPtr StorageFile::write( Poco::File(Poco::Path(path).makeParent()).createDirectories(); } - return std::make_shared(*this, metadata_snapshot, - chooseCompressionMethod(path, compression_method), context, + return std::make_shared( + *this, + metadata_snapshot, + std::unique_lock{rwlock, getLockTimeout(context)}, + chooseCompressionMethod(path, compression_method), + context, format_settings); } @@ -562,8 +586,6 @@ void StorageFile::rename(const String & new_path_to_table_data, const StorageID if (path_new == paths[0]) return; - std::unique_lock lock(rwlock); - Poco::File(Poco::Path(path_new).parent()).createDirectories(); Poco::File(paths[0]).renameTo(path_new); @@ -580,8 +602,6 @@ void StorageFile::truncate( if (paths.size() != 1) throw Exception("Can't truncate table '" + getStorageID().getNameForLogs() + "' in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); - std::unique_lock lock(rwlock); - if (use_table_fd) { if (0 != ::ftruncate(table_fd, 0)) diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 92287c98fc9..c316412f808 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -98,7 +98,7 @@ private: std::atomic table_fd_was_used{false}; /// To detect repeating reads from stdin off_t table_fd_init_offset = -1; /// Initial position of fd, used for repeating reads - mutable std::shared_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log = &Poco::Logger::get("StorageFile"); }; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 86cc6afe33f..06e9bb8a2d6 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -39,6 +39,7 @@ namespace DB namespace ErrorCodes { + extern const int TIMEOUT_EXCEEDED; extern const int LOGICAL_ERROR; extern const int DUPLICATE_COLUMN; extern const int SIZES_OF_MARKS_FILES_ARE_INCONSISTENT; @@ -50,7 +51,6 @@ namespace ErrorCodes class LogSource final : public SourceWithProgress { public: - static Block getHeader(const NamesAndTypesList & columns) { Block res; @@ -113,90 +113,6 @@ private: }; -class LogBlockOutputStream final : public IBlockOutputStream -{ -public: - explicit LogBlockOutputStream(StorageLog & storage_, const StorageMetadataPtr & metadata_snapshot_) - : storage(storage_) - , 
metadata_snapshot(metadata_snapshot_) - , lock(storage.rwlock) - , marks_stream( - storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) - { - } - - ~LogBlockOutputStream() override - { - try - { - if (!done) - { - /// Rollback partial writes. - streams.clear(); - storage.file_checker.repair(); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } - void write(const Block & block) override; - void writeSuffix() override; - -private: - StorageLog & storage; - StorageMetadataPtr metadata_snapshot; - std::unique_lock lock; - bool done = false; - - struct Stream - { - Stream(const DiskPtr & disk, const String & data_path, CompressionCodecPtr codec, size_t max_compress_block_size) : - plain(disk->writeFile(data_path, max_compress_block_size, WriteMode::Append)), - compressed(*plain, std::move(codec), max_compress_block_size), - plain_offset(disk->getFileSize(data_path)) - { - } - - std::unique_ptr plain; - CompressedWriteBuffer compressed; - - size_t plain_offset; /// How many bytes were in the file at the time the LogBlockOutputStream was created. - - void finalize() - { - compressed.next(); - plain->next(); - } - }; - - using Mark = StorageLog::Mark; - using MarksForColumns = std::vector>; - - using FileStreams = std::map; - FileStreams streams; - - using WrittenStreams = std::set; - - std::unique_ptr marks_stream; /// Declared below `lock` to make the file open when rwlock is captured. - - using SerializeState = IDataType::SerializeBinaryBulkStatePtr; - using SerializeStates = std::map; - SerializeStates serialize_states; - - IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenStreams & written_streams); - - void writeData(const String & name, const IDataType & type, const IColumn & column, - MarksForColumns & out_marks, - WrittenStreams & written_streams); - - void writeMarks(MarksForColumns && marks); -}; - - Chunk LogSource::generate() { Block res; @@ -204,7 +120,7 @@ Chunk LogSource::generate() if (rows_read == rows_limit) return {}; - if (storage.disk->isDirectoryEmpty(storage.table_path)) + if (storage.file_checker.empty()) return {}; /// How many rows to read for the next block. @@ -281,6 +197,101 @@ void LogSource::readData(const String & name, const IDataType & type, IColumn & } +class LogBlockOutputStream final : public IBlockOutputStream +{ +public: + explicit LogBlockOutputStream( + StorageLog & storage_, const StorageMetadataPtr & metadata_snapshot_, std::unique_lock && lock_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , lock(std::move(lock_)) + , marks_stream( + storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) + { + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + /// If there were no files, add info to rollback in case of error. + if (storage.file_checker.empty()) + { + for (const auto & file : storage.files) + storage.file_checker.setEmpty(file.second.data_file_path); + storage.file_checker.save(); + } + } + + ~LogBlockOutputStream() override + { + try + { + if (!done) + { + /// Rollback partial writes. + streams.clear(); + storage.file_checker.repair(); + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } + void write(const Block & block) override; + void writeSuffix() override; + +private: + StorageLog & storage; + StorageMetadataPtr metadata_snapshot; + std::unique_lock lock; + bool done = false; + + struct Stream + { + Stream(const DiskPtr & disk, const String & data_path, CompressionCodecPtr codec, size_t max_compress_block_size) : + plain(disk->writeFile(data_path, max_compress_block_size, WriteMode::Append)), + compressed(*plain, std::move(codec), max_compress_block_size), + plain_offset(disk->getFileSize(data_path)) + { + } + + std::unique_ptr plain; + CompressedWriteBuffer compressed; + + size_t plain_offset; /// How many bytes were in the file at the time the LogBlockOutputStream was created. + + void finalize() + { + compressed.next(); + plain->next(); + } + }; + + using Mark = StorageLog::Mark; + using MarksForColumns = std::vector>; + + using FileStreams = std::map; + FileStreams streams; + + using WrittenStreams = std::set; + + std::unique_ptr marks_stream; /// Declared below `lock` to make the file open when rwlock is captured. + + using SerializeState = IDataType::SerializeBinaryBulkStatePtr; + using SerializeStates = std::map; + SerializeStates serialize_states; + + IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenStreams & written_streams); + + void writeData(const String & name, const IDataType & type, const IColumn & column, + MarksForColumns & out_marks, + WrittenStreams & written_streams); + + void writeMarks(MarksForColumns && marks); +}; + + void LogBlockOutputStream::write(const Block & block) { metadata_snapshot->check(block, true); @@ -474,10 +485,6 @@ StorageLog::StorageLog( addFiles(column.name, *column.type); marks_file_path = table_path + DBMS_STORAGE_LOG_MARKS_FILE_NAME; - - if (!attach) - for (const auto & file : files) - file_checker.setEmpty(file.second.data_file_path); } @@ -507,9 +514,11 @@ void StorageLog::addFiles(const String & column_name, const IDataType & type) } -void StorageLog::loadMarks() +void StorageLog::loadMarks(std::chrono::seconds lock_timeout) { - std::unique_lock lock(rwlock); + std::unique_lock lock(rwlock, lock_timeout); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); if (loaded_marks) return; @@ -552,8 +561,6 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID & { assert(table_path != new_path_to_table_data); { - std::unique_lock lock(rwlock); - disk->moveDirectory(table_path, new_path_to_table_data); table_path = new_path_to_table_data; @@ -569,8 +576,6 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID & void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { - std::shared_lock lock(rwlock); - files.clear(); file_count = 0; loaded_marks = false; @@ -610,6 +615,17 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta return it->second.marks; } + +static std::chrono::seconds getLockTimeout(const Context & context) +{ + const Settings & settings = context.getSettingsRef(); + Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); + if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) + lock_timeout = settings.max_execution_time.totalSeconds(); + return std::chrono::seconds{lock_timeout}; +} 
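A second pattern repeated in the Log, StripeLog and TinyLog hunks: instead of registering data files in file_checker at CREATE time, the output stream registers them (setEmpty plus save) on its first successful write, and only records the new sizes after writeSuffix; if the stream is destroyed before that point, its destructor drops the partially written streams and calls file_checker.repair() to roll the files back to their last recorded sizes, while readers treat file_checker.empty() as "the table has no data yet". A schematic, standard-library sketch of that rollback contract; FileSizes and AppendingWriter are illustrative stand-ins for FileChecker and the block output streams, not ClickHouse APIs:

#include <cstdint>
#include <filesystem>
#include <fstream>
#include <map>
#include <string>

// Stand-in for FileChecker: remembers the last known good size of every data file.
struct FileSizes
{
    std::map<std::string, std::uintmax_t> sizes;

    bool empty() const { return sizes.empty(); }
    void setEmpty(const std::string & path) { sizes[path] = 0; }
    void update(const std::string & path) { sizes[path] = std::filesystem::file_size(path); }

    // Truncate every tracked file back to its last recorded size, discarding a partial append.
    void repair() const
    {
        for (const auto & [path, size] : sizes)
            std::filesystem::resize_file(path, size);
    }
};

struct AppendingWriter
{
    FileSizes & checker;
    std::string path;
    std::ofstream out;
    bool done = false;

    AppendingWriter(FileSizes & checker_, const std::string & path_)
        : checker(checker_), path(path_), out(path_, std::ios::app)
    {
        if (checker.empty())
            checker.setEmpty(path);   // first write ever: register the file before touching it
                                      // (the real code also persists this snapshot via save())
    }

    void finish()
    {
        out.flush();
        checker.update(path);         // record the newly committed size
        done = true;
    }

    ~AppendingWriter()
    {
        if (!done)
            checker.repair();         // roll back a write that never reached finish()
    }
};
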
+ + Pipe StorageLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -620,11 +636,15 @@ Pipe StorageLog::read( unsigned num_streams) { metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); - loadMarks(); + + auto lock_timeout = getLockTimeout(context); + loadMarks(lock_timeout); NamesAndTypesList all_columns = Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)); - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, lock_timeout); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); Pipes pipes; @@ -653,18 +673,28 @@ Pipe StorageLog::read( max_read_buffer_size)); } + /// No need to hold lock while reading because we read fixed range of data that does not change while appending more data. return Pipe::unitePipes(std::move(pipes)); } -BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) +BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { - loadMarks(); - return std::make_shared(*this, metadata_snapshot); + auto lock_timeout = getLockTimeout(context); + loadMarks(lock_timeout); + + std::unique_lock lock(rwlock, lock_timeout); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + return std::make_shared(*this, metadata_snapshot, std::move(lock)); } -CheckResults StorageLog::checkData(const ASTPtr & /* query */, const Context & /* context */) +CheckResults StorageLog::checkData(const ASTPtr & /* query */, const Context & context) { - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, getLockTimeout(context)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + return file_checker.check(); } diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 51fd334d882..a88b6dfb6ff 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -84,7 +84,7 @@ private: DiskPtr disk; String table_path; - mutable std::shared_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Files files; @@ -105,7 +105,7 @@ private: /// Read marks files if they are not already read. /// It is done lazily, so that with a large number of tables, the server starts quickly. /// You can not call with a write locked `rwlock`. - void loadMarks(); + void loadMarks(std::chrono::seconds lock_timeout); /** For normal columns, the number of rows in the block is specified in the marks. 
* For array columns and nested structures, there are more than one group of marks that correspond to different files diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 93f00206e6b..8651caecdfa 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -303,6 +303,9 @@ void registerStorageMemory(StorageFactory & factory) ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return StorageMemory::create(args.table_id, args.columns, args.constraints); + }, + { + .supports_parallel_insert = true, }); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 95873b7f3c1..74df6dd185b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 3263f124afa..9dd62439814 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -14,7 +14,6 @@ #include #include #include -#include #include diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 499f7329cd9..f324d502834 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -29,6 +29,9 @@ void registerStorageNull(StorageFactory & factory) ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return StorageNull::create(args.table_id, args.columns, args.constraints); + }, + { + .supports_parallel_insert = true, }); } diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index b7b948af4ba..fed9dd04e76 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -25,7 +25,6 @@ public: bool supportsReplication() const override { return getNested()->supportsReplication(); } bool supportsParallelInsert() const override { return getNested()->supportsParallelInsert(); } bool supportsDeduplication() const override { return getNested()->supportsDeduplication(); } - bool supportsSettings() const override { return getNested()->supportsSettings(); } bool noPushingToViews() const override { return getNested()->noPushingToViews(); } bool hasEvenlyDistributedRead() const override { return getNested()->hasEvenlyDistributedRead(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f1b6c3c7e00..ebf1e43ca04 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2483,23 +2483,23 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_is_lost.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", Coordination::errorMessage(get_is_lost.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, Coordination::errorMessage(get_is_lost.error)); continue; } else if (get_is_lost.data != "0") { - LOG_INFO(log, "Not cloning {}, it's lost"); + LOG_INFO(log, "Not cloning {}, it's lost", source_replica_name); continue; } if (get_log_pointer.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", Coordination::errorMessage(get_log_pointer.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, Coordination::errorMessage(get_log_pointer.error)); continue; } if (get_queue.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", Coordination::errorMessage(get_queue.error)); + LOG_INFO(log, "Not 
cloning {}, cannot get '/queue': {}", source_replica_name, Coordination::errorMessage(get_queue.error)); continue; } @@ -4958,8 +4958,13 @@ void StorageReplicatedMergeTree::fetchPartition( const String & from_, const Context & query_context) { - String auxiliary_zookeeper_name = extractZooKeeperName(from_); - String from = extractZooKeeperPath(from_); + Macros::MacroExpansionInfo info; + info.expand_special_macros_only = false; + info.table_id = getStorageID(); + info.table_id.uuid = UUIDHelpers::Nil; + auto expand_from = query_context.getMacros()->expand(from_, info); + String auxiliary_zookeeper_name = extractZooKeeperName(expand_from); + String from = extractZooKeeperPath(expand_from); if (from.empty()) throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index bc6afffddeb..db4fbff78cd 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -44,13 +44,13 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; + extern const int TIMEOUT_EXCEEDED; } class StripeLogSource final : public SourceWithProgress { public: - static Block getHeader( StorageStripeLog & storage, const StorageMetadataPtr & metadata_snapshot, @@ -98,6 +98,9 @@ public: protected: Chunk generate() override { + if (storage.file_checker.empty()) + return {}; + Block res; start(); @@ -154,10 +157,11 @@ private: class StripeLogBlockOutputStream final : public IBlockOutputStream { public: - explicit StripeLogBlockOutputStream(StorageStripeLog & storage_, const StorageMetadataPtr & metadata_snapshot_) + explicit StripeLogBlockOutputStream( + StorageStripeLog & storage_, const StorageMetadataPtr & metadata_snapshot_, std::unique_lock && lock_) : storage(storage_) , metadata_snapshot(metadata_snapshot_) - , lock(storage.rwlock) + , lock(std::move(lock_)) , data_out_file(storage.table_path + "data.bin") , data_out_compressed(storage.disk->writeFile(data_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append)) , data_out(std::make_unique( @@ -167,6 +171,15 @@ public: , index_out(std::make_unique(*index_out_compressed)) , block_out(*data_out, 0, metadata_snapshot->getSampleBlock(), false, index_out.get(), storage.disk->getFileSize(data_out_file)) { + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + if (storage.file_checker.empty()) + { + storage.file_checker.setEmpty(storage.table_path + "data.bin"); + storage.file_checker.setEmpty(storage.table_path + "index.mrk"); + storage.file_checker.save(); + } } ~StripeLogBlockOutputStream() override @@ -220,7 +233,7 @@ public: private: StorageStripeLog & storage; StorageMetadataPtr metadata_snapshot; - std::unique_lock lock; + std::unique_lock lock; String data_out_file; std::unique_ptr data_out_compressed; @@ -261,9 +274,6 @@ StorageStripeLog::StorageStripeLog( { /// create directories if they do not exist disk->createDirectories(table_path); - - file_checker.setEmpty(table_path + "data.bin"); - file_checker.setEmpty(table_path + "index.mrk"); } else { @@ -283,8 +293,6 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora { assert(table_path != new_path_to_table_data); { - std::unique_lock lock(rwlock); - disk->moveDirectory(table_path, new_path_to_table_data); table_path = new_path_to_table_data; @@ -294,6 +302,16 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora } +static 
std::chrono::seconds getLockTimeout(const Context & context) +{ + const Settings & settings = context.getSettingsRef(); + Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); + if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) + lock_timeout = settings.max_execution_time.totalSeconds(); + return std::chrono::seconds{lock_timeout}; +} + + Pipe StorageStripeLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -303,7 +321,9 @@ Pipe StorageStripeLog::read( const size_t /*max_block_size*/, unsigned num_streams) { - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, getLockTimeout(context)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); @@ -342,24 +362,28 @@ Pipe StorageStripeLog::read( } -BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) +BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { - return std::make_shared(*this, metadata_snapshot); + std::unique_lock lock(rwlock, getLockTimeout(context)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + return std::make_shared(*this, metadata_snapshot, std::move(lock)); } -CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, const Context & /* context */) +CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, const Context & context) { - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, getLockTimeout(context)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + return file_checker.check(); } void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { - std::shared_lock lock(rwlock); - disk->clearDirectory(table_path); - file_checker = FileChecker{disk, table_path + "sizes.json"}; } diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index 1f30ddc8d8b..5782e2526d3 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -68,7 +68,7 @@ private: size_t max_compress_block_size; FileChecker file_checker; - mutable std::shared_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; }; diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 81eec735c8a..fe8a25ba13b 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -46,6 +47,7 @@ namespace DB namespace ErrorCodes { + extern const int TIMEOUT_EXCEEDED; extern const int DUPLICATE_COLUMN; extern const int INCORRECT_FILE_NAME; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -55,7 +57,6 @@ namespace ErrorCodes class TinyLogSource final : public SourceWithProgress { public: - static Block getHeader(const NamesAndTypesList & columns) { Block res; @@ -66,10 +67,17 @@ public: return Nested::flatten(res); } - TinyLogSource(size_t block_size_, const NamesAndTypesList & columns_, StorageTinyLog & storage_, size_t max_read_buffer_size_) + TinyLogSource( + size_t block_size_, + const NamesAndTypesList & columns_, + StorageTinyLog & storage_, + size_t max_read_buffer_size_, + FileChecker::Map file_sizes_) : 
SourceWithProgress(getHeader(columns_)) - , block_size(block_size_), columns(columns_), storage(storage_), lock(storage_.rwlock) - , max_read_buffer_size(max_read_buffer_size_) {} + , block_size(block_size_), columns(columns_), storage(storage_) + , max_read_buffer_size(max_read_buffer_size_), file_sizes(std::move(file_sizes_)) + { + } String getName() const override { return "TinyLog"; } @@ -80,19 +88,21 @@ private: size_t block_size; NamesAndTypesList columns; StorageTinyLog & storage; - std::shared_lock lock; bool is_finished = false; size_t max_read_buffer_size; + FileChecker::Map file_sizes; struct Stream { - Stream(const DiskPtr & disk, const String & data_path, size_t max_read_buffer_size_) - : plain(disk->readFile(data_path, std::min(max_read_buffer_size_, disk->getFileSize(data_path)))), - compressed(*plain) + Stream(const DiskPtr & disk, const String & data_path, size_t max_read_buffer_size_, size_t file_size) + : plain(file_size ? disk->readFile(data_path, std::min(max_read_buffer_size_, file_size)) : std::make_unique(nullptr, 0)), + limited(std::make_unique(*plain, file_size, false)), + compressed(*limited) { } std::unique_ptr plain; + std::unique_ptr limited; CompressedReadBuffer compressed; }; @@ -107,79 +117,11 @@ private: }; -class TinyLogBlockOutputStream final : public IBlockOutputStream -{ -public: - explicit TinyLogBlockOutputStream(StorageTinyLog & storage_, const StorageMetadataPtr & metadata_snapshot_) - : storage(storage_), metadata_snapshot(metadata_snapshot_), lock(storage_.rwlock) - { - } - - ~TinyLogBlockOutputStream() override - { - try - { - if (!done) - { - /// Rollback partial writes. - streams.clear(); - storage.file_checker.repair(); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } - - void write(const Block & block) override; - void writeSuffix() override; - -private: - StorageTinyLog & storage; - StorageMetadataPtr metadata_snapshot; - std::unique_lock lock; - bool done = false; - - struct Stream - { - Stream(const DiskPtr & disk, const String & data_path, CompressionCodecPtr codec, size_t max_compress_block_size) : - plain(disk->writeFile(data_path, max_compress_block_size, WriteMode::Append)), - compressed(*plain, std::move(codec), max_compress_block_size) - { - } - - std::unique_ptr plain; - CompressedWriteBuffer compressed; - - void finalize() - { - compressed.next(); - plain->finalize(); - } - }; - - using FileStreams = std::map>; - FileStreams streams; - - using SerializeState = IDataType::SerializeBinaryBulkStatePtr; - using SerializeStates = std::map; - SerializeStates serialize_states; - - using WrittenStreams = std::set; - - IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenStreams & written_streams); - void writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams); -}; - - Chunk TinyLogSource::generate() { Block res; - if (is_finished || (!streams.empty() && streams.begin()->second->compressed.eof())) + if (is_finished || file_sizes.empty() || (!streams.empty() && streams.begin()->second->compressed.eof())) { /** Close the files (before destroying the object). 
* When many sources are created, but simultaneously reading only a few of them, @@ -190,10 +132,6 @@ Chunk TinyLogSource::generate() return {}; } - /// if there are no files in the folder, it means that the table is empty - if (storage.disk->isDirectoryEmpty(storage.table_path)) - return {}; - for (const auto & name_type : columns) { MutableColumnPtr column = name_type.type->createColumn(); @@ -231,18 +169,104 @@ void TinyLogSource::readData(const String & name, const IDataType & type, IColum String stream_name = IDataType::getFileNameForStream(name, path); if (!streams.count(stream_name)) - streams[stream_name] = std::make_unique(storage.disk, storage.files[stream_name].data_file_path, max_read_buffer_size); + { + String file_path = storage.files[stream_name].data_file_path; + streams[stream_name] = std::make_unique( + storage.disk, file_path, max_read_buffer_size, file_sizes[fileName(file_path)]); + } return &streams[stream_name]->compressed; }; if (deserialize_states.count(name) == 0) - type.deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + type.deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); type.deserializeBinaryBulkWithMultipleStreams(column, limit, settings, deserialize_states[name]); } +class TinyLogBlockOutputStream final : public IBlockOutputStream +{ +public: + explicit TinyLogBlockOutputStream( + StorageTinyLog & storage_, + const StorageMetadataPtr & metadata_snapshot_, + std::unique_lock && lock_) + : storage(storage_), metadata_snapshot(metadata_snapshot_), lock(std::move(lock_)) + { + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + /// If there were no files, add info to rollback in case of error. + if (storage.file_checker.empty()) + { + for (const auto & file : storage.files) + storage.file_checker.setEmpty(file.second.data_file_path); + storage.file_checker.save(); + } + } + + ~TinyLogBlockOutputStream() override + { + try + { + if (!done) + { + /// Rollback partial writes. + LOG_WARNING(storage.log, "Rollback partial writes"); + streams.clear(); + storage.file_checker.repair(); + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } + + void write(const Block & block) override; + void writeSuffix() override; + +private: + StorageTinyLog & storage; + StorageMetadataPtr metadata_snapshot; + std::unique_lock lock; + bool done = false; + + struct Stream + { + Stream(const DiskPtr & disk, const String & data_path, CompressionCodecPtr codec, size_t max_compress_block_size) : + plain(disk->writeFile(data_path, max_compress_block_size, WriteMode::Append)), + compressed(*plain, std::move(codec), max_compress_block_size) + { + } + + std::unique_ptr plain; + CompressedWriteBuffer compressed; + + void finalize() + { + compressed.next(); + plain->finalize(); + } + }; + + using FileStreams = std::map>; + FileStreams streams; + + using SerializeState = IDataType::SerializeBinaryBulkStatePtr; + using SerializeStates = std::map; + SerializeStates serialize_states; + + using WrittenStreams = std::set; + + IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenStreams & written_streams); + void writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams); +}; + + IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter( const String & name, WrittenStreams & written_streams) @@ -311,12 +335,12 @@ void TinyLogBlockOutputStream::writeSuffix() for (auto & pair : streams) column_files.push_back(storage.files[pair.first].data_file_path); + streams.clear(); + done = true; + for (const auto & file : column_files) storage.file_checker.update(file); storage.file_checker.save(); - - streams.clear(); - done = true; } @@ -377,10 +401,6 @@ StorageTinyLog::StorageTinyLog( for (const auto & col : storage_metadata.getColumns().getAllPhysical()) addFiles(col.name, *col.type); - - if (!attach) - for (const auto & file : files) - file_checker.setEmpty(file.second.data_file_path); } @@ -410,8 +430,6 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage { assert(table_path != new_path_to_table_data); { - std::unique_lock lock(rwlock); - disk->moveDirectory(table_path, new_path_to_table_data); table_path = new_path_to_table_data; @@ -424,6 +442,16 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage } +static std::chrono::seconds getLockTimeout(const Context & context) +{ + const Settings & settings = context.getSettingsRef(); + Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); + if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) + lock_timeout = settings.max_execution_time.totalSeconds(); + return std::chrono::seconds{lock_timeout}; +} + + Pipe StorageTinyLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -437,28 +465,40 @@ Pipe StorageTinyLog::read( // When reading, we lock the entire storage, because we only have one file // per column and can't modify it concurrently. + const Settings & settings = context.getSettingsRef(); + + std::shared_lock lock{rwlock, getLockTimeout(context)}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + /// No need to hold lock while reading because we read fixed range of data that does not change while appending more data. 
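The TinyLog read path above no longer holds the table lock for the duration of the query: it snapshots the committed file sizes (file_checker.getFileSizes()) and wraps every column stream in a LimitReadBuffer, so bytes appended after the snapshot are simply never read. A minimal sketch of that "read only up to the snapshotted size" idea using a plain ifstream; read_upto is an illustrative helper under that assumption, not a ClickHouse API:

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Read at most snapshot_size bytes from path, even if the file has grown since the
// size was recorded; concurrent appends past the snapshot stay invisible to the reader.
std::vector<char> read_upto(const std::string & path, std::uint64_t snapshot_size)
{
    std::ifstream in(path, std::ios::binary);
    std::vector<char> buf(snapshot_size);
    in.read(buf.data(), static_cast<std::streamsize>(snapshot_size));
    buf.resize(static_cast<std::size_t>(std::max<std::streamsize>(in.gcount(), 0)));
    return buf;
}
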
return Pipe(std::make_shared( - max_block_size, Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)), *this, context.getSettingsRef().max_read_buffer_size)); + max_block_size, + Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)), + *this, + settings.max_read_buffer_size, + file_checker.getFileSizes())); } -BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) +BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { - return std::make_shared(*this, metadata_snapshot); + return std::make_shared(*this, metadata_snapshot, std::unique_lock{rwlock, getLockTimeout(context)}); } -CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context & /* context */) +CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context & context) { - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, getLockTimeout(context)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + return file_checker.check(); } void StorageTinyLog::truncate( const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { - std::unique_lock lock(rwlock); - disk->clearDirectory(table_path); files.clear(); @@ -468,14 +508,6 @@ void StorageTinyLog::truncate( addFiles(column.name, *column.type); } -void StorageTinyLog::drop() -{ - std::unique_lock lock(rwlock); - if (disk->exists(table_path)) - disk->removeRecursive(table_path); - files.clear(); -} - void registerStorageTinyLog(StorageFactory & factory) { diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 7d2b7473a21..1398af24f82 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -44,8 +44,6 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; - void drop() override; - protected: StorageTinyLog( DiskPtr disk_, @@ -71,7 +69,7 @@ private: Files files; FileChecker file_checker; - mutable std::shared_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 39dc74822e0..ee39390a0f5 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,4 +1,4 @@ -// autogenerated by ./StorageSystemContributors.sh +// autogenerated by src/Storages/System/StorageSystemContributors.sh const char * auto_contributors[] { "0xflotus", "20018712", @@ -11,6 +11,7 @@ const char * auto_contributors[] { "Alberto", "Aleksandr Karo", "Aleksandra (Ася)", + "Aleksandrov Vladimir", "Aleksei Levushkin", "Aleksey", "Aleksey Akulovich", @@ -117,6 +118,7 @@ const char * auto_contributors[] { "BanyRule", "Baudouin Giard", "BayoNet", + "Bertrand Junqua", "Bharat Nallan", "Big Elephant", "Bill", @@ -132,6 +134,7 @@ const char * auto_contributors[] { "Carbyn", "Chao Wang", "Chen Yufei", + "Chienlung Cheung", "Ciprian Hacman", "Clement Rodriguez", "Clément Rodriguez", @@ -178,6 +181,8 @@ const char * auto_contributors[] { "Elizaveta Mironyuk", "Emmanuel Donin de Rosière", "Eric", + "Eric Daniel", + "Erixonich", "Ernest Poletaev", "Eugene Klimov", "Eugene Konkov", @@ 
-187,6 +192,7 @@ const char * auto_contributors[] { "Evgeniy Gatov", "Evgeniy Udodov", "Evgeny Konkov", + "Evgeny Markov", "Ewout", "Fabian Stäber", "Fabiano Francesconi", @@ -218,6 +224,7 @@ const char * auto_contributors[] { "Hamoon", "Hasnat", "Hiroaki Nakamura", + "HuFuwang", "Hui Wang", "Igor", "Igor Hatarist", @@ -239,6 +246,7 @@ const char * auto_contributors[] { "Ilya Skrypitsa", "Ilya Yatsishin", "ImgBotApp", + "Islam Israfilov (Islam93)", "Ivan", "Ivan A. Torgashov", "Ivan Babrou", @@ -264,6 +272,8 @@ const char * auto_contributors[] { "Kang Liu", "Karl Pietrzak", "Keiji Yoshida", + "Ken Chen", + "Kevin Chiang", "Kiran", "Kirill Danshin", "Kirill Malev", @@ -299,12 +309,14 @@ const char * auto_contributors[] { "Marek Vavrusa", "Marek Vavruša", "Marek Vavruša", + "Mark Andreev", "Mark Papadakis", "Maroun Maroun", "Marsel Arduanov", "Marti Raudsepp", "Martijn Bakker", "Masha", + "Matthew Peveler", "Matwey V. Kornilov", "Max", "Max Akhmedov", @@ -349,6 +361,7 @@ const char * auto_contributors[] { "Milad Arabi", "Mohammad Hossein Sekhavat", "MovElb", + "Mr.General", "Murat Kabilov", "MyroTk", "NIKITA MIKHAILOV", @@ -359,6 +372,7 @@ const char * auto_contributors[] { "Nico Piderman", "Nicolae Vartolomei", "Nik", + "Nikhil Nadig", "Nikhil Raman", "Nikita Lapkov", "Nikita Mikhailov", @@ -384,6 +398,7 @@ const char * auto_contributors[] { "Orivej Desh", "Oskar Wojciski", "OuO", + "PHO", "Paramtamtam", "Patrick Zippenfenig", "Pavel", @@ -442,6 +457,7 @@ const char * auto_contributors[] { "Sergey Zaikin", "Sergi Vladykin", "SevaCode", + "Sherry Wang", "Silviu Caragea", "Simon Liu", "Simon Podlipsky", @@ -452,13 +468,17 @@ const char * auto_contributors[] { "Stanislav Pavlovichev", "Stas Pavlovichev", "Stefan Thies", + "Stepan", "Stepan Herold", + "Steve-金勇", + "Stig Bakken", "Stupnikov Andrey", "SuperBot", "Sébastien Launay", "TAC", "TCeason", "Tagir Kuskarov", + "Tai White", "Tangaev", "Tema Novikov", "The-Alchemist", @@ -467,6 +487,7 @@ const char * auto_contributors[] { "Tsarkova Anastasia", "Ubuntu", "Ubus", + "UnamedRus", "V", "VDimir", "Vadim", @@ -499,6 +520,7 @@ const char * auto_contributors[] { "Vladimir Chebotarev", "Vladimir Golovchenko", "Vladimir Goncharov", + "Vladimir Klimontovich", "Vladimir Kolobaev", "Vladimir Kopysov", "Vladimir Kozbin", @@ -564,16 +586,20 @@ const char * auto_contributors[] { "asiana21", "avasiliev", "avsharapov", + "awesomeleo", "benamazing", "bgranvea", "bharatnc", "blazerer", "bluebirddm", "bobrovskij artemij", + "booknouse", "bseng", "cekc", "champtar", + "chang.chen", "chengy8934", + "chenqi", "chenxing-xc", "chenxing.xc", "chertus", @@ -591,6 +617,7 @@ const char * auto_contributors[] { "dgrr", "dimarub2000", "dinosaur", + "dkxiaohei", "dmi-feo", "dmitrii", "dmitriiut", @@ -605,8 +632,10 @@ const char * auto_contributors[] { "exprmntr", "ezhaka", "f1yegor", + "fastio", "favstovol", "felixoid", + "felixxdu", "feng lv", "fenglv", "fessmage", @@ -622,8 +651,10 @@ const char * auto_contributors[] { "ggerogery", "giordyb", "glockbender", + "glushkovds", "gyuton", "hao.he", + "hchen9", "hcz", "heng zhao", "hexiaoting", @@ -640,6 +671,8 @@ const char * auto_contributors[] { "javartisan", "javi", "javi santana", + "jetgm", + "jianmei zhang", "kmeaw", "koshachy", "kreuzerkrieg", @@ -653,6 +686,7 @@ const char * auto_contributors[] { "levysh", "liangqian", "libenwang", + "lichengxiang", "linceyou", "litao91", "liu-bov", @@ -685,6 +719,8 @@ const char * auto_contributors[] { "moscas", "myrrc", "nagorny", + "nauta", + "nautaa", "never lee", "nicelulu", 
"nikitamikhaylov", @@ -693,6 +729,7 @@ const char * auto_contributors[] { "nvartolomei", "oandrew", "objatie_groba", + "ocadaruma", "ogorbacheva", "olegkv", "olgarev", @@ -700,6 +737,7 @@ const char * auto_contributors[] { "palasonicq", "peshkurov", "philip.han", + "pingyu", "potya", "proller", "pufit", @@ -716,7 +754,9 @@ const char * auto_contributors[] { "roman", "romanzhukov", "root", + "roverxu", "santaux", + "satanson", "sdk2", "serebrserg", "sev7e0", @@ -725,6 +765,7 @@ const char * auto_contributors[] { "shangshujie", "shedx", "simon-says", + "spongedc", "spyros87", "stavrolia", "stepenhu", @@ -738,6 +779,7 @@ const char * auto_contributors[] { "taiyang-li", "tao jiang", "tavplubix", + "tiger.yan", "topvisor", "tyrionhuang", "ubuntu", @@ -755,10 +797,13 @@ const char * auto_contributors[] { "vxider", "vzakaznikov", "wangchao", + "weeds085490", "xPoSx", + "yangshuai", "yhgcn", "ylchou", "yonesko", + "yuefoo", "yulu86", "yuluxu", "zamulla", @@ -768,6 +813,7 @@ const char * auto_contributors[] { "zhen ni", "zhukai", "zvrr", + "zvvr", "zzsmdfj", "Šimon Podlipský", "Артем Стрельцов", @@ -781,6 +827,7 @@ const char * auto_contributors[] { "小路", "张健", "张风啸", + "徐炘", "极客青年", "谢磊", "贾顺名(Jarvis)", diff --git a/src/Storages/System/StorageSystemTableEngines.cpp b/src/Storages/System/StorageSystemTableEngines.cpp index e63923f69b6..3f06faf6736 100644 --- a/src/Storages/System/StorageSystemTableEngines.cpp +++ b/src/Storages/System/StorageSystemTableEngines.cpp @@ -8,26 +8,31 @@ namespace DB NamesAndTypesList StorageSystemTableEngines::getNamesAndTypes() { - return {{"name", std::make_shared()}, - {"supports_settings", std::make_shared()}, - {"supports_skipping_indices", std::make_shared()}, - {"supports_sort_order", std::make_shared()}, - {"supports_ttl", std::make_shared()}, - {"supports_replication", std::make_shared()}, - {"supports_deduplication", std::make_shared()}}; + return { + {"name", std::make_shared()}, + {"supports_settings", std::make_shared()}, + {"supports_skipping_indices", std::make_shared()}, + {"supports_sort_order", std::make_shared()}, + {"supports_ttl", std::make_shared()}, + {"supports_replication", std::make_shared()}, + {"supports_deduplication", std::make_shared()}, + {"supports_parallel_insert", std::make_shared()}, + }; } void StorageSystemTableEngines::fillData(MutableColumns & res_columns, const Context &, const SelectQueryInfo &) const { for (const auto & pair : StorageFactory::instance().getAllStorages()) { - res_columns[0]->insert(pair.first); - res_columns[1]->insert(pair.second.features.supports_settings); - res_columns[2]->insert(pair.second.features.supports_skipping_indices); - res_columns[3]->insert(pair.second.features.supports_sort_order); - res_columns[4]->insert(pair.second.features.supports_ttl); - res_columns[5]->insert(pair.second.features.supports_replication); - res_columns[6]->insert(pair.second.features.supports_deduplication); + int i = 0; + res_columns[i++]->insert(pair.first); + res_columns[i++]->insert(pair.second.features.supports_settings); + res_columns[i++]->insert(pair.second.features.supports_skipping_indices); + res_columns[i++]->insert(pair.second.features.supports_sort_order); + res_columns[i++]->insert(pair.second.features.supports_ttl); + res_columns[i++]->insert(pair.second.features.supports_replication); + res_columns[i++]->insert(pair.second.features.supports_deduplication); + res_columns[i++]->insert(pair.second.features.supports_parallel_insert); } } diff --git 
a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 835aebab900..99dfc55ed69 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -80,6 +80,24 @@ TEST(TransformQueryForExternalDatabase, InWithSingleElement) state.context, state.columns); } +TEST(TransformQueryForExternalDatabase, InWithTable) +{ + const State & state = State::instance(); + + check("SELECT column FROM test.table WHERE 1 IN external_table", + R"(SELECT "column" FROM "test"."table")", + state.context, state.columns); + check("SELECT column FROM test.table WHERE 1 IN (x)", + R"(SELECT "column" FROM "test"."table")", + state.context, state.columns); + check("SELECT column, field, value FROM test.table WHERE column IN (field, value)", + R"(SELECT "column", "field", "value" FROM "test"."table" WHERE "column" IN ("field", "value"))", + state.context, state.columns); + check("SELECT column FROM test.table WHERE column NOT IN hello AND column = 123", + R"(SELECT "column" FROM "test"."table" WHERE ("column" = 123))", + state.context, state.columns); +} + TEST(TransformQueryForExternalDatabase, Like) { const State & state = State::instance(); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index f35fb1c8a34..42daf8cfc26 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -138,6 +138,12 @@ bool isCompatible(const IAST & node) if (name == "tuple" && function->arguments->children.size() <= 1) return false; + /// If the right hand side of IN is an identifier (example: x IN table), then it's not compatible. + if ((name == "in" || name == "notIn") + && (function->arguments->children.size() != 2 + || function->arguments->children[1]->as<ASTIdentifier>())) + return false; + for (const auto & expr : function->arguments->children) if (!isCompatible(*expr)) return false; diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 27aa9e3ac3f..9169ff54b87 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -17,6 +17,7 @@ SRCS( ConstraintsDescription.cpp Distributed/DirectoryMonitor.cpp Distributed/DistributedBlockOutputStream.cpp + Distributed/DistributedSettings.cpp IStorage.cpp IndicesDescription.cpp JoinSettings.cpp diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index a031490b88b..914b7083fca 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -211,6 +211,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, con ASTPtr{}, String{}, String{}, + DistributedSettings{}, false, cluster) : StorageDistributed::create( @@ -224,6 +225,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, con ASTPtr{}, String{}, String{}, + DistributedSettings{}, false, cluster); diff --git a/tests/integration/README.md b/tests/integration/README.md index 346926677bd..cdfb6b1a70a 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -12,7 +12,7 @@ You must install latest Docker from https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#set-up-the-repository Don't use Docker from your system repository. -* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`.
To install: `sudo apt-get install python3-pip libpq-dev zlib1g-dev libcrypto++-dev libssl-dev libkrb5-dev` +* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python3-pip libpq-dev zlib1g-dev libcrypto++-dev libssl-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: diff --git a/tests/integration/test_alter_settings_on_cluster/__init__.py b/tests/integration/test_alter_settings_on_cluster/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_alter_settings_on_cluster/configs/config.d/clusters.xml b/tests/integration/test_alter_settings_on_cluster/configs/config.d/clusters.xml new file mode 100644 index 00000000000..26c9caa63db --- /dev/null +++ b/tests/integration/test_alter_settings_on_cluster/configs/config.d/clusters.xml @@ -0,0 +1,17 @@ + + + + + true + + ch1 + 9000 + + + ch2 + 9000 + + + + + diff --git a/tests/integration/test_alter_settings_on_cluster/configs/config.d/distributed_ddl.xml b/tests/integration/test_alter_settings_on_cluster/configs/config.d/distributed_ddl.xml new file mode 100644 index 00000000000..6a88929c8ac --- /dev/null +++ b/tests/integration/test_alter_settings_on_cluster/configs/config.d/distributed_ddl.xml @@ -0,0 +1,5 @@ + + + /clickhouse/task_queue/ddl + + diff --git a/tests/integration/test_alter_settings_on_cluster/test.py b/tests/integration/test_alter_settings_on_cluster/test.py new file mode 100644 index 00000000000..6ab3d446b59 --- /dev/null +++ b/tests/integration/test_alter_settings_on_cluster/test.py @@ -0,0 +1,54 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +ch1 = cluster.add_instance( + "ch1", + main_configs=[ + "configs/config.d/clusters.xml", + "configs/config.d/distributed_ddl.xml", + ], + with_zookeeper=True, +) +ch2 = cluster.add_instance( + "ch2", + main_configs=[ + "configs/config.d/clusters.xml", + "configs/config.d/distributed_ddl.xml", + ], + with_zookeeper=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + ch1.query("CREATE DATABASE test_default_database ON CLUSTER 'cluster';") + yield cluster + + finally: + cluster.shutdown() + + +def test_default_database_on_cluster(started_cluster): + ch1.query( + database="test_default_database", + sql="CREATE TABLE test_local_table (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_local_table', 'r1') ORDER BY tuple();", + ) + + ch2.query( + database="test_default_database", + sql="CREATE TABLE test_local_table (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_local_table', 'r2') ORDER BY tuple();", + ) + + ch1.query( + database="test_default_database", + sql="ALTER TABLE test_local_table ON CLUSTER 'cluster' MODIFY SETTING old_parts_lifetime = 100;", + ) + + for node in [ch1, ch2]: + assert node.query( + database="test_default_database", + sql="SHOW CREATE test_local_table FORMAT TSV", + ).endswith("old_parts_lifetime = 100\n") diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index 0758dc38ba7..df7b74fcae1 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -106,7 +106,7 @@ def get_node(query_node, table='dist', *args, **kwargs): LIMIT 1 ) a JOIN system.clusters c - ON 
a._shard_num = c.shard_num AND cluster = 'shards_cluster' + ON a._shard_num = c.shard_num WHERE cluster = 'shards_cluster' """.format(query_id=query_id)) return rows.strip() diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index ae4f4473475..3a3dc464155 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -239,7 +239,7 @@ def test_progress(): , output: "6\\t0\\n7\\t0\\n" , stats { rows: 8 - blocks: 5 + blocks: 4 allocated_bytes: 324 applied_limit: true rows_before_limit: 8 diff --git a/tests/integration/test_insert_into_distributed/test.py b/tests/integration/test_insert_into_distributed/test.py index 52beaf06ec2..d71d1075c70 100644 --- a/tests/integration/test_insert_into_distributed/test.py +++ b/tests/integration/test_insert_into_distributed/test.py @@ -36,7 +36,7 @@ CREATE TABLE distributed (x UInt32) ENGINE = Distributed('test_cluster', 'defaul remote.query("CREATE TABLE local2 (d Date, x UInt32, s String) ENGINE = MergeTree(d, x, 8192)") instance_test_inserts_batching.query(''' -CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local2') +CREATE TABLE distributed (d Date, x UInt32) ENGINE = Distributed('test_cluster', 'default', 'local2') SETTINGS fsync_after_insert=1, fsync_directories=1 ''') instance_test_inserts_local_cluster.query( diff --git a/tests/integration/test_passing_max_partitions_to_read_remotely/__init__.py b/tests/integration/test_passing_max_partitions_to_read_remotely/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_passing_max_partitions_to_read_remotely/test.py b/tests/integration/test_passing_max_partitions_to_read_remotely/test.py new file mode 100644 index 00000000000..45b3dd00b2a --- /dev/null +++ b/tests/integration/test_passing_max_partitions_to_read_remotely/test.py @@ -0,0 +1,28 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +ch1 = cluster.add_instance("ch1") +ch2 = cluster.add_instance("ch2") + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + ch1.query("CREATE DATABASE test_default_database;") + yield cluster + + finally: + cluster.shutdown() + + +def test_default_database_on_cluster(started_cluster): + ch1.query( + database="test_default_database", + sql="CREATE TABLE test_local_table ENGINE MergeTree PARTITION BY i ORDER BY tuple() SETTINGS max_partitions_to_read = 1 AS SELECT arrayJoin([1, 2]) i;", + ) + + assert ch2.query( + sql="SELECT * FROM remote('ch1:9000', test_default_database, test_local_table) ORDER BY i FORMAT TSV SETTINGS max_partitions_to_read = 0;", + ) == "1\n2\n" diff --git a/tests/performance/array_index_low_cardinality_numbers.xml b/tests/performance/array_index_low_cardinality_numbers.xml index a9f0703c9a8..e64913ce26e 100644 --- a/tests/performance/array_index_low_cardinality_numbers.xml +++ b/tests/performance/array_index_low_cardinality_numbers.xml @@ -3,15 +3,19 @@ CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] - ) ENGINE = MergeTree() ORDER BY tuple() + ) ENGINE = StripeLog INSERT INTO perf_lc_num (num) SELECT toUInt8(number) - FROM numbers(1500000000) + FROM numbers(100000000) + + 1 + + SELECT count() FROM perf_lc_num WHERE num = 42 SELECT count() FROM perf_lc_num WHERE arr[1] = 42 SELECT count() FROM perf_lc_num WHERE has(arr, 42) diff --git a/tests/performance/optimize_window_funnel.xml 
b/tests/performance/optimize_window_funnel.xml new file mode 100644 index 00000000000..0d928fd0f4e --- /dev/null +++ b/tests/performance/optimize_window_funnel.xml @@ -0,0 +1,12 @@ + + CREATE TABLE action(uid UInt64, event String, time DateTime) ENGINE = MergeTree ORDER BY uid + + INSERT INTO action SELECT arrayJoin(groupArray(number)), 'a', now() from numbers(1000000) + INSERT INTO action SELECT arrayJoin(groupArray(number)), 'b', now() + INTERVAL 6 hour from numbers(1000000) + INSERT INTO action SELECT arrayJoin(groupArray(number)), 'c', now() + INTERVAL 12 hour from numbers(1000000) + INSERT INTO action SELECT arrayJoin(groupArray(number)), 'd', now() + INTERVAL 18 hour from numbers(1000000) + + SELECT level, count() from (select windowFunnel(86400)(time, event='a', event='b', event='c', event='d') level from action group by uid) group by level FORMAT Null + + DROP TABLE IF EXISTS action + diff --git a/tests/queries/0_stateless/00233_position_function_sql_comparibilty.reference b/tests/queries/0_stateless/00233_position_function_sql_comparibilty.reference new file mode 100644 index 00000000000..71c9a23879f --- /dev/null +++ b/tests/queries/0_stateless/00233_position_function_sql_comparibilty.reference @@ -0,0 +1,13 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00233_position_function_sql_comparibilty.sql b/tests/queries/0_stateless/00233_position_function_sql_comparibilty.sql new file mode 100644 index 00000000000..ae9409cd0c0 --- /dev/null +++ b/tests/queries/0_stateless/00233_position_function_sql_comparibilty.sql @@ -0,0 +1,16 @@ +SET send_logs_level = 'fatal'; +select 1 = position('' in ''); +select 1 = position('' in 'abc'); +select 0 = position('abc' in ''); +select 1 = position('abc' in 'abc'); +select 2 = position('bc' in 'abc'); +select 3 = position('c' in 'abc'); + +select 1 = position('' in ''); +select 1 = position('' in 'абв'); +select 0 = position('абв' in ''); +select 1 = position('абв' in 'абв'); +select 3 = position('бв' in 'абв'); +select 5 = position('в' in 'абв'); + +select 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); diff --git a/tests/queries/0_stateless/00306_insert_values_and_expressions.sql b/tests/queries/0_stateless/00306_insert_values_and_expressions.sql index 10a1415f287..01a66282bb8 100644 --- a/tests/queries/0_stateless/00306_insert_values_and_expressions.sql +++ b/tests/queries/0_stateless/00306_insert_values_and_expressions.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS insert; CREATE TABLE insert (i UInt64, s String, u UUID, d Date, t DateTime, a Array(UInt32)) ENGINE = Memory; -INSERT INTO insert VALUES (1, 'Hello', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', '2016-01-01', '2016-01-02 03:04:05', [1, 2, 3]), (1 + 1, concat('Hello', ', world'), toUUID(0), toDate('2016-01-01') + 1, toStartOfMinute(toDateTime('2016-01-02 03:04:05')), [[0,1],[2]][1]), (round(pi()), concat('hello', ', world!'), toUUID(toString('ab41bdd6-5cd4-11e7-907b-a6006ad3dba0')), toDate(toDateTime('2016-01-03 03:04:05')), toStartOfHour(toDateTime('2016-01-02 03:04:05')), []), (4, 'World', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', '2016-01-04', '2016-12-11 10:09:08', [3,2,1]); +INSERT INTO insert VALUES (1, 'Hello', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', '2016-01-01', '2016-01-02 03:04:05', [1, 2, 3]), (1 + 1, concat('Hello', ', world'), toUUID('00000000-0000-0000-0000-000000000000'), toDate('2016-01-01') + 1, toStartOfMinute(toDateTime('2016-01-02 03:04:05')), [[0,1],[2]][1]), (round(pi()), concat('hello', ', world!'), 
toUUID(toString('ab41bdd6-5cd4-11e7-907b-a6006ad3dba0')), toDate(toDateTime('2016-01-03 03:04:05')), toStartOfHour(toDateTime('2016-01-02 03:04:05')), []), (4, 'World', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', '2016-01-04', '2016-12-11 10:09:08', [3,2,1]); SELECT * FROM insert ORDER BY i; DROP TABLE insert; diff --git a/tests/queries/0_stateless/00735_conditional.reference b/tests/queries/0_stateless/00735_conditional.reference index b37fce430ec..6bee974769d 100644 --- a/tests/queries/0_stateless/00735_conditional.reference +++ b/tests/queries/0_stateless/00735_conditional.reference @@ -95,7 +95,6 @@ value vs value 2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID column vs value 0 1 1 Int8 Int8 Int8 0 1 1 Int8 Int16 Int16 @@ -169,4 +168,3 @@ column vs value 2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime 2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') -00000000-0000-0000-0000-000000000000 00000000-0000-0001-0000-000000000000 00000000-0000-0001-0000-000000000000 UUID UUID UUID diff --git a/tests/queries/0_stateless/00735_conditional.sql b/tests/queries/0_stateless/00735_conditional.sql index 0c272ac69f8..04439f4062e 100644 --- a/tests/queries/0_stateless/00735_conditional.sql +++ b/tests/queries/0_stateless/00735_conditional.sql @@ -14,7 +14,6 @@ SELECT toInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toInt8(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -31,7 +30,6 @@ SELECT toInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toInt16(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -48,7 +46,6 @@ SELECT toInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toInt32(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -65,7 +62,6 @@ SELECT toInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toInt64(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -82,7 +78,6 @@ SELECT toUInt8(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x) SELECT toUInt8(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toUInt8(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUInt8(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt8(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt8(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -99,7 +94,6 @@ SELECT toUInt16(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x SELECT toUInt16(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDate(1) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toUInt16(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUInt16(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt16(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt16(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -116,7 +110,6 @@ SELECT toUInt32(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x SELECT toUInt32(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toUInt32(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUInt32(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt32(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt32(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -133,7 +126,6 @@ SELECT toUInt64(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x SELECT toUInt64(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toUInt64(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUInt64(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toUInt64(0) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toUInt64(0) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -150,7 +142,6 @@ SELECT toDate(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), SELECT toDate(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDate('2000-01-01') AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDate(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT toDate(0) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -167,28 +158,10 @@ SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat32(1) AS y, ((x > y) ? x : y) SELECT toDateTime(0, 'Europe/Moscow') AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime('2000-01-01 00:00:00', 'Europe/Moscow') AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT toDateTime(0, 'Europe/Moscow') AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toDateTime(0, 'Europe/Moscow') AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT toDateTime(0, 'Europe/Moscow') AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT toUUID(0) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT toUUID(0) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT toUUID(0) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } - SELECT 'column vs value'; SELECT materialize(toInt8(0)) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); @@ -203,7 +176,6 @@ SELECT materialize(toInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toInt8(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -220,7 +192,6 @@ SELECT materialize(toInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt16(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toInt16(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -237,7 +208,6 @@ SELECT materialize(toInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toInt32(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -254,7 +224,6 @@ SELECT materialize(toInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toInt64(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -271,7 +240,6 @@ SELECT materialize(toUInt8(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt8(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt8(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt8(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUInt8(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt8(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt8(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt8(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -288,7 +256,6 @@ SELECT materialize(toUInt16(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt16(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt16(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt16(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUInt16(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt16(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt16(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt16(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -305,7 +272,6 @@ SELECT materialize(toUInt32(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt32(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toUInt32(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt32(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUInt32(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt32(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt32(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt32(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -322,7 +288,6 @@ SELECT materialize(toUInt64(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, SELECT materialize(toUInt64(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt64(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUInt64(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toUInt64(0)) AS x, toDecimal32(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt64(0)) AS x, toDecimal64(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toUInt64(0)) AS x, toDecimal128(1, 0) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -339,7 +304,6 @@ SELECT materialize(toDate(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, t SELECT materialize(toDate(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDate('2000-01-01')) AS x, toDateTime('2000-01-01 00:00:01', 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDate(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } SELECT materialize(toDate(0)) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } @@ -356,24 +320,6 @@ SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat32(1) AS y, ((x SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime('2000-01-01 00:00:00', 'Europe/Moscow')) AS x, toDate('2000-01-02') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } SELECT materialize(toDateTime(0, 'Europe/Moscow')) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } - -SELECT materialize(toUUID(0)) AS x, toInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toUInt8(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toUInt16(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toUInt32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toUInt64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toFloat32(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toFloat64(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toDate(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 43 } -SELECT materialize(toUUID(0)) AS x, toDateTime(1, 'Europe/Moscow') AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toUUID(1) AS y, ((x > y) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -SELECT materialize(toUUID(0)) AS x, toDecimal32(1, 0) AS y, ((x = 0) ? 
x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toDecimal64(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } -SELECT materialize(toUUID(0)) AS x, toDecimal128(1, 0) AS y, ((x = 0) ? x : y) AS z, toTypeName(x), toTypeName(y), toTypeName(z); -- { serverError 386 } diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference index 65fcbc257ca..a389cb47a96 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.reference +++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference @@ -23,8 +23,6 @@ join_use_nulls = 1 - \N \N - -1 1 \N \N -2 2 \N \N - 1 1 1 1 2 2 \N \N @@ -51,8 +49,6 @@ join_use_nulls = 0 - - - -1 1 0 0 -2 2 0 0 - 1 1 1 1 2 2 0 0 diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.sql b/tests/queries/0_stateless/00878_join_unexpected_results.sql index 6f6cd6e6479..0aef5208b26 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.sql +++ b/tests/queries/0_stateless/00878_join_unexpected_results.sql @@ -30,11 +30,11 @@ select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } select 'join_use_nulls = 0'; set join_use_nulls = 0; @@ -58,11 +58,11 @@ select '-'; select '-'; -- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } drop table t; drop table s; diff --git a/tests/queries/0_stateless/00977_int_div.reference b/tests/queries/0_stateless/00977_int_div.reference index 39e135d8eed..e5882f4aaba 100644 --- a/tests/queries/0_stateless/00977_int_div.reference +++ b/tests/queries/0_stateless/00977_int_div.reference @@ -81,3 +81,43 @@ -1 -1 -1 +-1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +-1 +-2 +-3 +-4 +-5 +-6 +-7 +-8 +-9 +-10 +-1 +-2 +-3 +-4 +-5 +-6 +-7 +-8 +-9 +-10 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 +-1 diff --git a/tests/queries/0_stateless/00977_int_div.sql b/tests/queries/0_stateless/00977_int_div.sql index 08085020ca5..4184475e3a0 100644 --- a/tests/queries/0_stateless/00977_int_div.sql +++ b/tests/queries/0_stateless/00977_int_div.sql @@ -23,3 +23,9 @@ SELECT intDiv(toInt64(number), -1) FROM numbers(1, 10); SELECT intDivOrZero(toInt64(number), -1) FROM numbers(1, 10); SELECT intDiv(number, -number) FROM numbers(1, 10); SELECT intDivOrZero(number, -number) FROM numbers(1, 10); + +SELECT -1 DIV number FROM numbers(1, 
10); +SELECT toInt32(number) DIV -1 FROM numbers(1, 10); +SELECT toInt64(number) DIV -1 FROM numbers(1, 10); +SELECT number DIV -number FROM numbers(1, 10); +SELECT -1 DIV 0; -- { serverError 153 } diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql index 225bdd2bdc6..01af9451381 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ b/tests/queries/0_stateless/01016_simhash_minhash.sql @@ -108,4 +108,8 @@ SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinH SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } +SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 } +SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 } + DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01018_Distributed__shard_num.reference b/tests/queries/0_stateless/01018_Distributed__shard_num.reference index 679383f969e..f4b92011759 100644 --- a/tests/queries/0_stateless/01018_Distributed__shard_num.reference +++ b/tests/queries/0_stateless/01018_Distributed__shard_num.reference @@ -29,8 +29,6 @@ remote(Distributed) JOIN system.clusters 1 10 localhost ::1 9000 1 20 localhost ::1 9000 -1 10 localhost ::1 9000 -1 20 localhost ::1 9000 dist_3 100 foo foo 100 foo diff --git a/tests/queries/0_stateless/01018_Distributed__shard_num.sql b/tests/queries/0_stateless/01018_Distributed__shard_num.sql index ddb8505a415..f6d5f23eca8 100644 --- a/tests/queries/0_stateless/01018_Distributed__shard_num.sql +++ b/tests/queries/0_stateless/01018_Distributed__shard_num.sql @@ -48,11 +48,12 @@ FROM (SELECT *, _shard_num FROM dist_1) a JOIN system.clusters b ON a._shard_num = b.shard_num WHERE b.cluster = 'test_cluster_two_shards_localhost'; + SELECT _shard_num, key, b.host_name, b.host_address, b.port FROM dist_1 a JOIN system.clusters b ON _shard_num = b.shard_num -WHERE b.cluster = 'test_cluster_two_shards_localhost'; +WHERE b.cluster = 'test_cluster_two_shards_localhost'; -- { serverError 403 } -- rewrite does not work with aliases, hence Missing columns (47) SELECT a._shard_num, key FROM dist_1 a; -- { serverError 47; } diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference index b0e80d4cdae..60a616c7187 100644 --- a/tests/queries/0_stateless/01044_great_circle_angle.reference +++ b/tests/queries/0_stateless/01044_great_circle_angle.reference @@ -11,7 +11,7 @@ 179 -0.06 ██ -████ +████▏ ██████▏ ████████▎ ██████████▎ @@ -19,25 +19,25 @@ ██████████████▍ ████████████████▌ ██████████████████▌ -████████████████████▌ +████████████████████▋ ██████████████████████▋ ████████████████████████▋ ██████████████████████████▌ ████████████████████████████▍ ██████████████████████████████▍ -████████████████████████████████▎ +████████████████████████████████▍ ██████████████████████████████████▎ ████████████████████████████████████▏ ██████████████████████████████████████ ███████████████████████████████████████▊ █████████████████████████████████████████▋ ███████████████████████████████████████████▌ -█████████████████████████████████████████████▎ -███████████████████████████████████████████████ -████████████████████████████████████████████████▋ +█████████████████████████████████████████████▍ 
+███████████████████████████████████████████████▏ +████████████████████████████████████████████████▊ ██████████████████████████████████████████████████▌ ████████████████████████████████████████████████████▏ -█████████████████████████████████████████████████████▋ +█████████████████████████████████████████████████████▊ ███████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████ ██████████████████████████████████████████████████████████▌ @@ -50,8 +50,8 @@ ████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████▏ ██████████████████████████████████████████████████████████████████████▎ -███████████████████████████████████████████████████████████████████████▎ -████████████████████████████████████████████████████████████████████████▎ +███████████████████████████████████████████████████████████████████████▍ +████████████████████████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████████████████████▎ ██████████████████████████████████████████████████████████████████████████▏ ███████████████████████████████████████████████████████████████████████████ @@ -61,11 +61,11 @@ █████████████████████████████████████████████████████████████████████████████▌ █████████████████████████████████████████████████████████████████████████████▊ ██████████████████████████████████████████████████████████████████████████████▎ -██████████████████████████████████████████████████████████████████████████████▌ +██████████████████████████████████████████████████████████████████████████████▋ ██████████████████████████████████████████████████████████████████████████████▋ ██████████████████████████████████████████████████████████████████████████████▊ ██████████████████████████████████████████████████████████████████████████████▊ -██████████████████████████████████████████████████████████████████████████████▋ +██████████████████████████████████████████████████████████████████████████████▊ ██████████████████████████████████████████████████████████████████████████████▋ ██████████████████████████████████████████████████████████████████████████████▍ ██████████████████████████████████████████████████████████████████████████████ @@ -75,7 +75,7 @@ ███████████████████████████████████████████████████████████████████████████▌ ██████████████████████████████████████████████████████████████████████████▌ █████████████████████████████████████████████████████████████████████████▌ -████████████████████████████████████████████████████████████████████████▎ +████████████████████████████████████████████████████████████████████████▍ ███████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████▋ ████████████████████████████████████████████████████████████████████ @@ -97,5 +97,5 @@ ██████████████████████▋ ██████████████████▌ ██████████████▏ -█████████▌ +█████████▋ ████▊ diff --git a/tests/queries/0_stateless/01081_PartialSortingTransform_full_column.sql b/tests/queries/0_stateless/01081_PartialSortingTransform_full_column.sql index 768a20c8ca4..eaf1278d9ba 100644 --- a/tests/queries/0_stateless/01081_PartialSortingTransform_full_column.sql +++ b/tests/queries/0_stateless/01081_PartialSortingTransform_full_column.sql @@ -2,7 +2,7 @@ drop table if exists test_01081; create table test_01081 (key Int) engine=MergeTree() order by key; insert into test_01081 select * from system.numbers limit 
10; -select 1 from remote('127.{1,2}', currentDatabase(), test_01081) lhs join system.one as rhs on rhs.dummy = 1 order by 1; +select 1 from remote('127.{1,2}', currentDatabase(), test_01081) lhs join system.one as rhs on rhs.dummy = 1 order by 1; -- { serverError 403 } -- With multiple blocks triggers: -- @@ -11,6 +11,6 @@ select 1 from remote('127.{1,2}', currentDatabase(), test_01081) lhs join system -- _dummy Int Int32(size = 0), 1 UInt8 Const(size = 0, UInt8(size = 1)). insert into test_01081 select * from system.numbers limit 10; -select 1 from remote('127.{1,2}', currentDatabase(), test_01081) lhs join system.one as rhs on rhs.dummy = 1 order by 1; +select 1 from remote('127.{1,2}', currentDatabase(), test_01081) lhs join system.one as rhs on rhs.dummy = 1 order by 1; -- { serverError 403 } drop table if exists test_01081; diff --git a/tests/queries/0_stateless/01101_literal_column_clash.reference b/tests/queries/0_stateless/01101_literal_column_clash.reference index 47458541f79..e63f96416af 100644 --- a/tests/queries/0_stateless/01101_literal_column_clash.reference +++ b/tests/queries/0_stateless/01101_literal_column_clash.reference @@ -8,7 +8,3 @@ xyzabc 2 1 0 0 3 \N 1 2 \N 0 \N 1 0 \N 3 -1 -1 -1 hello\0\0\0 -1 hello\0\0\0 diff --git a/tests/queries/0_stateless/01101_literal_column_clash.sql b/tests/queries/0_stateless/01101_literal_column_clash.sql index 2c665d3833b..f96aad4bacf 100644 --- a/tests/queries/0_stateless/01101_literal_column_clash.sql +++ b/tests/queries/0_stateless/01101_literal_column_clash.sql @@ -17,7 +17,6 @@ select null, isConstant(null), * from (select 2 x, null) a right join (select 3 -- other cases with joins and constants -select cast(1, 'UInt8') from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); - -select isConstant('UInt8'), toFixedString('hello', toUInt8(substring('UInt8', 5, 1))) from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); +select cast(1, 'UInt8') from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); -- { serverError 403 } +select isConstant('UInt8'), toFixedString('hello', toUInt8(substring('UInt8', 5, 1))) from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); -- { serverError 403 } diff --git a/tests/queries/0_stateless/01322_cast_keep_nullable.reference b/tests/queries/0_stateless/01322_cast_keep_nullable.reference index cfe80a7c1b1..8ad99a10170 100644 --- a/tests/queries/0_stateless/01322_cast_keep_nullable.reference +++ b/tests/queries/0_stateless/01322_cast_keep_nullable.reference @@ -4,7 +4,6 @@ 1 Nullable(Int32) 2 Nullable(Float32) 2 Nullable(UInt8) -00000000-0000-0002-0000-000000000000 Nullable(UUID) 3 Nullable(Int32) \N Nullable(Int32) 42 Nullable(Int32) diff --git a/tests/queries/0_stateless/01322_cast_keep_nullable.sql b/tests/queries/0_stateless/01322_cast_keep_nullable.sql index ef5f5747a7d..2f6fd80c35f 100644 --- a/tests/queries/0_stateless/01322_cast_keep_nullable.sql +++ b/tests/queries/0_stateless/01322_cast_keep_nullable.sql @@ -10,7 +10,6 @@ SELECT CAST(toNullable(toInt8(1)) AS Int32) as x, toTypeName(x); SELECT CAST(toNullable(toFloat32(2)), 'Float32') as x, toTypeName(x); SELECT CAST(toNullable(toFloat32(2)), 'UInt8') as x, toTypeName(x); -SELECT CAST(toNullable(toFloat32(2)), 'UUID') as x, toTypeName(x); SELECT CAST(if(1 = 1, toNullable(toInt8(3)), NULL) AS Int32) as x, toTypeName(x); SELECT CAST(if(1 = 0, toNullable(toInt8(3)), NULL) AS Int32) as x, toTypeName(x); 
diff --git a/tests/queries/0_stateless/01326_build_id.reference b/tests/queries/0_stateless/01326_build_id.reference index b6a7d89c68e..d00491fd7e5 100644 --- a/tests/queries/0_stateless/01326_build_id.reference +++ b/tests/queries/0_stateless/01326_build_id.reference @@ -1 +1 @@ -16 +1 diff --git a/tests/queries/0_stateless/01326_build_id.sql b/tests/queries/0_stateless/01326_build_id.sql index f9bf2731374..4451ec360aa 100644 --- a/tests/queries/0_stateless/01326_build_id.sql +++ b/tests/queries/0_stateless/01326_build_id.sql @@ -1 +1 @@ -SELECT length(buildId()); +SELECT length(buildId()) >= 16; diff --git a/tests/queries/0_stateless/01383_log_broken_table.sh b/tests/queries/0_stateless/01383_log_broken_table.sh index 37cd6e239e5..5cc0f24a87f 100755 --- a/tests/queries/0_stateless/01383_log_broken_table.sh +++ b/tests/queries/0_stateless/01383_log_broken_table.sh @@ -25,7 +25,7 @@ function test_func() $CLICKHOUSE_CLIENT --query "SELECT count(), sum(x + y + z) FROM log" > "${CLICKHOUSE_TMP}"/select_result 2>&1; - grep -o -F 'File not found' "${CLICKHOUSE_TMP}"/select_result || cat "${CLICKHOUSE_TMP}"/select_result + cat "${CLICKHOUSE_TMP}"/select_result [[ $MAX_MEM -gt 200000000 ]] && break; done @@ -33,9 +33,9 @@ function test_func() $CLICKHOUSE_CLIENT --query "DROP TABLE log"; } -test_func TinyLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' -test_func StripeLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' -test_func Log | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' +test_func TinyLog | grep -v -P '^(Memory limit|0\t0|[1-9]000000\t)' +test_func StripeLog | grep -v -P '^(Memory limit|0\t0|[1-9]000000\t)' +test_func Log | grep -v -P '^(Memory limit|0\t0|[1-9]000000\t)' rm "${CLICKHOUSE_TMP}/insert_result" rm "${CLICKHOUSE_TMP}/select_result" diff --git a/tests/queries/0_stateless/01412_mod_float.reference b/tests/queries/0_stateless/01412_mod_float.reference index 77e3431f7a9..94f7f6d3af7 100644 --- a/tests/queries/0_stateless/01412_mod_float.reference +++ b/tests/queries/0_stateless/01412_mod_float.reference @@ -1,3 +1,7 @@ 1 -1 1 -1 0.125 -0.125 0.125 -0.125 1 -1 1 -1 +1 -1 1 -1 +0.125 -0.125 0.125 -0.125 +nan +nan diff --git a/tests/queries/0_stateless/01412_mod_float.sql b/tests/queries/0_stateless/01412_mod_float.sql index 147cb88ff24..f2a5f2fcee6 100644 --- a/tests/queries/0_stateless/01412_mod_float.sql +++ b/tests/queries/0_stateless/01412_mod_float.sql @@ -1,3 +1,7 @@ WITH 8.5 AS a, 2.5 AS b SELECT a % b, -a % b, a % -b, -a % -b; WITH 10.125 AS a, 2.5 AS b SELECT a % b, -a % b, a % -b, -a % -b; WITH 8.5 AS a, 2.5 AS b SELECT mod(a, b), MOD(-a, b), modulo(a, -b), moduloOrZero(-a, -b); +WITH 8.5 AS a, 2.5 AS b SELECT a MOD b, -a MOD b, a MOD -b, -a MOD -b; +WITH 10.125 AS a, 2.5 AS b SELECT a MOD b, -a MOD b, a MOD -b, -a MOD -b; +SELECT 3.5 % 0; +SELECT 3.5 MOD 0; diff --git a/tests/queries/0_stateless/01422_map_skip_null.reference b/tests/queries/0_stateless/01422_map_skip_null.reference index 7211e0ac75d..83456126278 100644 --- a/tests/queries/0_stateless/01422_map_skip_null.reference +++ b/tests/queries/0_stateless/01422_map_skip_null.reference @@ -1,7 +1,4 @@ -([],[]) -([],[]) -([],[]) -([2],[11]) -([2],[22]) +([1,2],[0,11]) +([1,2],[0,22]) ([2],[33]) ([2],[33]) diff --git a/tests/queries/0_stateless/01422_map_skip_null.sql b/tests/queries/0_stateless/01422_map_skip_null.sql index 9af46758289..683757a473b 100644 --- a/tests/queries/0_stateless/01422_map_skip_null.sql +++ 
b/tests/queries/0_stateless/01422_map_skip_null.sql @@ -1,7 +1,7 @@ -select minMap(arrayJoin([([1], [null]), ([1], [null])])); -select maxMap(arrayJoin([([1], [null]), ([1], [null])])); +select minMap(arrayJoin([([1], [null]), ([1], [null])])); -- { serverError 43 } +select maxMap(arrayJoin([([1], [null]), ([1], [null])])); -- { serverError 43 } select sumMap(arrayJoin([([1], [null]), ([1], [null])])); -- { serverError 43 } -select sumMapWithOverflow(arrayJoin([([1], [null]), ([1], [null])])); +select sumMapWithOverflow(arrayJoin([([1], [null]), ([1], [null])])); -- { serverError 43 } select minMap(arrayJoin([([1, 2], [null, 11]), ([1, 2], [null, 22])])); select maxMap(arrayJoin([([1, 2], [null, 11]), ([1, 2], [null, 22])])); diff --git a/tests/queries/0_stateless/01470_columns_transformers2.reference b/tests/queries/0_stateless/01470_columns_transformers2.reference new file mode 100644 index 00000000000..18c0f5c7e89 --- /dev/null +++ b/tests/queries/0_stateless/01470_columns_transformers2.reference @@ -0,0 +1 @@ +100 10 324 120.00 B 8.00 B 23.00 B diff --git a/tests/queries/0_stateless/01470_columns_transformers2.sql b/tests/queries/0_stateless/01470_columns_transformers2.sql new file mode 100644 index 00000000000..3691ef1e65d --- /dev/null +++ b/tests/queries/0_stateless/01470_columns_transformers2.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS columns_transformers; + +CREATE TABLE columns_transformers (i int, j int, k int, a_bytes int, b_bytes int, c_bytes int) Engine=TinyLog; +INSERT INTO columns_transformers VALUES (100, 10, 324, 120, 8, 23); +SELECT * EXCEPT 'bytes', COLUMNS('bytes') APPLY formatReadableSize FROM columns_transformers; + +DROP TABLE IF EXISTS columns_transformers; diff --git a/tests/queries/0_stateless/01478_not_equi-join_on.reference b/tests/queries/0_stateless/01478_not_equi-join_on.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01478_not_equi-join_on.sql b/tests/queries/0_stateless/01478_not_equi-join_on.sql new file mode 100644 index 00000000000..b52af5fcb46 --- /dev/null +++ b/tests/queries/0_stateless/01478_not_equi-join_on.sql @@ -0,0 +1,7 @@ +SELECT * FROM (SELECT NULL AS a, 1 AS b) AS foo +RIGHT JOIN (SELECT 1024 AS b) AS bar +ON 1 = foo.b; -- { serverError 403 } + +SELECT * FROM (SELECT NULL AS a, 1 AS b) AS foo +RIGHT JOIN (SELECT 1024 AS b) AS bar +ON 1 = bar.b; -- { serverError 403 } diff --git a/tests/queries/0_stateless/01499_log_deadlock.reference b/tests/queries/0_stateless/01499_log_deadlock.reference new file mode 100644 index 00000000000..166be640db5 --- /dev/null +++ b/tests/queries/0_stateless/01499_log_deadlock.reference @@ -0,0 +1,3 @@ +6 +6 +6 diff --git a/tests/queries/0_stateless/01499_log_deadlock.sql b/tests/queries/0_stateless/01499_log_deadlock.sql new file mode 100644 index 00000000000..e98b37f2455 --- /dev/null +++ b/tests/queries/0_stateless/01499_log_deadlock.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt8) ENGINE = TinyLog; + +INSERT INTO t VALUES (1), (2), (3); +INSERT INTO t SELECT * FROM t; +SELECT count() FROM t; + +DROP TABLE t; + + +CREATE TABLE t (x UInt8) ENGINE = Log; + +INSERT INTO t VALUES (1), (2), (3); +INSERT INTO t SELECT * FROM t; +SELECT count() FROM t; + +DROP TABLE t; + + +CREATE TABLE t (x UInt8) ENGINE = StripeLog; + +INSERT INTO t VALUES (1), (2), (3); +INSERT INTO t SELECT * FROM t; +SELECT count() FROM t; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/01502_bar_overflow.sql b/tests/queries/0_stateless/01502_bar_overflow.sql index 
cb3de7ac20b..4829b487f52 100644 --- a/tests/queries/0_stateless/01502_bar_overflow.sql +++ b/tests/queries/0_stateless/01502_bar_overflow.sql @@ -1 +1 @@ -SELECT bar((greatCircleAngle(100, -1, number, number) - number) * 2, -9223372036854775808, 1023, 100) FROM numbers(1048575); -- { serverError 12 } +SELECT bar((greatCircleAngle(100, -1, number, number) - number) * 2, -9223372036854775808, 1023, 100) FROM numbers(1048575) FORMAT Null; diff --git a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.reference b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.reference new file mode 100644 index 00000000000..4bf85ae79f3 --- /dev/null +++ b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.reference @@ -0,0 +1,6 @@ +Testing TinyLog +Done TinyLog +Testing StripeLog +Done StripeLog +Testing Log +Done Log diff --git a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh new file mode 100755 index 00000000000..856f4c1516f --- /dev/null +++ b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -e + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +function thread_create { + while true; do + $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS $1 (x UInt64, s Array(Nullable(String))) ENGINE = $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|57)' + sleep 0.0$RANDOM + done +} + +function thread_drop { + while true; do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS $1" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|57)' + sleep 0.0$RANDOM + done +} + +function thread_rename { + while true; do + $CLICKHOUSE_CLIENT --query "RENAME TABLE $1 TO $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|57)' + sleep 0.0$RANDOM + done +} + +function thread_select { + while true; do + $CLICKHOUSE_CLIENT --query "SELECT * FROM $1 FORMAT Null" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT rand64(1), [toString(rand64(2))] FROM numbers($2)" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert_select { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT * FROM $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +export -f thread_create +export -f thread_drop +export -f thread_rename +export -f thread_select +export -f thread_insert +export -f thread_insert_select + + +# Do randomized queries and expect nothing extraordinary happens. 
+ +function test_with_engine { + echo "Testing $1" + + timeout 10 bash -c "thread_create t1 $1" & + timeout 10 bash -c "thread_create t2 $1" & + timeout 10 bash -c 'thread_drop t1' & + timeout 10 bash -c 'thread_drop t2' & + timeout 10 bash -c 'thread_rename t1 t2' & + timeout 10 bash -c 'thread_rename t2 t1' & + timeout 10 bash -c 'thread_select t1' & + timeout 10 bash -c 'thread_select t2' & + timeout 10 bash -c 'thread_insert t1 5' & + timeout 10 bash -c 'thread_insert t2 10' & + timeout 10 bash -c 'thread_insert_select t1 t2' & + timeout 10 bash -c 'thread_insert_select t2 t1' & + + wait + echo "Done $1" +} + +test_with_engine TinyLog +test_with_engine StripeLog +test_with_engine Log diff --git a/tests/queries/0_stateless/01505_log_distributed_deadlock.reference b/tests/queries/0_stateless/01505_log_distributed_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01505_log_distributed_deadlock.sql b/tests/queries/0_stateless/01505_log_distributed_deadlock.sql new file mode 100644 index 00000000000..2b0b2b97188 --- /dev/null +++ b/tests/queries/0_stateless/01505_log_distributed_deadlock.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t_local; +DROP TABLE IF EXISTS t_dist; + +create table t_local(a int) engine Log; +create table t_dist (a int) engine Distributed(test_shard_localhost, currentDatabase(), 't_local', cityHash64(a)); + +set insert_distributed_sync = 1; + +insert into t_dist values (1); + +DROP TABLE t_local; +DROP TABLE t_dist; diff --git a/tests/queries/0_stateless/01516_create_table_primary_key.sql b/tests/queries/0_stateless/01516_create_table_primary_key.sql index 0611612f89f..c0e9ce4a34d 100644 --- a/tests/queries/0_stateless/01516_create_table_primary_key.sql +++ b/tests/queries/0_stateless/01516_create_table_primary_key.sql @@ -18,10 +18,6 @@ ATTACH TABLE primary_key_test(v Int32) ENGINE=ReplacingMergeTree ORDER BY v PRIM SELECT * FROM primary_key_test FINAL; DROP TABLE primary_key_test; -CREATE TABLE primary_key_test(v Int32, PRIMARY KEY(v), PRIMARY KEY(v)) ENGINE=ReplacingMergeTree ORDER BY v; -- { clientError 36; } - -CREATE TABLE primary_key_test(v Int32, PRIMARY KEY(v)) ENGINE=ReplacingMergeTree ORDER BY v PRIMARY KEY(v); -- { clientError 36; } - CREATE TABLE primary_key_test(v1 Int32, v2 Int32, PRIMARY KEY(v1, v2)) ENGINE=ReplacingMergeTree ORDER BY (v1, v2); INSERT INTO primary_key_test VALUES (1, 1), (1, 1), (1, 1); DETACH TABLE primary_key_test; @@ -36,10 +32,6 @@ ATTACH TABLE primary_key_test(v1 Int32, v2 Int32) ENGINE=ReplacingMergeTree ORDE SELECT * FROM primary_key_test FINAL; DROP TABLE primary_key_test; -CREATE TABLE primary_key_test(v1 Int32, v2 Int32, PRIMARY KEY(v1, v2), PRIMARY KEY(v1, v2)) ENGINE=ReplacingMergeTree ORDER BY (v1, v2); -- { clientError 36; } - -CREATE TABLE primary_key_test(v1 Int32, v2 Int32, PRIMARY KEY(v1, v2)) ENGINE=ReplacingMergeTree ORDER BY (v1, v2) PRIMARY KEY(v1, v2); -- { clientError 36; } - CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36; } CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY (v1, gcd(v1, v2)); diff --git a/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.reference b/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.reference index 39beae69d65..f3b92461df7 100644 --- a/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.reference 
+++ b/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.reference @@ -7,6 +7,6 @@ 1 1 2 -00000000-0000-0001-0000-000000000000 -00000000-0000-0001-0000-000000000000 -00000000-0000-0002-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 diff --git a/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.sql b/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.sql index 265f931c556..3472f41092d 100644 --- a/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.sql +++ b/tests/queries/0_stateless/01554_bloom_filter_index_big_integer_uuid.sql @@ -19,7 +19,7 @@ SELECT x FROM 01154_test WHERE x IN (1, 2); DROP TABLE 01154_test; CREATE TABLE 01154_test (x UUID, INDEX ix_x x TYPE bloom_filter(0.01) GRANULARITY 1) ENGINE = MergeTree() ORDER BY x SETTINGS index_granularity=8192; -INSERT INTO 01154_test VALUES (toUUID(1)), (toUUID(2)), (toUUID(3)); -SELECT x FROM 01154_test WHERE x = toUUID(1); -SELECT x FROM 01154_test WHERE x IN (toUUID(1), toUUID(2)); +INSERT INTO 01154_test VALUES (toUUID('00000000-0000-0000-0000-000000000001')), (toUUID('00000000-0000-0000-0000-000000000002')), (toUUID('00000000-0000-0000-0000-000000000003')); +SELECT x FROM 01154_test WHERE x = toUUID('00000000-0000-0000-0000-000000000001'); +SELECT x FROM 01154_test WHERE x IN (toUUID('00000000-0000-0000-0000-000000000001'), toUUID('00000000-0000-0000-0000-000000000002')); DROP TABLE 01154_test; diff --git a/tests/queries/0_stateless/01621_summap_check_types.reference b/tests/queries/0_stateless/01621_summap_check_types.reference new file mode 100644 index 00000000000..1aeb897d4d0 --- /dev/null +++ b/tests/queries/0_stateless/01621_summap_check_types.reference @@ -0,0 +1 @@ +([1,2],[1,2],[1,0]) diff --git a/tests/queries/0_stateless/01621_summap_check_types.sql b/tests/queries/0_stateless/01621_summap_check_types.sql new file mode 100644 index 00000000000..a950f3ea094 --- /dev/null +++ b/tests/queries/0_stateless/01621_summap_check_types.sql @@ -0,0 +1,5 @@ +select initializeAggregation('sumMap', [1, 2], [1, 2], [1, null]); + +CREATE TEMPORARY TABLE sum_map_overflow (events Array(UInt8), counts Array(UInt8)); +INSERT INTO sum_map_overflow VALUES ([1], [255]), ([1], [2]); +SELECT [NULL], sumMapWithOverflow(events, [NULL], [[(NULL)]], counts) FROM sum_map_overflow; -- { serverError 43 } diff --git a/tests/queries/0_stateless/01622_byte_size.reference b/tests/queries/0_stateless/01622_byte_size.reference new file mode 100644 index 00000000000..cae5bc94f9e --- /dev/null +++ b/tests/queries/0_stateless/01622_byte_size.reference @@ -0,0 +1,66 @@ + +# byteSize + +byteSize for numbers #0 +1 UInt8 1 UInt16 2 UInt32 4 UInt64 8 UInt256 32 +2 UInt8 1 UInt16 2 UInt32 4 UInt64 8 UInt256 32 +1 Int8 1 Int16 2 Int32 4 Int64 8 Int128 16 UInt256 32 +2 Int8 1 Int16 2 Int32 4 Int64 8 Int128 16 UInt256 32 +1 Float32 4 Float64 8 +2 Float32 4 Float64 8 + +byteSize for numbers #1 +1 97 Date 2 DateTime 4 DateTime64(3) 8 UUID 16 +2 97 Date 2 DateTime 4 DateTime64(3) 8 UUID 16 + +byteSize for constants +1 1 256 2 65536 4 4294967296 8 0.5 8 1e-10 8 +2020-01-01 2 2020-01-01 01:02:03 4 2020-01-01 01:02:03.000 8 +UUID 16 + +byteSize for strings +1 39 9 a 10 \0\0\0\0\0\0\0\0 8 abcde\0\0\0 8 +2 43 abced 14 9 abcde\0\0\0 8 \0\0\0\0\0\0\0\0 8 +constants: 9 a 10 abcde 14 + +byteSize for simple array +1 60 [] 8 [] 8 [] 8 [] 8 [] 8 [] 8 [] 8 +2 92 [1] 9 [-1] 9 [256] 12 [1.1] 12 [1.1000] 12 ['2020-01-01'] 10 
['61f0c404-5cb3-11e7-907b-a6006ad3dba0'] 24 +3 124 [1,1] 10 [-1,-1] 10 [256,256] 16 [1.1,1.1] 16 [1.1000,1.1000] 16 ['2020-01-01','2020-01-01'] 12 ['61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0'] 40 +4 156 [1,1,1] 11 [-1,-1,-1] 11 [256,256,256] 20 [1.1,1.1,1.1] 20 [1.1000,1.1000,1.1000] 20 ['2020-01-01','2020-01-01','2020-01-01'] 14 ['61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0'] 56 +constants: [] 8 [1,1] 10 [-1,-1] 10 Array(UInt16) 12 Array(Float64) 24 +constants: [1.1000,1.1000] 16 ['2020-01-01','2020-01-01'] 12 +constants: ['61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0'] 40 + +byteSize for int array of arrays +1 52 [] 8 [[]] 16 +2 102 [1,2] 16 [[],[1,2]] 32 +3 155 [0,256] 16 [[],[1,2],[0,256]] 48 +4 226 [256,65536] 16 [[],[1,2],[0,256],[256,65536]] 64 +constants: [[],[1,2],[0,65536]] Array(Array(UInt32)) 48 + +byteSize for string array of arrays +1 52 [] 8 [[]] 16 +2 102 [''] 17 [[],['']] 33 +3 155 ['','a'] 27 [[],[''],['','a']] 60 +4 226 ['','a','abced'] 41 [[],[''],['','a'],['','a','abced']] 101 +constants: [[],[''],['','a']] 60 + +byteSize for others: Nullable, Tuple, LowCardinality +1 42 \N 5 \N 10 (1,NULL) 14 9 +2 45 1 5 a 11 (1,'a') 15 a 10 +3 57 256 5 abcde 15 (256,'abcde') 19 abcde 14 +constants: \N 1 (65536,NULL) 5 (65536,'a') 15 +constants: abced LowCardinality(String) 14 + +byteSize for complex fields +1 12 [] 8 +2 25 [(NULL,[])] 21 +3 25 [('a\0\0\0',[])] 21 +4 45 [('a\0\0\0',[(NULL,'a')])] 41 +5 65 [('a\0\0\0',[(NULL,'a'),(NULL,'a')])] 61 +6 124 [(NULL,[]),('a\0\0\0',[]),('a\0\0\0',[(NULL,'a')]),('a\0\0\0',[(NULL,'a'),(NULL,'a')])] 120 +constants: (NULL,[]) 9 ('a\0\0\0',[]) 13 ('a\0\0\0',[(NULL,'a')]) 24 ('a\0\0\0',[(NULL,'a'),(NULL,'a')]) 35 +constants: [(NULL,[]),('a\0\0\0',[]),('a\0\0\0',[(NULL,'a')]),('a\0\0\0',[(NULL,'a'),(NULL,'a')])] +constants: Array(Tuple(Nullable(FixedString(4)), Array(Tuple(Nullable(Nothing), String)))) +constants: 93 diff --git a/tests/queries/0_stateless/01622_byte_size.sql b/tests/queries/0_stateless/01622_byte_size.sql new file mode 100644 index 00000000000..9f9de4e58e9 --- /dev/null +++ b/tests/queries/0_stateless/01622_byte_size.sql @@ -0,0 +1,205 @@ +-- +-- byteSize +-- +select ''; +select '# byteSize'; + +set allow_experimental_bigint_types = 1; + +-- numbers #0 -- +select ''; +select 'byteSize for numbers #0'; +drop table if exists test_byte_size_number0; +create table test_byte_size_number0 +( + key Int32, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + u256 UInt256, + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + i128 Int128, + i256 Int256, + f32 Float32, + f64 Float64 +) engine MergeTree order by key; + +insert into test_byte_size_number0 values(1, 8, 16, 32, 64, 256, -8, -16, -32, -64, -128, -256, 32.32, 64.64); +insert into test_byte_size_number0 values(2, 8, 16, 32, 64, 256, -8, -16, -32, -64, -128, -256, 32.32, 64.64); + +select key, toTypeName(u8), byteSize(u8), toTypeName(u16), byteSize(u16), toTypeName(u32), byteSize(u32), toTypeName(u64), byteSize(u64), toTypeName(u256), byteSize(u256) from test_byte_size_number0 order by key; +select key, toTypeName(i8), byteSize(i8), toTypeName(i16), byteSize(i16), toTypeName(i32), byteSize(i32), toTypeName(i64), byteSize(i64), toTypeName(i128), byteSize(i128), toTypeName(u256), byteSize(u256) from test_byte_size_number0 order by key; +select key, toTypeName(f32), byteSize(f32), toTypeName(f64), byteSize(f64) from test_byte_size_number0 order by key; 
+ +drop table if exists test_byte_size_number0; + + +-- numbers #1 -- +select ''; +select 'byteSize for numbers #1'; +drop table if exists test_byte_size_number1; +create table test_byte_size_number1 +( + key Int32, + date Date, + dt DateTime, + dt64 DateTime64(3), + en8 Enum8('a'=1, 'b'=2, 'c'=3, 'd'=4), + en16 Enum16('c'=100, 'l'=101, 'i'=102, 'ck'=103, 'h'=104, 'o'=105, 'u'=106, 's'=107, 'e'=108), + dec32 Decimal32(4), + dec64 Decimal64(8), + dec128 Decimal128(16), + dec256 Decimal256(16), + uuid UUID +) engine MergeTree order by key; + +insert into test_byte_size_number1 values(1, '2020-01-01', '2020-01-01 01:02:03', '2020-02-02 01:02:03', 'a', 'ck', 32.32, 64.64, 128.128, 256.256, generateUUIDv4()); +insert into test_byte_size_number1 values(2, '2020-01-01', '2020-01-01 01:02:03', '2020-02-02 01:02:03', 'a', 'ck', 32.32, 64.64, 128.128, 256.256, generateUUIDv4()); + +select key, byteSize(*), toTypeName(date), byteSize(date), toTypeName(dt), byteSize(dt), toTypeName(dt64), byteSize(dt64), toTypeName(uuid), byteSize(uuid) from test_byte_size_number1 order by key; + +drop table if exists test_byte_size_number1; + + +-- constant numbers -- +select ''; +select 'byteSize for constants'; +select 0x1, byteSize(0x1), 0x100, byteSize(0x100), 0x10000, byteSize(0x10000), 0x100000000, byteSize(0x100000000), 0.5, byteSize(0.5), 1e-10, byteSize(1e-10); +select toDate('2020-01-01'), byteSize(toDate('2020-01-01')), toDateTime('2020-01-01 01:02:03'), byteSize(toDateTime('2020-01-01 01:02:03')), toDateTime64('2020-01-01 01:02:03',3), byteSize(toDateTime64('2020-01-01 01:02:03',3)); +select toTypeName(generateUUIDv4()), byteSize(generateUUIDv4()); + + +-- strings -- +select ''; +select 'byteSize for strings'; +drop table if exists test_byte_size_string; +create table test_byte_size_string +( + key Int32, + str1 String, + str2 String, + fstr1 FixedString(8), + fstr2 FixedString(8) +) engine MergeTree order by key; + +insert into test_byte_size_string values(1, '', 'a', '', 'abcde'); +insert into test_byte_size_string values(2, 'abced', '', 'abcde', ''); + +select key, byteSize(*), str1, byteSize(str1), str2, byteSize(str2), fstr1, byteSize(fstr1), fstr2, byteSize(fstr2) from test_byte_size_string order by key; +select 'constants: ', '', byteSize(''), 'a', byteSize('a'), 'abcde', byteSize('abcde'); + +drop table if exists test_byte_size_string; + + +-- simple arrays -- +drop table if exists test_byte_size_array; +create table test_byte_size_array +( + key Int32, + uints8 Array(UInt8), + ints8 Array(Int8), + ints32 Array(Int32), + floats32 Array(Float32), + decs32 Array(Decimal32(4)), + dates Array(Date), + uuids Array(UUID) +) engine MergeTree order by key; + +insert into test_byte_size_array values(1, [], [], [], [], [], [], []); +insert into test_byte_size_array values(2, [1], [-1], [256], [1.1], [1.1], ['2020-01-01'], ['61f0c404-5cb3-11e7-907b-a6006ad3dba0']); +insert into test_byte_size_array values(3, [1,1], [-1,-1], [256,256], [1.1,1.1], [1.1,1.1], ['2020-01-01','2020-01-01'], ['61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0']); +insert into test_byte_size_array values(4, [1,1,1], [-1,-1,-1], [256,256,256], [1.1,1.1,1.1], [1.1,1.1,1.1], ['2020-01-01','2020-01-01','2020-01-01'], ['61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0','61f0c404-5cb3-11e7-907b-a6006ad3dba0']); + +select ''; +select 'byteSize for simple array'; +select key, byteSize(*), uints8, byteSize(uints8), ints8, byteSize(ints8), ints32, byteSize(ints32), floats32, 
byteSize(floats32), decs32, byteSize(decs32), dates, byteSize(dates), uuids, byteSize(uuids) from test_byte_size_array order by key; + +select 'constants:', [], byteSize([]), [1,1], byteSize([1,1]), [-1,-1], byteSize([-1,-1]), toTypeName([256,256]), byteSize([256,256]), toTypeName([1.1,1.1]), byteSize([1.1,1.1]); +select 'constants:', [toDecimal32(1.1,4),toDecimal32(1.1,4)], byteSize([toDecimal32(1.1,4),toDecimal32(1.1,4)]), [toDate('2020-01-01'),toDate('2020-01-01')], byteSize([toDate('2020-01-01'),toDate('2020-01-01')]); +select 'constants:', [toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'),toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')], byteSize([toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'),toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')]); + +drop table if exists test_byte_size_array; + + +-- complex arrays -- +drop table if exists test_byte_size_complex_array; +create table test_byte_size_complex_array +( + key Int32, + ints Array(Int32), + int_ints Array(Array(Int32)), + strs Array(String), + str_strs Array(Array(String)) +) engine MergeTree order by key; + +insert into test_byte_size_complex_array values(1, [], [[]], [], [[]]); +insert into test_byte_size_complex_array values(2, [1,2], [[], [1,2]], [''], [[], ['']]); +insert into test_byte_size_complex_array values(3, [0,256], [[], [1,2], [0,256]], ['','a'], [[], [''], ['','a']]); +insert into test_byte_size_complex_array values(4, [256,65536], [[], [1,2], [0,256], [256,65536]], ['','a','abced'], [[], [''], ['','a'], ['','a','abced']]); + +select ''; +select 'byteSize for int array of arrays'; +select key, byteSize(*), ints, byteSize(ints), int_ints, byteSize(int_ints) from test_byte_size_complex_array order by key; +select 'constants:', [[], [1,2], [0,0x10000]],toTypeName([[], [1,2], [0,0x10000]]), byteSize([[], [1,2], [0,0x10000]]); + +select ''; +select 'byteSize for string array of arrays'; +-- select key, byteSize(*), strs, byteSize(strs), str_strs, byteSize(str_strs) from test_byte_size_complex_array order by key; +select key, byteSize(*), strs, byteSize(strs), str_strs, byteSize(str_strs) from test_byte_size_complex_array order by key; +select 'constants:', [[], [''], ['','a']], byteSize([[], [''], ['','a']]); + +drop table if exists test_byte_size_complex_array; + + +-- others -- +drop table if exists test_byte_size_other; +create table test_byte_size_other +( + key Int32, + opt_int32 Nullable(Int32), + opt_str Nullable(String), + tuple Tuple(Int32, Nullable(String)), + strings LowCardinality(String) +) engine MergeTree order by key; + +insert into test_byte_size_other values(1, NULL, NULL, tuple(1, NULL), ''); +insert into test_byte_size_other values(2, 1, 'a', tuple(1, 'a'), 'a'); +insert into test_byte_size_other values(3, 256, 'abcde', tuple(256, 'abcde'), 'abcde'); + +select ''; +select 'byteSize for others: Nullable, Tuple, LowCardinality'; +select key, byteSize(*), opt_int32, byteSize(opt_int32), opt_str, byteSize(opt_str), tuple, byteSize(tuple), strings, byteSize(strings) from test_byte_size_other order by key; +select 'constants:', NULL, byteSize(NULL), tuple(0x10000, NULL), byteSize(tuple(0x10000, NULL)), tuple(0x10000, toNullable('a')), byteSize(tuple(0x10000, toNullable('a'))); +select 'constants:', toLowCardinality('abced'),toTypeName(toLowCardinality('abced')), byteSize(toLowCardinality('abced')); + +drop table if exists test_byte_size_other; + + +-- more complex fields -- +drop table if exists test_byte_size_more_complex; +create table test_byte_size_more_complex +( + key Int32, + complex1 
Array(Tuple(Nullable(FixedString(4)), Array(Tuple(Nullable(String), String)))) +) engine MergeTree order by key; + +insert into test_byte_size_more_complex values(1, []); +insert into test_byte_size_more_complex values(2, [tuple(NULL, [])]); +insert into test_byte_size_more_complex values(3, [tuple('a', [])]); +insert into test_byte_size_more_complex values(4, [tuple('a', [tuple(NULL, 'a')])]); +insert into test_byte_size_more_complex values(5, [tuple('a', [tuple(NULL, 'a'), tuple(NULL, 'a')])]); +insert into test_byte_size_more_complex values(6, [tuple(NULL, []), tuple('a', []), tuple('a', [tuple(NULL, 'a')]), tuple('a', [tuple(NULL, 'a'), tuple(NULL, 'a')])]); + +select ''; +select 'byteSize for complex fields'; +select key, byteSize(*), complex1, byteSize(complex1) from test_byte_size_more_complex order by key; +select 'constants:', tuple(NULL, []), byteSize(tuple(NULL, [])), tuple(toNullable(toFixedString('a',4)), []), byteSize(tuple(toNullable(toFixedString('a',4)), [])), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a')]), byteSize(tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a')])), tuple(toFixedString('a',4), [tuple(NULL, 'a'), tuple(NULL, 'a')]), byteSize(tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a'), tuple(NULL, 'a')])); +select 'constants:', [tuple(NULL, []), tuple(toNullable(toFixedString('a',4)), []), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a')]), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a'), tuple(NULL, 'a')])]; +select 'constants:', toTypeName([tuple(NULL, []), tuple(toNullable(toFixedString('a',4)), []), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a')]), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a'), tuple(NULL, 'a')])]); +select 'constants:', byteSize([tuple(NULL, []), tuple(toNullable(toFixedString('a',4)), []), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a')]), tuple(toNullable(toFixedString('a',4)), [tuple(NULL, 'a'), tuple(NULL, 'a')])]); + +drop table if exists test_byte_size_more_complex; diff --git a/tests/queries/0_stateless/01623_byte_size_const.reference b/tests/queries/0_stateless/01623_byte_size_const.reference new file mode 100644 index 00000000000..905722bdeb9 --- /dev/null +++ b/tests/queries/0_stateless/01623_byte_size_const.reference @@ -0,0 +1,4 @@ +9 1 +16 1 +16 1 +0 diff --git a/tests/queries/0_stateless/01623_byte_size_const.sql b/tests/queries/0_stateless/01623_byte_size_const.sql new file mode 100644 index 00000000000..584f67be80d --- /dev/null +++ b/tests/queries/0_stateless/01623_byte_size_const.sql @@ -0,0 +1,3 @@ +SELECT byteSize(123, 456.7) AS x, isConstant(x); +SELECT byteSize(number, number + 1) AS x, isConstant(x) FROM numbers(2); +SELECT byteSize(); diff --git a/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.reference b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.reference new file mode 100644 index 00000000000..c780ad9e04a --- /dev/null +++ b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.reference @@ -0,0 +1,3 @@ +8746326176292337648 +---mutation--- +11916226932045201400 diff --git a/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.sql b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.sql new file mode 100644 index 00000000000..a61a10f9849 --- /dev/null +++ 
b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_aggregating_merge_tree.sql @@ -0,0 +1,219 @@ +DROP TABLE IF EXISTS simple_agf_summing_mt; + +CREATE TABLE simple_agf_summing_mt +( + a Int64, + min_aggreg AggregateFunction(min, UInt64), + min_simple SimpleAggregateFunction(min, UInt64), + max_aggreg AggregateFunction(max, UInt64), + max_simple SimpleAggregateFunction(max, UInt64), + sum_aggreg AggregateFunction(sum, UInt64), + sum_simple SimpleAggregateFunction(sum, UInt64), + sumov_aggreg AggregateFunction(sumWithOverflow, UInt64), + sumov_simple SimpleAggregateFunction(sumWithOverflow, UInt64), + gbitand_aggreg AggregateFunction(groupBitAnd, UInt64), + gbitand_simple SimpleAggregateFunction(groupBitAnd, UInt64), + gbitor_aggreg AggregateFunction(groupBitOr, UInt64), + gbitor_simple SimpleAggregateFunction(groupBitOr, UInt64), + gbitxor_aggreg AggregateFunction(groupBitXor, UInt64), + gbitxor_simple SimpleAggregateFunction(groupBitXor, UInt64), + gra_aggreg AggregateFunction(groupArrayArray, Array(UInt64)), + gra_simple SimpleAggregateFunction(groupArrayArray, Array(UInt64)), + grp_aggreg AggregateFunction(groupUniqArrayArray, Array(UInt64)), + grp_simple SimpleAggregateFunction(groupUniqArrayArray, Array(UInt64)), + aggreg_map AggregateFunction(sumMap, Tuple(Array(String), Array(UInt64))), + simple_map SimpleAggregateFunction(sumMap, Tuple(Array(String), Array(UInt64))), + aggreg_map_min AggregateFunction(minMap, Tuple(Array(String), Array(UInt64))), + simple_map_min SimpleAggregateFunction(minMap, Tuple(Array(String), Array(UInt64))), + aggreg_map_max AggregateFunction(maxMap, Tuple(Array(String), Array(UInt64))), + simple_map_max SimpleAggregateFunction(maxMap, Tuple(Array(String), Array(UInt64))) +) +ENGINE = SummingMergeTree +ORDER BY a; + +INSERT INTO simple_agf_summing_mt SELECT + number % 51 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState(number + 111111111), + groupBitAnd(number + 111111111), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 1000)]), + groupArrayArray([toUInt64(number % 1000)]), + groupUniqArrayArrayState([toUInt64(number % 500)]), + groupUniqArrayArray([toUInt64(number % 500)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(10000) +GROUP BY a; + +INSERT INTO simple_agf_summing_mt SELECT + number % 1151 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState(number + 111111111), + groupBitAnd(number + 111111111), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 1000)]), + 
groupArrayArray([toUInt64(number % 1000)]), + groupUniqArrayArrayState([toUInt64(number % 500)]), + groupUniqArrayArray([toUInt64(number % 500)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(1000000) +GROUP BY a; + +OPTIMIZE TABLE simple_agf_summing_mt FINAL; + +SELECT cityHash64(groupArray(cityHash64(*))) FROM ( + SELECT + a % 31 AS g, + minMerge(min_aggreg) AS minagg, + min(min_simple) AS mins, + minagg = mins AS M, + maxMerge(max_aggreg) AS maxagg, + max(max_simple) AS maxs, + maxagg = maxs AS MX, + sumMerge(sum_aggreg) AS sumagg, + sum(sum_simple) AS sums, + sumagg = sums AS S, + sumWithOverflowMerge(sumov_aggreg) AS sumaggov, + sumWithOverflow(sumov_simple) AS sumsov, + sumaggov = sumsov AS SO, + groupBitAndMerge(gbitand_aggreg) AS gbitandaggreg, + groupBitAnd(gbitand_simple) AS gbitandsimple, + gbitandaggreg = gbitandsimple AS BIT_AND, + groupBitOrMerge(gbitor_aggreg) AS gbitoraggreg, + groupBitOr(gbitor_simple) AS gbitorsimple, + gbitoraggreg = gbitorsimple AS BIT_OR, + groupBitXorMerge(gbitxor_aggreg) AS gbitxoraggreg, + groupBitXor(gbitxor_simple) AS gbitxorsimple, + gbitxoraggreg = gbitxorsimple AS BITXOR, + arraySort(groupArrayArrayMerge(gra_aggreg)) AS graa, + arraySort(groupArrayArray(gra_simple)) AS gras, + graa = gras AS GAA, + arraySort(groupUniqArrayArrayMerge(grp_aggreg)) AS gra, + arraySort(groupUniqArrayArray(grp_simple)) AS grs, + gra = grs AS T, + sumMapMerge(aggreg_map) AS smmapagg, + sumMap(simple_map) AS smmaps, + smmapagg = smmaps AS SM, + minMapMerge(aggreg_map_min) AS minmapapagg, + minMap(simple_map_min) AS minmaps, + minmapapagg = minmaps AS SMIN, + maxMapMerge(aggreg_map_max) AS maxmapapagg, + maxMap(simple_map_max) AS maxmaps, + maxmapapagg = maxmaps AS SMAX + FROM simple_agf_summing_mt + GROUP BY g + ORDER BY g +); + +SELECT '---mutation---'; + +ALTER TABLE simple_agf_summing_mt + DELETE WHERE (a % 3) = 0 +SETTINGS mutations_sync = 1; + +INSERT INTO simple_agf_summing_mt SELECT + number % 11151 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState((number % 3) + 111111110), + groupBitAnd((number % 3) + 111111110), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 100)]), + groupArrayArray([toUInt64(number % 100)]), + groupUniqArrayArrayState([toUInt64(number % 50)]), + groupUniqArrayArray([toUInt64(number % 50)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), 
range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(1000000) +GROUP BY a; + +OPTIMIZE TABLE simple_agf_summing_mt FINAL; + +SELECT cityHash64(groupArray(cityHash64(*))) FROM ( + SELECT + a % 31 AS g, + minMerge(min_aggreg) AS minagg, + min(min_simple) AS mins, + minagg = mins AS M, + maxMerge(max_aggreg) AS maxagg, + max(max_simple) AS maxs, + maxagg = maxs AS MX, + sumMerge(sum_aggreg) AS sumagg, + sum(sum_simple) AS sums, + sumagg = sums AS S, + sumWithOverflowMerge(sumov_aggreg) AS sumaggov, + sumWithOverflow(sumov_simple) AS sumsov, + sumaggov = sumsov AS SO, + groupBitAndMerge(gbitand_aggreg) AS gbitandaggreg, + groupBitAnd(gbitand_simple) AS gbitandsimple, + gbitandaggreg = gbitandsimple AS BIT_AND, + groupBitOrMerge(gbitor_aggreg) AS gbitoraggreg, + groupBitOr(gbitor_simple) AS gbitorsimple, + gbitoraggreg = gbitorsimple AS BIT_OR, + groupBitXorMerge(gbitxor_aggreg) AS gbitxoraggreg, + groupBitXor(gbitxor_simple) AS gbitxorsimple, + gbitxoraggreg = gbitxorsimple AS BITXOR, + arraySort(groupArrayArrayMerge(gra_aggreg)) AS graa, + arraySort(groupArrayArray(gra_simple)) AS gras, + graa = gras AS GAA, + arraySort(groupUniqArrayArrayMerge(grp_aggreg)) AS gra, + arraySort(groupUniqArrayArray(grp_simple)) AS grs, + gra = grs AS T, + sumMapMerge(aggreg_map) AS smmapagg, + sumMap(simple_map) AS smmaps, + smmapagg = smmaps AS SM, + minMapMerge(aggreg_map_min) AS minmapapagg, + minMap(simple_map_min) AS minmaps, + minmapapagg = minmaps AS SMIN, + maxMapMerge(aggreg_map_max) AS maxmapapagg, + maxMap(simple_map_max) AS maxmaps, + maxmapapagg = maxmaps AS SMAX + FROM simple_agf_summing_mt + GROUP BY g + ORDER BY g +); + +DROP TABLE simple_agf_summing_mt; diff --git a/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.reference b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.reference new file mode 100644 index 00000000000..c780ad9e04a --- /dev/null +++ b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.reference @@ -0,0 +1,3 @@ +8746326176292337648 +---mutation--- +11916226932045201400 diff --git a/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.sql b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.sql new file mode 100644 index 00000000000..c29d64a7f01 --- /dev/null +++ b/tests/queries/0_stateless/01630_simple_aggregate_all_functions_in_summing_merge_tree.sql @@ -0,0 +1,219 @@ +DROP TABLE IF EXISTS simple_agf_aggregating_mt; + +CREATE TABLE simple_agf_aggregating_mt +( + a Int64, + min_aggreg AggregateFunction(min, UInt64), + min_simple SimpleAggregateFunction(min, UInt64), + max_aggreg AggregateFunction(max, UInt64), + max_simple SimpleAggregateFunction(max, UInt64), + sum_aggreg AggregateFunction(sum, UInt64), + sum_simple SimpleAggregateFunction(sum, UInt64), + sumov_aggreg AggregateFunction(sumWithOverflow, UInt64), + sumov_simple SimpleAggregateFunction(sumWithOverflow, UInt64), + gbitand_aggreg AggregateFunction(groupBitAnd, UInt64), + gbitand_simple SimpleAggregateFunction(groupBitAnd, UInt64), + gbitor_aggreg AggregateFunction(groupBitOr, UInt64), + gbitor_simple SimpleAggregateFunction(groupBitOr, UInt64), + gbitxor_aggreg AggregateFunction(groupBitXor, UInt64), + gbitxor_simple SimpleAggregateFunction(groupBitXor, UInt64), + gra_aggreg AggregateFunction(groupArrayArray, Array(UInt64)), + gra_simple SimpleAggregateFunction(groupArrayArray, 
Array(UInt64)), + grp_aggreg AggregateFunction(groupUniqArrayArray, Array(UInt64)), + grp_simple SimpleAggregateFunction(groupUniqArrayArray, Array(UInt64)), + aggreg_map AggregateFunction(sumMap, Tuple(Array(String), Array(UInt64))), + simple_map SimpleAggregateFunction(sumMap, Tuple(Array(String), Array(UInt64))), + aggreg_map_min AggregateFunction(minMap, Tuple(Array(String), Array(UInt64))), + simple_map_min SimpleAggregateFunction(minMap, Tuple(Array(String), Array(UInt64))), + aggreg_map_max AggregateFunction(maxMap, Tuple(Array(String), Array(UInt64))), + simple_map_max SimpleAggregateFunction(maxMap, Tuple(Array(String), Array(UInt64))) +) +ENGINE = AggregatingMergeTree +ORDER BY a; + +INSERT INTO simple_agf_aggregating_mt SELECT + number % 51 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState(number + 111111111), + groupBitAnd(number + 111111111), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 1000)]), + groupArrayArray([toUInt64(number % 1000)]), + groupUniqArrayArrayState([toUInt64(number % 500)]), + groupUniqArrayArray([toUInt64(number % 500)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(10000) +GROUP BY a; + +INSERT INTO simple_agf_aggregating_mt SELECT + number % 1151 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState(number + 111111111), + groupBitAnd(number + 111111111), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 1000)]), + groupArrayArray([toUInt64(number % 1000)]), + groupUniqArrayArrayState([toUInt64(number % 500)]), + groupUniqArrayArray([toUInt64(number % 500)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(1000000) +GROUP BY a; + +OPTIMIZE TABLE simple_agf_aggregating_mt FINAL; + +SELECT cityHash64(groupArray(cityHash64(*))) FROM ( + SELECT + a % 31 AS g, + minMerge(min_aggreg) AS minagg, + min(min_simple) AS mins, + minagg = mins AS M, + maxMerge(max_aggreg) AS maxagg, + max(max_simple) AS maxs, + maxagg = maxs AS MX, + sumMerge(sum_aggreg) AS sumagg, + sum(sum_simple) AS 
sums, + sumagg = sums AS S, + sumWithOverflowMerge(sumov_aggreg) AS sumaggov, + sumWithOverflow(sumov_simple) AS sumsov, + sumaggov = sumsov AS SO, + groupBitAndMerge(gbitand_aggreg) AS gbitandaggreg, + groupBitAnd(gbitand_simple) AS gbitandsimple, + gbitandaggreg = gbitandsimple AS BIT_AND, + groupBitOrMerge(gbitor_aggreg) AS gbitoraggreg, + groupBitOr(gbitor_simple) AS gbitorsimple, + gbitoraggreg = gbitorsimple AS BIT_OR, + groupBitXorMerge(gbitxor_aggreg) AS gbitxoraggreg, + groupBitXor(gbitxor_simple) AS gbitxorsimple, + gbitxoraggreg = gbitxorsimple AS BITXOR, + arraySort(groupArrayArrayMerge(gra_aggreg)) AS graa, + arraySort(groupArrayArray(gra_simple)) AS gras, + graa = gras AS GAA, + arraySort(groupUniqArrayArrayMerge(grp_aggreg)) AS gra, + arraySort(groupUniqArrayArray(grp_simple)) AS grs, + gra = grs AS T, + sumMapMerge(aggreg_map) AS smmapagg, + sumMap(simple_map) AS smmaps, + smmapagg = smmaps AS SM, + minMapMerge(aggreg_map_min) AS minmapapagg, + minMap(simple_map_min) AS minmaps, + minmapapagg = minmaps AS SMIN, + maxMapMerge(aggreg_map_max) AS maxmapapagg, + maxMap(simple_map_max) AS maxmaps, + maxmapapagg = maxmaps AS SMAX + FROM simple_agf_aggregating_mt + GROUP BY g + ORDER BY g +); + +SELECT '---mutation---'; + +ALTER TABLE simple_agf_aggregating_mt + DELETE WHERE (a % 3) = 0 +SETTINGS mutations_sync = 1; + +INSERT INTO simple_agf_aggregating_mt SELECT + number % 11151 AS a, + minState(number), + min(number), + maxState(number), + max(number), + sumState(number), + sum(number), + sumWithOverflowState(number), + sumWithOverflow(number), + groupBitAndState((number % 3) + 111111110), + groupBitAnd((number % 3) + 111111110), + groupBitOrState(number + 111111111), + groupBitOr(number + 111111111), + groupBitXorState(number + 111111111), + groupBitXor(number + 111111111), + groupArrayArrayState([toUInt64(number % 100)]), + groupArrayArray([toUInt64(number % 100)]), + groupUniqArrayArrayState([toUInt64(number % 50)]), + groupUniqArrayArray([toUInt64(number % 50)]), + sumMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + minMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMapState((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))), + maxMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) +FROM numbers(1000000) +GROUP BY a; + +OPTIMIZE TABLE simple_agf_aggregating_mt FINAL; + +SELECT cityHash64(groupArray(cityHash64(*))) FROM ( +SELECT + a % 31 AS g, + minMerge(min_aggreg) AS minagg, + min(min_simple) AS mins, + minagg = mins AS M, + maxMerge(max_aggreg) AS maxagg, + max(max_simple) AS maxs, + maxagg = maxs AS MX, + sumMerge(sum_aggreg) AS sumagg, + sum(sum_simple) AS sums, + sumagg = sums AS S, + sumWithOverflowMerge(sumov_aggreg) AS sumaggov, + sumWithOverflow(sumov_simple) AS sumsov, + sumaggov = sumsov AS SO, + groupBitAndMerge(gbitand_aggreg) AS gbitandaggreg, + groupBitAnd(gbitand_simple) AS gbitandsimple, + gbitandaggreg = gbitandsimple AS BIT_AND, + groupBitOrMerge(gbitor_aggreg) AS gbitoraggreg, + groupBitOr(gbitor_simple) AS gbitorsimple, + gbitoraggreg = gbitorsimple AS BIT_OR, + groupBitXorMerge(gbitxor_aggreg) AS gbitxoraggreg, + groupBitXor(gbitxor_simple) AS gbitxorsimple, + gbitxoraggreg = gbitxorsimple AS BITXOR, + 
arraySort(groupArrayArrayMerge(gra_aggreg)) AS graa, + arraySort(groupArrayArray(gra_simple)) AS gras, + graa = gras AS GAA, + arraySort(groupUniqArrayArrayMerge(grp_aggreg)) AS gra, + arraySort(groupUniqArrayArray(grp_simple)) AS grs, + gra = grs AS T, + sumMapMerge(aggreg_map) AS smmapagg, + sumMap(simple_map) AS smmaps, + smmapagg = smmaps AS SM, + minMapMerge(aggreg_map_min) AS minmapapagg, + minMap(simple_map_min) AS minmaps, + minmapapagg = minmaps AS SMIN, + maxMapMerge(aggreg_map_max) AS maxmapapagg, + maxMap(simple_map_max) AS maxmaps, + maxmapapagg = maxmaps AS SMAX + FROM simple_agf_aggregating_mt + GROUP BY g + ORDER BY g +); + +DROP TABLE simple_agf_aggregating_mt; diff --git a/tests/queries/0_stateless/01632_group_array_msan.reference b/tests/queries/0_stateless/01632_group_array_msan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01632_group_array_msan.sql b/tests/queries/0_stateless/01632_group_array_msan.sql new file mode 100644 index 00000000000..0000f158d4e --- /dev/null +++ b/tests/queries/0_stateless/01632_group_array_msan.sql @@ -0,0 +1 @@ +SELECT groupArrayMerge(1048577)(y * 1048576) FROM (SELECT groupArrayState(9223372036854775807)(x) AS y FROM (SELECT 1048576 AS x)) FORMAT Null; diff --git a/tests/queries/0_stateless/01632_max_partitions_to_read.reference b/tests/queries/0_stateless/01632_max_partitions_to_read.reference new file mode 100644 index 00000000000..ea2526e1301 --- /dev/null +++ b/tests/queries/0_stateless/01632_max_partitions_to_read.reference @@ -0,0 +1,6 @@ +2021-01-01 1 2 +2021-01-02 4 5 +2021-01-01 1 2 +2021-01-02 4 5 +2021-01-01 1 2 +2021-01-02 4 5 diff --git a/tests/queries/0_stateless/01632_max_partitions_to_read.sql b/tests/queries/0_stateless/01632_max_partitions_to_read.sql new file mode 100644 index 00000000000..b91405569bc --- /dev/null +++ b/tests/queries/0_stateless/01632_max_partitions_to_read.sql @@ -0,0 +1,17 @@ +drop table if exists p; + +create table p(d Date, i int, j int) engine MergeTree partition by d order by i settings max_partitions_to_read = 1; + +insert into p values ('2021-01-01', 1, 2), ('2021-01-02', 4, 5); + +select * from p order by i; -- { serverError 565 } + +select * from p order by i settings max_partitions_to_read = 2; + +select * from p order by i settings max_partitions_to_read = 0; -- unlimited + +alter table p modify setting max_partitions_to_read = 2; + +select * from p order by i; + +drop table if exists p; diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.reference b/tests/queries/0_stateless/01632_tinylog_read_write.reference new file mode 100644 index 00000000000..a965a70ed4e --- /dev/null +++ b/tests/queries/0_stateless/01632_tinylog_read_write.reference @@ -0,0 +1 @@ +Done diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.sh b/tests/queries/0_stateless/01632_tinylog_read_write.sh new file mode 100755 index 00000000000..3f41bcc5924 --- /dev/null +++ b/tests/queries/0_stateless/01632_tinylog_read_write.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;" + +function thread_select { + while true; do + $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + sleep 0.0$RANDOM + done +} + +function thread_insert { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO test VALUES (1, ['Hello'])" + sleep 0.0$RANDOM + done +} + +export -f thread_select +export -f thread_insert + + +# Do randomized queries and expect nothing extraordinary happens. + +timeout 10 bash -c 'thread_select' & +timeout 10 bash -c 'thread_select' & +timeout 10 bash -c 'thread_select' & +timeout 10 bash -c 'thread_select' & + +timeout 10 bash -c 'thread_insert' & +timeout 10 bash -c 'thread_insert' & +timeout 10 bash -c 'thread_insert' & +timeout 10 bash -c 'thread_insert' & + +wait +echo "Done" + +$CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test;" diff --git a/tests/queries/0_stateless/01633_limit_fuzz.reference b/tests/queries/0_stateless/01633_limit_fuzz.reference new file mode 100644 index 00000000000..207bbdca0ba --- /dev/null +++ b/tests/queries/0_stateless/01633_limit_fuzz.reference @@ -0,0 +1 @@ +(1025,1),(1026,1),(1027,1),(1028,1),(1029,1),(1030,1),(1031,1),(1032,1),(1033,1),(1034,1),(1035,1),(1036,1),(1037,1),(1038,1),(1039,1),(1040,1),(1041,1),(1042,1),(1043,1),(1044,1),(1045,1),(1046,1),(1047,1),(1048,1),(1049,1),(1050,1),(1051,1),(1052,1),(1053,1),(1054,1),(1055,1),(1056,1),(1057,1),(1058,1),(1059,1),(1060,1),(1061,1),(1062,1),(1063,1),(1064,1),(1065,1),(1066,1),(1067,1),(1068,1),(1069,1),(1070,1),(1071,1),(1072,1),(1073,1),(1074,1),(1075,1),(1076,1),(1077,1),(1078,1),(1079,1),(1080,1),(1081,1),(1082,1),(1083,1),(1084,1),(1085,1),(1086,1),(1087,1),(1088,1),(1089,1),(1090,1),(1091,1),(1092,1),(1093,1),(1094,1),(1095,1),(1096,1),(1097,1),(1098,1),(1099,1),(1100,1),(1101,1),(1102,1),(1103,1),(1104,1),(1105,1),(1106,1),(1107,1),(1108,1),(1109,1),(1110,1),(1111,1),(1112,1),(1113,1),(1114,1),(1115,1),(1116,1),(1117,1),(1118,1),(1119,1),(1120,1),(1121,1),(1122,1),(1123,1),(1124,1),(1125,1),(1126,1),(1127,1),(1128,1),(1129,1),(1130,1),(1131,1),(1132,1),(1133,1),(1134,1),(1135,1),(1136,1),(1137,1),(1138,1),(1139,1),(1140,1),(1141,1),(1142,1),(1143,1),(1144,1),(1145,1),(1146,1),(1147,1),(1148,1),(1149,1),(1150,1),(1151,1),(1152,1),(1153,1),(1154,1),(1155,1),(1156,1),(1157,1),(1158,1),(1159,1),(1160,1),(1161,1),(1162,1),(1163,1),(1164,1),(1165,1),(1166,1),(1167,1),(1168,1),(1169,1),(1170,1),(1171,1),(1172,1),(1173,1),(1174,1),(1175,1),(1176,1),(1177,1),(1178,1),(1179,1),(1180,1),(1181,1),(1182,1),(1183,1),(1184,1),(1185,1),(1186,1),(1187,1),(1188,1),(1189,1),(1190,1),(1191,1),(1192,1),(1193,1),(1194,1),(1195,1),(1196,1),(1197,1),(1198,1),(1199,1),(1200,1),(1201,1),(1202,1),(1203,1),(1204,1),(1205,1),(1206,1),(1207,1),(1208,1),(1209,1),(1210,1),(1211,1),(1212,1),(1213,1),(1214,1),(1215,1),(1216,1),(1217,1),(1218,1),(1219,1),(1220,1),(1221,1),(1222,1),(1223,1),(1224,1),(1225,1),(1226,1),(1227,1),(1228,1),(1229,1),(1230,1),(1231,1),(1232,1),(1233,1),(1234,1),(1235,1),(1236,1),(1237,1),(1238,1),(1239,1),(1240,1),(1241,1),(1242,1),(1243,1),(1244,1),(1245,1),(1246,1),(1247,1),(1248,1),(1249,1),(1250,1),(1251,1),(1252,1),(1253,1),(1254,1),(1255,1),(1256,1),(1257,1),(1258,1),(1259,1),(1260,1),(1261,1),(1262,1),(1263,1),(1264,1),(1265,1),(1266,1),(1267,1),(1268,1),(1269,1),(1270,1),(1271,1),(1272,1),(1273,1),(1274,1),(1275,1),(1276,1),(1277,1),(1278,1),(1279,1),(128
0,1),(1281,1),(1282,1),(1283,1),(1284,1),(1285,1),(1286,1),(1287,1),(1288,1),(1289,1),(1290,1),(1291,1),(1292,1),(1293,1),(1294,1),(1295,1),(1296,1),(1297,1),(1298,1),(1299,1),(1300,1),(1301,1),(1302,1),(1303,1),(1304,1),(1305,1),(1306,1),(1307,1),(1308,1),(1309,1),(1310,1),(1311,1),(1312,1),(1313,1),(1314,1),(1315,1),(1316,1),(1317,1),(1318,1),(1319,1),(1320,1),(1321,1),(1322,1),(1323,1),(1324,1),(1325,1),(1326,1),(1327,1),(1328,1),(1329,1),(1330,1),(1331,1),(1332,1),(1333,1),(1334,1),(1335,1),(1336,1),(1337,1),(1338,1),(1339,1),(1340,1),(1341,1),(1342,1),(1343,1),(1344,1),(1345,1),(1346,1),(1347,1),(1348,1),(1349,1),(1350,1),(1351,1),(1352,1),(1353,1),(1354,1),(1355,1),(1356,1),(1357,1),(1358,1),(1359,1),(1360,1),(1361,1),(1362,1),(1363,1),(1364,1),(1365,1),(1366,1),(1367,1),(1368,1),(1369,1),(1370,1),(1371,1),(1372,1),(1373,1),(1374,1),(1375,1),(1376,1),(1377,1),(1378,1),(1379,1),(1380,1),(1381,1),(1382,1),(1383,1),(1384,1),(1385,1),(1386,1),(1387,1),(1388,1),(1389,1),(1390,1),(1391,1),(1392,1),(1393,1),(1394,1),(1395,1),(1396,1),(1397,1),(1398,1),(1399,1),(1400,1),(1401,1),(1402,1),(1403,1),(1404,1),(1405,1),(1406,1),(1407,1),(1408,1),(1409,1),(1410,1),(1411,1),(1412,1),(1413,1),(1414,1),(1415,1),(1416,1),(1417,1),(1418,1),(1419,1),(1420,1),(1421,1),(1422,1),(1423,1),(1424,1),(1425,1),(1426,1),(1427,1),(1428,1),(1429,1),(1430,1),(1431,1),(1432,1),(1433,1),(1434,1),(1435,1),(1436,1),(1437,1),(1438,1),(1439,1),(1440,1),(1441,1),(1442,1),(1443,1),(1444,1),(1445,1),(1446,1),(1447,1),(1448,1),(1449,1),(1450,1),(1451,1),(1452,1),(1453,1),(1454,1),(1455,1),(1456,1),(1457,1),(1458,1),(1459,1),(1460,1),(1461,1),(1462,1),(1463,1),(1464,1),(1465,1),(1466,1),(1467,1),(1468,1),(1469,1),(1470,1),(1471,1),(1472,1),(1473,1),(1474,1),(1475,1),(1476,1),(1477,1),(1478,1),(1479,1),(1480,1),(1481,1),(1482,1),(1483,1),(1484,1),(1485,1),(1486,1),(1487,1),(1488,1),(1489,1),(1490,1),(1491,1),(1492,1),(1493,1),(1494,1),(1495,1),(1496,1),(1497,1),(1498,1),(1499,1),(1500,1),(1501,1),(1502,1),(1503,1),(1504,1),(1505,1),(1506,1),(1507,1),(1508,1),(1509,1),(1510,1),(1511,1),(1512,1),(1513,1),(1514,1),(1515,1),(1516,1),(1517,1),(1518,1),(1519,1),(1520,1),(1521,1),(1522,1),(1523,1),(1524,1),(1525,1),(1526,1),(1527,1),(1528,1),(1529,1),(1530,1),(1531,1),(1532,1),(1533,1),(1534,1),(1535,1),(1536,1),(1537,1),(1538,1),(1539,1),(1540,1),(1541,1),(1542,1),(1543,1),(1544,1),(1545,1),(1546,1),(1547,1),(1548,1),(1549,1),(1550,1),(1551,1),(1552,1),(1553,1),(1554,1),(1555,1),(1556,1),(1557,1),(1558,1),(1559,1),(1560,1),(1561,1),(1562,1),(1563,1),(1564,1),(1565,1),(1566,1),(1567,1),(1568,1),(1569,1),(1570,1),(1571,1),(1572,1),(1573,1),(1574,1),(1575,1),(1576,1),(1577,1),(1578,1),(1579,1),(1580,1),(1581,1),(1582,1),(1583,1),(1584,1),(1585,1),(1586,1),(1587,1),(1588,1),(1589,1),(1590,1),(1591,1),(1592,1),(1593,1),(1594,1),(1595,1),(1596,1),(1597,1),(1598,1),(1599,1),(1600,1),(1601,1),(1602,1),(1603,1),(1604,1),(1605,1),(1606,1),(1607,1),(1608,1),(1609,1),(1610,1),(1611,1),(1612,1),(1613,1),(1614,1),(1615,1),(1616,1),(1617,1),(1618,1),(1619,1),(1620,1),(1621,1),(1622,1),(1623,1),(1624,1),(1625,1),(1626,1),(1627,1),(1628,1),(1629,1),(1630,1),(1631,1),(1632,1),(1633,1),(1634,1),(1635,1),(1636,1),(1637,1),(1638,1),(1639,1),(1640,1),(1641,1),(1642,1),(1643,1),(1644,1),(1645,1),(1646,1),(1647,1),(1648,1),(1649,1),(1650,1),(1651,1),(1652,1),(1653,1),(1654,1),(1655,1),(1656,1),(1657,1),(1658,1),(1659,1),(1660,1),(1661,1),(1662,1),(1663,1),(1664,1),(1665,1),(1666,1),(1667,1),(1668,1),(1669,1),(1670,1),(1671,1),(1672,1),(1673,1),(1674,1),(167
5,1),(1676,1),(1677,1),(1678,1),(1679,1),(1680,1),(1681,1),(1682,1),(1683,1),(1684,1),(1685,1),(1686,1),(1687,1),(1688,1),(1689,1),(1690,1),(1691,1),(1692,1),(1693,1),(1694,1),(1695,1),(1696,1),(1697,1),(1698,1),(1699,1),(1700,1),(1701,1),(1702,1),(1703,1),(1704,1),(1705,1),(1706,1),(1707,1),(1708,1),(1709,1),(1710,1),(1711,1),(1712,1),(1713,1),(1714,1),(1715,1),(1716,1),(1717,1),(1718,1),(1719,1),(1720,1),(1721,1),(1722,1),(1723,1),(1724,1),(1725,1),(1726,1),(1727,1),(1728,1),(1729,1),(1730,1),(1731,1),(1732,1),(1733,1),(1734,1),(1735,1),(1736,1),(1737,1),(1738,1),(1739,1),(1740,1),(1741,1),(1742,1),(1743,1),(1744,1),(1745,1),(1746,1),(1747,1),(1748,1),(1749,1),(1750,1),(1751,1),(1752,1),(1753,1),(1754,1),(1755,1),(1756,1),(1757,1),(1758,1),(1759,1),(1760,1),(1761,1),(1762,1),(1763,1),(1764,1),(1765,1),(1766,1),(1767,1),(1768,1),(1769,1),(1770,1),(1771,1),(1772,1),(1773,1),(1774,1),(1775,1),(1776,1),(1777,1),(1778,1),(1779,1),(1780,1),(1781,1),(1782,1),(1783,1),(1784,1),(1785,1),(1786,1),(1787,1),(1788,1),(1789,1),(1790,1),(1791,1),(1792,1),(1793,1),(1794,1),(1795,1),(1796,1),(1797,1),(1798,1),(1799,1),(1800,1),(1801,1),(1802,1),(1803,1),(1804,1),(1805,1),(1806,1),(1807,1),(1808,1),(1809,1),(1810,1),(1811,1),(1812,1),(1813,1),(1814,1),(1815,1),(1816,1),(1817,1),(1818,1),(1819,1),(1820,1),(1821,1),(1822,1),(1823,1),(1824,1),(1825,1),(1826,1),(1827,1),(1828,1),(1829,1),(1830,1),(1831,1),(1832,1),(1833,1),(1834,1),(1835,1),(1836,1),(1837,1),(1838,1),(1839,1),(1840,1),(1841,1),(1842,1),(1843,1),(1844,1),(1845,1),(1846,1),(1847,1),(1848,1),(1849,1),(1850,1),(1851,1),(1852,1),(1853,1),(1854,1),(1855,1),(1856,1),(1857,1),(1858,1),(1859,1),(1860,1),(1861,1),(1862,1),(1863,1),(1864,1),(1865,1),(1866,1),(1867,1),(1868,1),(1869,1),(1870,1),(1871,1),(1872,1),(1873,1),(1874,1),(1875,1),(1876,1),(1877,1),(1878,1),(1879,1),(1880,1),(1881,1),(1882,1),(1883,1),(1884,1),(1885,1),(1886,1),(1887,1),(1888,1),(1889,1),(1890,1),(1891,1),(1892,1),(1893,1),(1894,1),(1895,1),(1896,1),(1897,1),(1898,1),(1899,1),(1900,1),(1901,1),(1902,1),(1903,1),(1904,1),(1905,1),(1906,1),(1907,1),(1908,1),(1909,1),(1910,1),(1911,1),(1912,1),(1913,1),(1914,1),(1915,1),(1916,1),(1917,1),(1918,1),(1919,1),(1920,1),(1921,1),(1922,1),(1923,1),(1924,1),(1925,1),(1926,1),(1927,1),(1928,1),(1929,1),(1930,1),(1931,1),(1932,1),(1933,1),(1934,1),(1935,1),(1936,1),(1937,1),(1938,1),(1939,1),(1940,1),(1941,1),(1942,1),(1943,1),(1944,1),(1945,1),(1946,1),(1947,1),(1948,1),(1949,1),(1950,1),(1951,1),(1952,1),(1953,1),(1954,1),(1955,1),(1956,1),(1957,1),(1958,1),(1959,1),(1960,1),(1961,1),(1962,1),(1963,1),(1964,1),(1965,1),(1966,1),(1967,1),(1968,1),(1969,1),(1970,1),(1971,1),(1972,1),(1973,1),(1974,1),(1975,1),(1976,1),(1977,1),(1978,1),(1979,1),(1980,1),(1981,1),(1982,1),(1983,1),(1984,1),(1985,1),(1986,1),(1987,1),(1988,1),(1989,1),(1990,1),(1991,1),(1992,1),(1993,1),(1994,1),(1995,1),(1996,1),(1997,1),(1998,1),(1999,1),(2000,1),(2001,1),(2002,1),(2003,1),(2004,1),(2005,1),(2006,1),(2007,1),(2008,1),(2009,1),(2010,1),(2011,1),(2012,1),(2013,1),(2014,1),(2015,1),(2016,1),(2017,1),(2018,1),(2019,1),(2020,1),(2021,1),(2022,1),(2023,1),(2024,1),(2025,1),(2026,1),(2027,1),(2028,1),(2029,1),(2030,1),(2031,1),(2032,1),(2033,1),(2034,1),(2035,1),(2036,1),(2037,1),(2038,1),(2039,1),(2040,1),(2041,1),(2042,1),(2043,1),(2044,1),(2045,1),(2046,1),(2047,1) \ No newline at end of file diff --git a/tests/queries/0_stateless/01633_limit_fuzz.sql b/tests/queries/0_stateless/01633_limit_fuzz.sql new file mode 100644 index 00000000000..3e11513e1c4 --- 
/dev/null +++ b/tests/queries/0_stateless/01633_limit_fuzz.sql @@ -0,0 +1 @@ +SELECT number, 1 AS k FROM numbers(100000) ORDER BY k, number LIMIT 1025, 1023 FORMAT Values; diff --git a/tests/queries/0_stateless/01634_sum_map_nulls.reference b/tests/queries/0_stateless/01634_sum_map_nulls.reference new file mode 100644 index 00000000000..cba3db5eb23 --- /dev/null +++ b/tests/queries/0_stateless/01634_sum_map_nulls.reference @@ -0,0 +1,4 @@ +([1,2],[2,1],[9,0]) +([1,2],[2,1],[-1,0]) +([1,2],[2,1],[9,10]) +([1,2],[2,1],[-1,10]) diff --git a/tests/queries/0_stateless/01634_sum_map_nulls.sql b/tests/queries/0_stateless/01634_sum_map_nulls.sql new file mode 100644 index 00000000000..a0b892f9803 --- /dev/null +++ b/tests/queries/0_stateless/01634_sum_map_nulls.sql @@ -0,0 +1,5 @@ +SELECT initializeAggregation('sumMap', [1, 2, 1], [1, 1, 1], [-1, null, 10]); +SELECT initializeAggregation('sumMap', [1, 2, 1], [1, 1, 1], [-1, null, null]); +SELECT initializeAggregation('sumMap', [1, 2, 1], [1, 1, 1], [null, null, null]); -- { serverError 43 } +SELECT initializeAggregation('sumMap', [1, 2, 1], [1, 1, 1], [-1, 10, 10]); +SELECT initializeAggregation('sumMap', [1, 2, 1], [1, 1, 1], [-1, 10, null]); diff --git a/tests/queries/0_stateless/01634_uuid_fuzz.reference b/tests/queries/0_stateless/01634_uuid_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01634_uuid_fuzz.sql b/tests/queries/0_stateless/01634_uuid_fuzz.sql new file mode 100644 index 00000000000..62ca209f6f3 --- /dev/null +++ b/tests/queries/0_stateless/01634_uuid_fuzz.sql @@ -0,0 +1 @@ +SELECT toUUID(-1.1); -- { serverError 48 } diff --git a/tests/queries/0_stateless/01635_nullable_fuzz.reference b/tests/queries/0_stateless/01635_nullable_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01635_nullable_fuzz.sql b/tests/queries/0_stateless/01635_nullable_fuzz.sql new file mode 100644 index 00000000000..c134578b221 --- /dev/null +++ b/tests/queries/0_stateless/01635_nullable_fuzz.sql @@ -0,0 +1,21 @@ +SELECT + 'Nul\0able\0String)Nul\0\0ble(String)Nul\0able(String)Nul\0able(String)', + NULL AND 2, + '', + number, + NULL AS k +FROM +( + SELECT + materialize(NULL) OR materialize(-9223372036854775808), + number + FROM system.numbers + LIMIT 1000000 +) +ORDER BY + k ASC, + number ASC, + k ASC +LIMIT 1023, 1023 +SETTINGS max_bytes_before_external_sort = 1000000 +FORMAT Null; diff --git a/tests/queries/0_stateless/01635_sum_map_fuzz.reference b/tests/queries/0_stateless/01635_sum_map_fuzz.reference new file mode 100644 index 00000000000..1aeb897d4d0 --- /dev/null +++ b/tests/queries/0_stateless/01635_sum_map_fuzz.reference @@ -0,0 +1 @@ +([1,2],[1,2],[1,0]) diff --git a/tests/queries/0_stateless/01635_sum_map_fuzz.sql b/tests/queries/0_stateless/01635_sum_map_fuzz.sql new file mode 100644 index 00000000000..0749e6e6be6 --- /dev/null +++ b/tests/queries/0_stateless/01635_sum_map_fuzz.sql @@ -0,0 +1,6 @@ +SELECT finalizeAggregation(*) FROM (select initializeAggregation('sumMapState', [1, 2], [1, 2], [1, null])); + +DROP TABLE IF EXISTS sum_map_overflow; +CREATE TABLE sum_map_overflow(events Array(UInt8), counts Array(UInt8)) ENGINE = Log; +SELECT [NULL], sumMapWithOverflow(events, [NULL], [[(NULL)]], counts) FROM sum_map_overflow; -- { serverError 43 } +DROP TABLE sum_map_overflow; diff --git a/tests/queries/0_stateless/01636_nullable_fuzz2.reference b/tests/queries/0_stateless/01636_nullable_fuzz2.reference new file mode 100644 index 
00000000000..b26b82b1cb9 --- /dev/null +++ b/tests/queries/0_stateless/01636_nullable_fuzz2.reference @@ -0,0 +1,6 @@ +2020-07-09 +2020-07-10 +2020-07-11 +2020-07-12 + +1970-01-01 diff --git a/tests/queries/0_stateless/01636_nullable_fuzz2.sql b/tests/queries/0_stateless/01636_nullable_fuzz2.sql new file mode 100644 index 00000000000..a40da51c38c --- /dev/null +++ b/tests/queries/0_stateless/01636_nullable_fuzz2.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS open_events_tmp; +DROP TABLE IF EXISTS tracking_events_tmp; + +CREATE TABLE open_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toMonday(EventDate) ORDER BY (APIKey, EventDate); +CREATE TABLE tracking_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (APIKey, EventDate); + +insert into open_events_tmp select 2, '2020-07-10' from numbers(32); +insert into open_events_tmp select 2, '2020-07-11' from numbers(31); +insert into open_events_tmp select 2, '2020-07-12' from numbers(30); + +insert into tracking_events_tmp select 2, '2020-07-09' from numbers(1555); +insert into tracking_events_tmp select 2, '2020-07-10' from numbers(1881); +insert into tracking_events_tmp select 2, '2020-07-11' from numbers(1623); + +SELECT EventDate +FROM +( + SELECT EventDate + FROM tracking_events_tmp AS t1 + WHERE (EventDate >= toDate('2020-07-09')) AND (EventDate <= toDate('2020-07-11')) AND (APIKey = 2) + GROUP BY EventDate +) +FULL OUTER JOIN +( + SELECT EventDate + FROM remote('127.0.0.{1,3}', currentDatabase(), open_events_tmp) AS t2 + WHERE (EventDate <= toDate('2020-07-12')) AND (APIKey = 2) + GROUP BY EventDate + WITH TOTALS +) USING (EventDate) +ORDER BY EventDate ASC +SETTINGS totals_mode = 'after_having_auto', group_by_overflow_mode = 'any', max_rows_to_group_by = 10000000, joined_subquery_requires_alias = 0; + +DROP TABLE open_events_tmp; +DROP TABLE tracking_events_tmp; diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.reference b/tests/queries/0_stateless/01637_nullable_fuzz3.reference new file mode 100644 index 00000000000..d9cf16b9d2a --- /dev/null +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.reference @@ -0,0 +1,2 @@ + +0 diff --git a/tests/queries/0_stateless/01637_nullable_fuzz3.sql b/tests/queries/0_stateless/01637_nullable_fuzz3.sql new file mode 100644 index 00000000000..21bf8999eae --- /dev/null +++ b/tests/queries/0_stateless/01637_nullable_fuzz3.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (`item_id` UInt64, `price_sold` Float32, `date` Date) ENGINE = MergeTree ORDER BY item_id; +SELECT item_id FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS l FULL OUTER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) AS r USING (item_id); +DROP TABLE t; diff --git a/tests/queries/0_stateless/01638_div_mod_ambiguities.reference b/tests/queries/0_stateless/01638_div_mod_ambiguities.reference new file mode 100644 index 00000000000..85d05c8624a --- /dev/null +++ b/tests/queries/0_stateless/01638_div_mod_ambiguities.reference @@ -0,0 +1,16 @@ +MOD +1 +MOD +1 +modulo(DIV, 1) +0 +DIV +1 +mod +1 +MOD +1 +modulo(DIV, 1) +0 +div +1 diff --git a/tests/queries/0_stateless/01638_div_mod_ambiguities.sql b/tests/queries/0_stateless/01638_div_mod_ambiguities.sql new file mode 100644 index 00000000000..5c011e7e99d --- /dev/null +++ b/tests/queries/0_stateless/01638_div_mod_ambiguities.sql @@ -0,0 +1,9 @@ +SELECT DIV AS MOD FROM (SELECT 1 `DIV`) FORMAT TSVWithNames; +SELECT DIV `MOD` FROM (SELECT 1 `DIV`) FORMAT TSVWithNames; 
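+-- DIV and MOD double as the integer-division and modulo operators, so the queries in this file +-- check that the parser still accepts the same tokens as column names and aliases; +-- the lowercase variants below repeat the same checks.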
+SELECT DIV MOD 1 FROM (SELECT 1 `DIV`) FORMAT TSVWithNames; +SELECT 1 DIV `MOD` `DIV` FROM (SELECT 1 `MOD`) FORMAT TSVWithNames; + +SELECT DIV AS mod FROM (SELECT 1 `DIV`) FORMAT TSVWithNames; +SELECT div `MOD` FROM (SELECT 1 `div`) FORMAT TSVWithNames; +SELECT DIV mod 1 FROM (SELECT 1 `DIV`) FORMAT TSVWithNames; +SELECT 1 div `mod` `div` FROM (SELECT 1 `mod`) FORMAT TSVWithNames; diff --git a/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.reference b/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.reference new file mode 100644 index 00000000000..ad86be865c1 --- /dev/null +++ b/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.reference @@ -0,0 +1,6 @@ +128 +256 +128 +256 +128 +256 diff --git a/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.sql b/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.sql new file mode 100644 index 00000000000..7fb81efaac8 --- /dev/null +++ b/tests/queries/0_stateless/01639_distributed_sync_insert_zero_rows.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS local; +DROP TABLE IF EXISTS distributed; + +CREATE TABLE local (x UInt8) ENGINE = Memory; +CREATE TABLE distributed AS local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), local, x); + +SET insert_distributed_sync = 1; + +INSERT INTO distributed SELECT number FROM numbers(256) WHERE number % 2 = 0; +SELECT count() FROM local; +SELECT count() FROM distributed; + +TRUNCATE TABLE local; +INSERT INTO distributed SELECT number FROM numbers(256) WHERE number % 2 = 1; +SELECT count() FROM local; +SELECT count() FROM distributed; + +TRUNCATE TABLE local; +INSERT INTO distributed SELECT number FROM numbers(256) WHERE number < 128; +SELECT count() FROM local; +SELECT count() FROM distributed; + +DROP TABLE local; +DROP TABLE distributed; diff --git a/tests/queries/0_stateless/01640_distributed_async_insert_compression.reference b/tests/queries/0_stateless/01640_distributed_async_insert_compression.reference new file mode 100644 index 00000000000..fef47999e2e --- /dev/null +++ b/tests/queries/0_stateless/01640_distributed_async_insert_compression.reference @@ -0,0 +1,2 @@ +256 +512 diff --git a/tests/queries/0_stateless/01640_distributed_async_insert_compression.sql b/tests/queries/0_stateless/01640_distributed_async_insert_compression.sql new file mode 100644 index 00000000000..b0a674b8389 --- /dev/null +++ b/tests/queries/0_stateless/01640_distributed_async_insert_compression.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS local; +DROP TABLE IF EXISTS distributed; + +CREATE TABLE local (x UInt8) ENGINE = Memory; +CREATE TABLE distributed AS local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), local, x); + +SET insert_distributed_sync = 0, network_compression_method = 'zstd'; + +INSERT INTO distributed SELECT number FROM numbers(256); +SYSTEM FLUSH DISTRIBUTED distributed; + +SELECT count() FROM local; +SELECT count() FROM distributed; + +DROP TABLE local; +DROP TABLE distributed; diff --git a/tests/queries/0_stateless/01640_marks_corruption_regression.reference b/tests/queries/0_stateless/01640_marks_corruption_regression.reference new file mode 100644 index 00000000000..a71b69fef89 --- /dev/null +++ b/tests/queries/0_stateless/01640_marks_corruption_regression.reference @@ -0,0 +1,4 @@ +"marks",7 +"optimize_trivial_count_query",16384 +"max_threads=1",16384 +"max_threads=100",16384 diff --git a/tests/queries/0_stateless/01640_marks_corruption_regression.sql 
b/tests/queries/0_stateless/01640_marks_corruption_regression.sql new file mode 100644 index 00000000000..7ccd8741dda --- /dev/null +++ b/tests/queries/0_stateless/01640_marks_corruption_regression.sql @@ -0,0 +1,57 @@ +DROP TABLE IF EXISTS adaptive_table; + +CREATE TABLE adaptive_table( + key UInt64, + value String +) ENGINE MergeTree() +ORDER BY key +SETTINGS + index_granularity_bytes=1048576, + min_bytes_for_wide_part=0, + old_parts_lifetime=0, + index_granularity=8192 +; + +-- This triggers adjustment of the granules that was introduced in PR#17120 +INSERT INTO adaptive_table SELECT number, randomPrintableASCII(if(number BETWEEN 8192-30 AND 8192, 102400, 1)) FROM system.numbers LIMIT 16384; +-- This creates the following marks: +-- +-- $ check-marks /path/to/db/adaptive_table/all_*/key.{mrk2,bin} +-- Mark 0, points to 0, 0, has rows after 8192, decompressed size 72808.