Merge remote-tracking branch 'upstream/master' into HEAD

This commit is contained in:
Anton Popov 2021-01-11 13:51:12 +03:00
commit 36ae0e4d35
367 changed files with 4978 additions and 2552 deletions

View File

@ -1,6 +1,6 @@
---
name: Question
about: Ask question about ClickHouse
about: Ask a question about ClickHouse
title: ''
labels: question
assignees: ''

View File

@ -1,6 +1,6 @@
---
name: Unexpected behaviour
about: Create a report to help us improve ClickHouse
about: Some feature is working in non-obvious way
title: ''
labels: unexpected behaviour
assignees: ''

View File

@ -0,0 +1,30 @@
---
name: Incomplete implementation
about: Implementation of existing feature is not finished
title: ''
labels: unfinished code
assignees: ''
---
(you don't have to strictly follow this form)
**Describe the unexpected behaviour**
A clear and concise description of what works not as it is supposed to.
**How to reproduce**
* Which ClickHouse server version to use
* Which interface to use, if matters
* Non-default settings, if any
* `CREATE TABLE` statements for all tables involved
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Queries to run that lead to unexpected result
**Expected behavior**
A clear and concise description of what you expected to happen.
**Error message and/or stacktrace**
If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.

View File

@ -1,6 +1,6 @@
---
name: Usability issue
about: Create a report to help us improve ClickHouse
about: Report something can be made more convenient to use
title: ''
labels: usability
assignees: ''

View File

@ -1,6 +1,6 @@
---
name: Backward compatibility issue
about: Create a report to help us improve ClickHouse
about: Report the case when the behaviour of a new version can break existing use cases
title: ''
labels: backward compatibility
assignees: ''

View File

@ -0,0 +1,16 @@
---
name: Assertion found via fuzzing
about: Potential issue has been found via Fuzzer or Stress tests
title: ''
labels: fuzz
assignees: ''
---
(you don't have to strictly follow this form)
**Describe the bug**
A link to the report
**How to reproduce**
Try to reproduce the report and copy the tables and queries involved.

View File

@ -214,6 +214,19 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
endif ()
endif()
# Create BuildID when using lld. For other linkers it is created by default.
if (LINKER_NAME MATCHES "lld$")
# SHA1 is not cryptographically secure but it is the best what lld is offering.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
set (USE_BINARY_HASH 1)
endif ()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -12,7 +12,7 @@
# https://youtrack.jetbrains.com/issue/CPP-2659
# https://youtrack.jetbrains.com/issue/CPP-870
if (NOT DEFINED ENV{CLION_IDE})
if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
find_program(NINJA_PATH ninja)
if (NINJA_PATH)
set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "" FORCE)

View File

@ -56,6 +56,9 @@
#include <Common/Config/ConfigProcessor.h>
#include <Common/MemorySanitizer.h>
#include <Common/SymbolIndex.h>
#include <Common/getExecutablePath.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
@ -80,16 +83,6 @@ static void call_default_signal_handler(int sig)
raise(sig);
}
const char * msan_strsignal(int sig)
{
// Apparently strsignal is not instrumented by MemorySanitizer, so we
// have to unpoison it to avoid msan reports inside fmt library when we
// print it.
const char * signal_name = strsignal(sig);
__msan_unpoison_string(signal_name);
return signal_name;
}
static constexpr size_t max_query_id_size = 127;
static const size_t signal_pipe_buf_size =
@ -294,13 +287,13 @@ private:
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, msan_strsignal(sig), sig);
thread_num, strsignal(sig), sig);
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, msan_strsignal(sig), sig);
thread_num, query_id, strsignal(sig), sig);
}
String error_message;
@ -328,6 +321,32 @@ private:
/// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
#if defined(__linux__)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
{
LOG_FATAL(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
}
else if (calculated_binary_hash == daemon.stored_binary_hash)
{
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file was changed just after startup;"
" - the file is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, daemon.stored_binary_hash);
}
#endif
/// Write crash to system.crash_log table if available.
if (collectCrashLog)
collectCrashLog(sig, thread_num, query_id, stack_trace);
@ -481,8 +500,9 @@ void BaseDaemon::kill()
{
dumpCoverageReportIfPossible();
pid_file.reset();
if (::raise(SIGKILL) != 0)
throw Poco::SystemException("cannot kill process");
/// Exit with the same code as it is usually set by shell when process is terminated by SIGKILL.
/// It's better than doing 'raise' or 'kill', because they have no effect for 'init' process (with pid = 0, usually in Docker).
_exit(128 + SIGKILL);
}
std::string BaseDaemon::getDefaultCorePath() const
@ -787,6 +807,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
#else
build_id_info = "no build id";
#endif
#if defined(__linux__)
std::string executable_path = getExecutablePath();
if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getBinaryHash();
#endif
}
void BaseDaemon::logRevision() const
@ -846,13 +873,13 @@ void BaseDaemon::handleSignal(int signal_id)
onInterruptSignals(signal_id);
}
else
throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0);
throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
}
void BaseDaemon::onInterruptSignals(int signal_id)
{
is_cancelled = true;
LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id));
LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
if (sigint_signals_counter >= 2)
{
@ -998,3 +1025,9 @@ void BaseDaemon::setupWatchdog()
#endif
}
}
String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}

View File

@ -60,7 +60,7 @@ public:
static void terminate();
/// Forceful shutdown
void kill();
[[noreturn]] void kill();
/// Cancellation request has been received.
bool isCancelled() const
@ -121,6 +121,9 @@ public:
/// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly).
void shouldSetupWatchdog(char * argv0_);
/// Hash of the binary for integrity checks.
String getStoredBinaryHash() const;
protected:
virtual void logRevision() const;
@ -168,6 +171,7 @@ protected:
Poco::Util::AbstractConfiguration * last_configuration = nullptr;
String build_id_info;
String stored_binary_hash;
std::vector<int> handled_signals;

View File

@ -258,11 +258,3 @@ double lgamma_r(double x, int *signgamp)
r = nadj - r;
return r;
}
int signgam;
double lgamma(double x)
{
return lgamma_r(x, &signgam);
}

View File

@ -328,12 +328,3 @@ long double lgammal_r(long double x, int *sg)
return lgamma_r(x, sg);
}
#endif
int signgam_lgammal;
long double lgammal(long double x)
{
return lgammal_r(x, &signgam_lgammal);
}

View File

@ -0,0 +1,125 @@
#include <signal.h>
#include <string.h>
#if (SIGHUP == 1) && (SIGINT == 2) && (SIGQUIT == 3) && (SIGILL == 4) \
&& (SIGTRAP == 5) && (SIGABRT == 6) && (SIGBUS == 7) && (SIGFPE == 8) \
&& (SIGKILL == 9) && (SIGUSR1 == 10) && (SIGSEGV == 11) && (SIGUSR2 == 12) \
&& (SIGPIPE == 13) && (SIGALRM == 14) && (SIGTERM == 15) && (SIGSTKFLT == 16) \
&& (SIGCHLD == 17) && (SIGCONT == 18) && (SIGSTOP == 19) && (SIGTSTP == 20) \
&& (SIGTTIN == 21) && (SIGTTOU == 22) && (SIGURG == 23) && (SIGXCPU == 24) \
&& (SIGXFSZ == 25) && (SIGVTALRM == 26) && (SIGPROF == 27) && (SIGWINCH == 28) \
&& (SIGPOLL == 29) && (SIGPWR == 30) && (SIGSYS == 31)
#define sigmap(x) x
#else
static const char map[] = {
[SIGHUP] = 1,
[SIGINT] = 2,
[SIGQUIT] = 3,
[SIGILL] = 4,
[SIGTRAP] = 5,
[SIGABRT] = 6,
[SIGBUS] = 7,
[SIGFPE] = 8,
[SIGKILL] = 9,
[SIGUSR1] = 10,
[SIGSEGV] = 11,
[SIGUSR2] = 12,
[SIGPIPE] = 13,
[SIGALRM] = 14,
[SIGTERM] = 15,
#if defined(SIGSTKFLT)
[SIGSTKFLT] = 16,
#elif defined(SIGEMT)
[SIGEMT] = 16,
#endif
[SIGCHLD] = 17,
[SIGCONT] = 18,
[SIGSTOP] = 19,
[SIGTSTP] = 20,
[SIGTTIN] = 21,
[SIGTTOU] = 22,
[SIGURG] = 23,
[SIGXCPU] = 24,
[SIGXFSZ] = 25,
[SIGVTALRM] = 26,
[SIGPROF] = 27,
[SIGWINCH] = 28,
[SIGPOLL] = 29,
[SIGPWR] = 30,
[SIGSYS] = 31
};
#define sigmap(x) ((x) >= sizeof map ? (x) : map[(x)])
#endif
static const char strings[] =
"Unknown signal\0"
"Hangup\0"
"Interrupt\0"
"Quit\0"
"Illegal instruction\0"
"Trace/breakpoint trap\0"
"Aborted\0"
"Bus error\0"
"Arithmetic exception\0"
"Killed\0"
"User defined signal 1\0"
"Segmentation fault\0"
"User defined signal 2\0"
"Broken pipe\0"
"Alarm clock\0"
"Terminated\0"
#if defined(SIGSTKFLT)
"Stack fault\0"
#elif defined(SIGEMT)
"Emulator trap\0"
#else
"Unknown signal\0"
#endif
"Child process status\0"
"Continued\0"
"Stopped (signal)\0"
"Stopped\0"
"Stopped (tty input)\0"
"Stopped (tty output)\0"
"Urgent I/O condition\0"
"CPU time limit exceeded\0"
"File size limit exceeded\0"
"Virtual timer expired\0"
"Profiling timer expired\0"
"Window changed\0"
"I/O possible\0"
"Power failure\0"
"Bad system call\0"
"RT32"
"\0RT33\0RT34\0RT35\0RT36\0RT37\0RT38\0RT39\0RT40"
"\0RT41\0RT42\0RT43\0RT44\0RT45\0RT46\0RT47\0RT48"
"\0RT49\0RT50\0RT51\0RT52\0RT53\0RT54\0RT55\0RT56"
"\0RT57\0RT58\0RT59\0RT60\0RT61\0RT62\0RT63\0RT64"
#if _NSIG > 65
"\0RT65\0RT66\0RT67\0RT68\0RT69\0RT70\0RT71\0RT72"
"\0RT73\0RT74\0RT75\0RT76\0RT77\0RT78\0RT79\0RT80"
"\0RT81\0RT82\0RT83\0RT84\0RT85\0RT86\0RT87\0RT88"
"\0RT89\0RT90\0RT91\0RT92\0RT93\0RT94\0RT95\0RT96"
"\0RT97\0RT98\0RT99\0RT100\0RT101\0RT102\0RT103\0RT104"
"\0RT105\0RT106\0RT107\0RT108\0RT109\0RT110\0RT111\0RT112"
"\0RT113\0RT114\0RT115\0RT116\0RT117\0RT118\0RT119\0RT120"
"\0RT121\0RT122\0RT123\0RT124\0RT125\0RT126\0RT127\0RT128"
#endif
"";
char *strsignal(int signum)
{
const char *s = strings;
signum = sigmap(signum);
if (signum - 1U >= _NSIG-1) signum = 0;
for (; signum--; s++) for (; *s; s++);
return (char *)s;
}

View File

@ -0,0 +1,2 @@
add_library(harmful harmful.c)
install(TARGETS harmful EXPORT global ARCHIVE DESTINATION lib)

1
base/harmful/README.md Normal file
View File

@ -0,0 +1 @@
A library that traps whenever harmful functions from libc are called.

244
base/harmful/harmful.c Normal file
View File

@ -0,0 +1,244 @@
/** This library provides runtime instrumentation (hardening)
* that ensures no "harmful" functions from libc are called
* (by terminating the program immediately).
*/
/// It is only enabled in debug build (its intended use is for CI checks).
#if !defined(NDEBUG)
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration"
#else
#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch"
#endif
/// We cannot use libc headers here.
long write(int, const void *, unsigned long);
#define TRAP(func) void func() { write(2, #func "\n", __builtin_strlen(#func) + 1); __builtin_trap(); }
/// Trap all non thread-safe functions:
/// nm -D /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} | grep -P '_r@?$' | awk '{ print $3 }' | sed -r -e 's/_r//' | grep -vP '^_'
/// See also https://reviews.llvm.org/D90944
/// You can edit this list and even comment out some functions.
/// The only purpose of the library is to force you to pay attention.
TRAP(argp_error)
TRAP(argp_help)
TRAP(argp_parse)
TRAP(argp_state_help)
TRAP(argp_usage)
TRAP(asctime)
TRAP(clearenv)
TRAP(crypt)
TRAP(ctime)
TRAP(cuserid)
TRAP(drand48)
TRAP(ecvt)
TRAP(encrypt)
TRAP(endfsent)
TRAP(endgrent)
TRAP(endhostent)
TRAP(endnetent)
TRAP(endnetgrent)
TRAP(endprotoent)
TRAP(endpwent)
TRAP(endservent)
TRAP(endutent)
TRAP(endutxent)
TRAP(erand48)
TRAP(error_at_line)
///TRAP(exit)
TRAP(fcloseall)
TRAP(fcvt)
TRAP(fgetgrent)
TRAP(fgetpwent)
TRAP(gammal)
TRAP(getchar_unlocked)
TRAP(getdate)
TRAP(getfsent)
TRAP(getfsfile)
TRAP(getfsspec)
TRAP(getgrent)
TRAP(getgrent_r)
TRAP(getgrgid)
TRAP(getgrnam)
TRAP(gethostbyaddr)
TRAP(gethostbyname)
TRAP(gethostbyname2)
TRAP(gethostent)
TRAP(getlogin)
TRAP(getmntent)
TRAP(getnetbyaddr)
TRAP(getnetbyname)
TRAP(getnetent)
TRAP(getnetgrent)
TRAP(getnetgrent_r)
TRAP(getopt)
TRAP(getopt_long)
TRAP(getopt_long_only)
TRAP(getpass)
TRAP(getprotobyname)
TRAP(getprotobynumber)
TRAP(getprotoent)
TRAP(getpwent)
TRAP(getpwent_r)
TRAP(getpwnam)
TRAP(getpwuid)
TRAP(getservbyname)
TRAP(getservbyport)
TRAP(getservent)
TRAP(getutent)
TRAP(getutent_r)
TRAP(getutid)
TRAP(getutid_r)
TRAP(getutline)
TRAP(getutline_r)
TRAP(getutxent)
TRAP(getutxid)
TRAP(getutxline)
TRAP(getwchar_unlocked)
//TRAP(glob)
//TRAP(glob64)
TRAP(gmtime)
TRAP(hcreate)
TRAP(hdestroy)
TRAP(hsearch)
TRAP(innetgr)
TRAP(jrand48)
TRAP(l64a)
TRAP(lcong48)
TRAP(lgammafNx)
TRAP(localeconv)
TRAP(localtime)
TRAP(login)
TRAP(login_tty)
TRAP(logout)
TRAP(logwtmp)
TRAP(lrand48)
TRAP(mallinfo)
TRAP(mallopt)
TRAP(mblen)
TRAP(mbrlen)
TRAP(mbrtowc)
TRAP(mbsnrtowcs)
TRAP(mbsrtowcs)
//TRAP(mbtowc) // Used by Standard C++ library
TRAP(mcheck)
TRAP(mprobe)
TRAP(mrand48)
TRAP(mtrace)
TRAP(muntrace)
TRAP(nrand48)
TRAP(__ppc_get_timebase_freq)
TRAP(ptsname)
TRAP(putchar_unlocked)
TRAP(putenv)
TRAP(pututline)
TRAP(pututxline)
TRAP(putwchar_unlocked)
TRAP(qecvt)
TRAP(qfcvt)
TRAP(register_printf_function)
TRAP(seed48)
//TRAP(setenv)
TRAP(setfsent)
TRAP(setgrent)
TRAP(sethostent)
TRAP(sethostid)
TRAP(setkey)
//TRAP(setlocale) // Used by replxx at startup
TRAP(setlogmask)
TRAP(setnetent)
TRAP(setnetgrent)
TRAP(setprotoent)
TRAP(setpwent)
TRAP(setservent)
TRAP(setutent)
TRAP(setutxent)
TRAP(siginterrupt)
TRAP(sigpause)
//TRAP(sigprocmask)
TRAP(sigsuspend)
TRAP(sleep)
TRAP(srand48)
//TRAP(strerror) // Used by RocksDB and many other libraries, unfortunately.
//TRAP(strsignal) // This function is imported from Musl and is thread safe.
TRAP(strtok)
TRAP(tcflow)
TRAP(tcsendbreak)
TRAP(tmpnam)
TRAP(ttyname)
TRAP(unsetenv)
TRAP(updwtmp)
TRAP(utmpname)
TRAP(utmpxname)
//TRAP(valloc)
TRAP(vlimit)
//TRAP(wcrtomb) // Used by Standard C++ library
TRAP(wcsnrtombs)
TRAP(wcsrtombs)
TRAP(wctomb)
TRAP(wordexp)
TRAP(basename)
TRAP(catgets)
TRAP(dbm_clearerr)
TRAP(dbm_close)
TRAP(dbm_delete)
TRAP(dbm_error)
TRAP(dbm_fetch)
TRAP(dbm_firstkey)
TRAP(dbm_nextkey)
TRAP(dbm_open)
TRAP(dbm_store)
TRAP(dirname)
TRAP(dlerror)
TRAP(ftw)
TRAP(getc_unlocked)
//TRAP(getenv) // Ok at program startup
TRAP(inet_ntoa)
TRAP(lgamma)
TRAP(lgammaf)
TRAP(lgammal)
TRAP(nftw)
TRAP(nl_langinfo)
TRAP(putc_unlocked)
TRAP(rand)
/** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern
* implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams
* are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external
* synchronization is still preferable to the use of the deprecated readdir_r(3) function. It is expected that a future
* version of POSIX.1 will require that readdir() be thread-safe when concurrently employed on different directory streams.
* - man readdir
*/
//TRAP(readdir)
TRAP(system)
TRAP(wcstombs)
TRAP(ether_aton)
TRAP(ether_ntoa)
TRAP(fgetsgent)
TRAP(fgetspent)
TRAP(getaliasbyname)
TRAP(getaliasent)
TRAP(getrpcbyname)
TRAP(getrpcbynumber)
TRAP(getrpcent)
TRAP(getsgent)
TRAP(getsgnam)
TRAP(getspent)
TRAP(getspnam)
TRAP(initstate)
TRAP(random)
TRAP(setstate)
TRAP(sgetsgent)
TRAP(sgetspent)
TRAP(srandom)
TRAP(twalk)
TRAP(lgammaf128)
TRAP(lgammaf32)
TRAP(lgammaf32x)
TRAP(lgammaf64)
TRAP(lgammaf64x)
#endif

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54444)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 13)
SET(VERSION_REVISION 54445)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
SET(VERSION_STRING 20.13.1.1)
SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
SET(VERSION_DESCRIBE v21.1.1.5643-prestable)
SET(VERSION_STRING 21.1.1.5643)
# end of autochange

View File

@ -32,12 +32,21 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
# 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash
# of the manifest) and debian will extract these from d/changelog, and
# makes cache of ccache unusable
# debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
#
# FIXME: once sloppiness will be introduced for this this can be removed.
if (CCACHE_VERSION VERSION_GREATER "4.0")
# - 4.0+ ccache always includes this environment variable into the hash
# of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
#
# So for:
# - 4.2+ time_macros sloppiness is used,
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "Use time_macros sloppiness for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")

View File

@ -39,6 +39,7 @@ find_package(Threads REQUIRED)
if (NOT OS_ANDROID)
# Our compatibility layer doesn't build under Android, many errors in musl.
add_subdirectory(base/glibc-compatibility)
add_subdirectory(base/harmful)
endif ()
include (cmake/find/unwind.cmake)

View File

@ -18,7 +18,11 @@ if (WITH_COVERAGE)
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
separate_arguments(WITHOUT_COVERAGE_LIST)
# disable coverage for contib files and build with optimisations
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
if (COMPILER_CLANG)
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
else()
add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST})
endif()
endif()
if (SANITIZE STREQUAL "undefined")

@ -1 +1 @@
Subproject commit e05523ca7c1fb8d095b612a1b1cfe96e199ffb17
Subproject commit 21f451d4d3157ffed31ec60a8b76c407190e66bd

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit 254be98ae7f2fd92d6db768f8e11ea5a5226cbf5
Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 8b966f0ca298fc1475bd09d9775f32dff0fdce0a
Subproject commit 54a0decabbcf4c0bb5cf7befa9c597f28289bff5

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.13.1.1) unstable; urgency=low
clickhouse (21.1.0) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
-- Alexey Milovidov <milovidov@yandex-team.ru> Mon, 11 Jan 2021 03:51:08 +0300

2
debian/control vendored
View File

@ -40,7 +40,7 @@ Description: Common files for ClickHouse
Package: clickhouse-server
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser
Recommends: libcap2-bin, krb5-user
Recommends: libcap2-bin
Replaces: clickhouse-server-common, clickhouse-server-base
Provides: clickhouse-server-common
Description: Server binary for ClickHouse

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -31,7 +31,7 @@ find . -name '*.so.*' -print -exec mv '{}' /output \;
if [ "performance" == "$COMBINED_OUTPUT" ]
then
cp -r ../tests/performance /output
cp -r ../tests/config/top_level_domains /
cp -r ../tests/config/top_level_domains /output
cp -r ../docker/test/performance-comparison/config /output ||:
rm /output/unit_tests_dbms ||:
rm /output/clickhouse-odbc-bridge ||:

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -10,6 +10,11 @@
<max_execution_time>
<max>10</max>
</max_execution_time>
<!-- Not ready for production -->
<compile_expressions>
<readonly />
</compile_expressions>
</constraints>
</default>
</profiles>

View File

@ -81,12 +81,11 @@ function fuzz
echo Server started
fuzzer_exit_code=0
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames.
# They are all alphanumeric.
# shellcheck disable=SC2012
./clickhouse-client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -10000 > fuzzer.log) \
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
> >(tail -n 10000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?

View File

@ -36,6 +36,22 @@ function wait_for_server # port, pid
fi
}
function left_or_right()
{
local from=$1 && shift
local basename=$1 && shift
if [ -e "$from/$basename" ]; then
echo "$from/$basename"
return
fi
case "$from" in
left) echo "right/$basename" ;;
right) echo "left/$basename" ;;
esac
}
function configure
{
# Use the new config for both servers, so that we can change it in a PR.
@ -55,7 +71,7 @@ function configure
# server *config* directives overrides
--path db0
--user_files_path db0/user_files
--top_level_domains_path /top_level_domains
--top_level_domains_path "$(left_or_right right top_level_domains)"
--tcp_port $LEFT_SERVER_PORT
)
left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log &
@ -103,7 +119,7 @@ function restart
# server *config* directives overrides
--path left/db
--user_files_path left/db/user_files
--top_level_domains_path /top_level_domains
--top_level_domains_path "$(left_or_right left top_level_domains)"
--tcp_port $LEFT_SERVER_PORT
)
left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log &
@ -118,7 +134,7 @@ function restart
# server *config* directives overrides
--path right/db
--user_files_path right/db/user_files
--top_level_domains_path /top_level_domains
--top_level_domains_path "$(left_or_right right top_level_domains)"
--tcp_port $RIGHT_SERVER_PORT
)
right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log &

View File

@ -37,7 +37,15 @@ chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE test"
service clickhouse-server restart && sleep 5
service clickhouse-server restart
# Wait for server to start accepting connections
for _ in {1..120}; do
clickhouse-client --query "SELECT 1" && break
sleep 1
done
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"

View File

@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f
Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and it is available to general public.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
@ -84,11 +84,9 @@ If you want to improve performance of ClickHouse in some scenario, and if improv
Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing.
You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. These kind of tests are not automated.
## Miscellaneous Tests {#miscellaneous-tests}
There are tests for external dictionaries located at `tests/external_dictionaries` and for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.
There are tests for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.
There is separate test for quorum inserts. This test run ClickHouse cluster on separate servers and emulate various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT` , like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts was written and all rejected inserts was not.
@ -169,53 +167,55 @@ Precise query execution timings are not recorded and not compared due to high va
## Build Tests {#build-tests}
Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. Tests are located at `ci` directory. They run build from source inside Docker, Vagrant, and sometimes with `qemu-user-static` inside Docker. These tests are under development and test runs are not automated.
Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
Motivation:
Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD
- build on Debian with libraries from system packages
- build with shared linking of libraries
- build on AArch64 platform
- build on PowerPc platform
Examples:
- cross-compile for Darwin x86_64 (Mac OS X)
- cross-compile for FreeBSD x86_64
- cross-compile for Linux AArch64
- build on Ubuntu with libraries from system packages (discouraged)
- build with shared linking of libraries (discouraged)
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
Though we cannot run all tests on all variant of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests.
We also test that there are no translation units that are too long to compile or require too much RAM.
We also test that there are no too large stack frames.
## Testing for Protocol Compatibility {#testing-for-protocol-compatibility}
When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages).
We also test some cases automatically with integrational tests:
- if data written by old version of ClickHouse can be successfully read by the new version;
- do distributed queries work in a cluster with different ClickHouse versions.
## Help from the Compiler {#help-from-the-compiler}
Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. Although these options are not enabled for third-party libraries.
Clang has even more useful warnings - you can look for them with `-Weverything` and pick something to default build.
For production builds, gcc is used (it still generates slightly more efficient code than clang). For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows to catch more errors at runtime.
For production builds, clang is used, but we also test make gcc builds. For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang in debug mode, debug version of `libc++` is used that allows to catch more errors at runtime.
## Sanitizers {#sanitizers}
### Address sanitizer
We run functional and integration tests under ASan on per-commit basis.
### Valgrind (Memcheck)
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
### Undefined behaviour sanitizer
We run functional and integration tests under ASan on per-commit basis.
We run functional, integration, stress and unit tests under ASan on per-commit basis.
### Thread sanitizer
We run functional tests under TSan on per-commit basis. We still dont run integration tests under TSan on per-commit basis.
We run functional, integration, stress and unit tests under TSan on per-commit basis.
### Memory sanitizer
Currently we still dont use MSan.
We run functional, integration, stress and unit tests under MSan on per-commit basis.
### Debug allocator
Debug version of `jemalloc` is used for debug build.
### Undefined behaviour sanitizer
We run functional, integration, stress and unit tests under UBSan on per-commit basis. The code of some third-party libraries is not sanitized for UB.
### Valgrind (Memcheck)
We used to run functional tests under Valgrind overnight, but don't do it anymore. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
## Fuzzing {#fuzzing}
@ -233,19 +233,62 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use simple fuzz test to generate random SQL queries and to check that the server doesnt die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
We also use sophisticated AST-based query fuzzer that is able to find huge amount of corner cases. It does random permutations and substitutions in queries AST. It remembers AST nodes from previous tests to use them for fuzzing of subsequent tests while processing them in random order.
## Stress test
Stress tests are another case of fuzzing. It runs all functional tests in parallel in random order with a single server. Results of the tests are not checked.
It is checked that:
- server does not crash, no debug or sanitizer traps are triggered;
- there are no deadlocks;
- the database structure is consistent;
- server can successfully stop after the test and start again without exceptions.
There are five variants (Debug, ASan, TSan, MSan, UBSan).
## Thread Fuzzer
Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases.
## Security Audit {#security-audit}
People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.
## Static Analyzers {#static-analyzers}
We run `PVS-Studio` on per-commit basis. We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/).
We run `clang-tidy` and `PVS-Studio` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks.
We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/).
If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
We also use `shellcheck` for static analysis of shell scripts.
## Hardening {#hardening}
`FORTIFY_SOURCE` is used by default. It is almost useless, but still makes sense in rare cases and we dont disable it.
In debug build we are using custom allocator that does ASLR of user-level allocations.
We also manually protect memory regions that are expected to be readonly after allocation.
In debug build we also involve a customization of libc that ensures that no "harmful" (obsolete, insecure, not thread-safe) functions are called.
Debug assertions are used extensively.
In debug build, if exception with "logical error" code (implies a bug) is being thrown, the program is terminated prematurally. It allows to use exceptions in release build but make it an assertion in debug build.
Debug version of jemalloc is used for debug builds.
Debug version of libc++ is used for debug builds.
## Runtime Integrity Checks
Data stored on disk is checksummed. Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas.
It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presense of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
ClickHouse provides diagnostics that will help ops engineers to find faulty hardware.
\* and it is not slow.
## Code Style {#code-style}
@ -259,6 +302,8 @@ Alternatively you can try `uncrustify` tool to reformat your code. Configuration
`CLion` has its own code formatter that has to be tuned for our code style.
We also use `codespell` to find typos in code. It is automated as well.
## Metrica B2B Tests {#metrica-b2b-tests}
Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together.
@ -267,13 +312,25 @@ These tests are automated by separate team. Due to high number of moving parts,
## Test Coverage {#test-coverage}
As of July 2018 we dont track test coverage.
We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis.
## Tests for Tests
There is automated check for flaky tests. It runs all new tests 100 times (for functional tests) or 10 times (for integration tests). If at least single time the test failed, it is considered flaky.
## Testflows
[Testflows](https://testflows.com/) is an enterprise-grade testing framework. It is used by Altinity for some of the tests and we run these tests in our CI.
## Yandex Checks (only for Yandex employees)
These checks are importing ClickHouse code into Yandex internal monorepository, so ClickHouse codebase can be used as a library by other products at Yandex (YT and YDB). Note that clickhouse-server itself is not being build from internal repo and unmodified open-source build is used for Yandex applications.
## Test Automation {#test-automation}
We run tests with Yandex internal CI and job automation system named “Sandbox”.
Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
We dont use Travis CI due to the limit on time and computational power.
We dont use Jenkins. It was used before and now we are happy we are not using Jenkins.

View File

@ -98,7 +98,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
**Example of Sections Setting**

View File

@ -38,15 +38,15 @@ Example of setting the addresses of the ZooKeeper cluster:
``` xml
<zookeeper>
<node index="1">
<node>
<host>example1</host>
<port>2181</port>
</node>
<node index="2">
<node>
<host>example2</host>
<port>2181</port>
</node>
<node index="3">
<node>
<host>example3</host>
<port>2181</port>
</node>
@ -61,21 +61,21 @@ Example of setting the addresses of the auxiliary ZooKeeper cluster:
``` xml
<auxiliary_zookeepers>
<zookeeper2>
<node index="1">
<node>
<host>example_2_1</host>
<port>2181</port>
</node>
<node index="2">
<node>
<host>example_2_2</host>
<port>2181</port>
</node>
<node index="3">
<node>
<host>example_2_3</host>
<port>2181</port>
</node>
</zookeeper2>
<zookeeper3>
<node index="1">
<node>
<host>example_3_1</host>
<port>2181</port>
</node>

View File

@ -25,10 +25,27 @@ The Distributed engine accepts parameters:
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Also it accept the following settings:
- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk.
- `fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc).
!!! note "Note"
**Durability settings** (`fsync_...`):
- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`) when data first stored on the initiator node disk and later asynchronously send to shards.
- May significantly decrease the inserts' performance
- Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings`
Example:
``` sql
Distributed(logs, default, hits[, sharding_key[, policy_name]])
SETTINGS
fsync_after_insert=0,
fsync_directories=0;
```
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.

View File

@ -0,0 +1,414 @@
---
toc_priority: 20
toc_title: Brown University Benchmark
---
# Brown University Benchmark
MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
Download the data:
```
wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz
```
Unpack the data:
```
xz -v -d mgbench{1..3}.csv.xz
```
Create tables:
```
CREATE DATABASE mgbench;
CREATE TABLE mgbench.logs1 (
log_time DateTime,
machine_name LowCardinality(String),
machine_group LowCardinality(String),
cpu_idle Nullable(Float32),
cpu_nice Nullable(Float32),
cpu_system Nullable(Float32),
cpu_user Nullable(Float32),
cpu_wio Nullable(Float32),
disk_free Nullable(Float32),
disk_total Nullable(Float32),
part_max_used Nullable(Float32),
load_fifteen Nullable(Float32),
load_five Nullable(Float32),
load_one Nullable(Float32),
mem_buffers Nullable(Float32),
mem_cached Nullable(Float32),
mem_free Nullable(Float32),
mem_shared Nullable(Float32),
swap_free Nullable(Float32),
bytes_in Nullable(Float32),
bytes_out Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
CREATE TABLE mgbench.logs2 (
log_time DateTime,
client_ip IPv4,
request String,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
CREATE TABLE mgbench.logs3 (
log_time DateTime64,
device_id FixedString(15),
device_name LowCardinality(String),
device_type LowCardinality(String),
device_floor UInt8,
event_type LowCardinality(String),
event_unit FixedString(1),
event_value Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
```
Insert data:
```
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
```
Run benchmark queries:
```
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN ('anansi','aragog','urd')
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
) AS r
GROUP BY machine_name;
-- Q1.2: Which computer lab machines have been offline in the past day?
SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE 'cslab%' OR
machine_name LIKE 'mslab%')
AND load_one IS NULL
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
ORDER BY machine_name,
log_time;
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = 'babbage'
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = 'Servers'
AND cpu_wio > 0.99
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
-- Q1.5: Which externally reachable VMs have run low on memory?
SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = 'DMZ'
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) < 10000
ORDER BY machine_name,
dt;
-- Q1.6: What is the total hourly network traffic across all file servers?
SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
'tootsie','trident','wrigley','york')
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
-- Q2.1: Which requests have caused server errors within the past 2 weeks?
SELECT *
FROM logs2
WHERE status_code >= 500
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
ORDER BY log_time;
-- Q2.2: During a specific 2-week period, was the user password file leaked?
SELECT *
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND request LIKE '%/etc/passwd%'
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
-- Q2.3: What was the average path depth for top-level requests in the past month?
SELECT top_level,
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
FROM (
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
request
FROM (
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
request
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
) AS r
WHERE len > 0
) AS s
WHERE top_level IN ('/about','/courses','/degrees','/events',
'/grad','/industry','/news','/people',
'/publications','/research','/teaching','/ugrad')
GROUP BY top_level
ORDER BY top_level;
-- Q2.4: During the last 3 months, which clients have made an excessive number of requests?
SELECT client_ip,
COUNT(*) AS num_requests
FROM logs2
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
GROUP BY client_ip
HAVING COUNT(*) >= 100000
ORDER BY num_requests DESC;
-- Q2.5: What are the daily unique visitors?
SELECT dt,
COUNT(DISTINCT client_ip)
FROM (
SELECT CAST(log_time AS DATE) AS dt,
client_ip
FROM logs2
) AS r
GROUP BY dt
ORDER BY dt;
-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
MAX(transfer) / 125000000.0 AS transfer_max
FROM (
SELECT log_time,
SUM(object_size) AS transfer
FROM logs2
GROUP BY log_time
) AS r;
-- Q3.1: Did the indoor temperature reach freezing over the weekend?
SELECT *
FROM logs3
WHERE event_type = 'temperature'
AND event_value <= 32.0
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently was each door opened?
SELECT device_name,
device_floor,
COUNT(*) AS ct
FROM logs3
WHERE event_type = 'door_open'
AND log_time >= '2019-06-01 00:00:00.000'
GROUP BY device_name,
device_floor
ORDER BY ct DESC;
-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
WITH temperature AS (
SELECT dt,
device_name,
device_type,
device_floor
FROM (
SELECT dt,
hr,
device_name,
device_type,
device_floor,
AVG(event_value) AS temperature_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
device_name,
device_type,
device_floor,
event_value
FROM logs3
WHERE event_type = 'temperature'
) AS r
GROUP BY dt,
hr,
device_name,
device_type,
device_floor
) AS s
GROUP BY dt,
device_name,
device_type,
device_floor
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
)
SELECT DISTINCT device_name,
device_type,
device_floor,
'WINTER'
FROM temperature
WHERE dt >= DATE '2018-12-01'
AND dt < DATE '2019-03-01'
UNION
SELECT DISTINCT device_name,
device_type,
device_floor,
'SUMMER'
FROM temperature
WHERE dt >= DATE '2019-06-01'
AND dt < DATE '2019-09-01';
-- Q3.6: For each device category, what are the monthly power consumption metrics?
SELECT yr,
mo,
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
SUM(printer_hourly_avg) AS printer_monthly_sum,
AVG(printer_hourly_avg) AS printer_monthly_avg,
SUM(projector_hourly_avg) AS projector_monthly_sum,
AVG(projector_hourly_avg) AS projector_monthly_avg,
SUM(vending_hourly_avg) AS vending_monthly_sum,
AVG(vending_hourly_avg) AS vending_monthly_avg
FROM (
SELECT dt,
yr,
mo,
hr,
AVG(coffee) AS coffee_hourly_avg,
AVG(printer) AS printer_hourly_avg,
AVG(projector) AS projector_hourly_avg,
AVG(vending) AS vending_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(YEAR FROM log_time) AS yr,
EXTRACT(MONTH FROM log_time) AS mo,
EXTRACT(HOUR FROM log_time) AS hr,
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
FROM logs3
WHERE device_type = 'meter'
) AS r
GROUP BY dt,
yr,
mo,
hr
) AS s
GROUP BY yr,
mo
ORDER BY yr,
mo;
```
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).

View File

@ -13,11 +13,12 @@ The list of documented datasets:
- [GitHub Events](../../getting-started/example-datasets/github-events.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -71,4 +71,4 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
[ClickHouse tutorial](../../getting-started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial.
Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hists` and `test.visits` there).
Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there).

View File

@ -398,6 +398,8 @@ ORDER BY c DESC
LIMIT 10;
```
You can also play with the data in Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==).
This performance test was created by Vadim Tkachenko. See:
- https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/

View File

@ -13,6 +13,7 @@ toc_title: Client Libraries
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -844,23 +844,27 @@ Higher values will lead to higher memory usage.
## max_compress_block_size {#max-compress-block-size}
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isnt any reason to change this setting.
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
!!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with Clickhouse.
Dont confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
## min_compress_block_size {#min-compress-block-size}
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least min_compress_block_size. By default, 65,536.
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536.
The actual size of the block, if the uncompressed data is less than max_compress_block_size, is no less than this value and no less than the volume of data for one mark.
The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark.
Lets look at an example. Assume that index_granularity was set to 8192 during table creation.
Lets look at an example. Assume that `index_granularity` was set to 8192 during table creation.
We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data wont be decompressed.
There usually isnt any reason to change this setting.
!!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with Clickhouse.
## max_query_size {#settings-max_query_size}
@ -2470,6 +2474,45 @@ Possible values:
Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
**Example**
Consider the following query with aggregate functions:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
With `aggregate_functions_null_for_empty = 0` it would produce:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
With `aggregate_functions_null_for_empty = 1` the result would be:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode}
Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
@ -2484,6 +2527,7 @@ Default value: `''`.
See examples in [UNION](../../sql-reference/statements/select/union.md).
## data_type_default_nullable {#data_type_default_nullable}
Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -2495,6 +2539,7 @@ Possible values:
Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.

View File

@ -20,7 +20,33 @@ System tables:
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements can be customized are:
- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`.
- `table`: table to insert data.
- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression.
- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression.
- `flush_interval_milliseconds`: interval of flushing data to disk.
- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit.
An example:
```
<yandex>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
<!--
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
```
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.

View File

@ -11,6 +11,7 @@ This table contains the following columns (the column type is shown in brackets)
- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md).
- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication.
- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting).
Example:
@ -21,11 +22,11 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
```
``` text
┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐
Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │
MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │
│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘
┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┬─supports_parallel_insert─
MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ 1 │
Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │
│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┴──────────────────────────
```
**See also**

View File

@ -1290,25 +1290,68 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference
## arrayMin(\[func,\] arr1, …) {#array-min}
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements.
Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements.
Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arrayMin([1, 2, 4]) AS res
┌─res─┐
│ 1 │
└─────┘
SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res
┌─res─┐
│ -4 │
└─────┘
```
## arrayMax(\[func,\] arr1, …) {#array-max}
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements.
Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements.
Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arrayMax([1, 2, 4]) AS res
┌─res─┐
│ 4 │
└─────┘
SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res
┌─res─┐
│ -1 │
└─────┘
```
## arraySum(\[func,\] arr1, …) {#array-sum}
Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arraySum([2,3]) AS res
┌─res─┐
│ 5 │
└─────┘
SELECT arraySum(x -> x*x, [2, 3]) AS res
┌─res─┐
│ 13 │
└─────┘
```
## arrayAvg(\[func,\] arr1, …) {#array-avg}
Returns the sum of the `func` values. If the function is omitted, it just returns the average of the array elements.
Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.
Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.

View File

@ -182,6 +182,14 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth
Gets the size of the block.
In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for.
## byteSize(...) {#function-bytesize}
Get an estimate of uncompressed byte size of its arguments in memory.
E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length).
The function can take multiple arguments. The typical application is byteSize(*).
Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis.
## materialize(x) {#materializex}
Turns a constant into a full column containing just one value.

View File

@ -13,6 +13,7 @@ toc_title: Client Libraries
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -15,6 +15,7 @@ toc_title: "Biblioth\xE8ques Clientes"
- [clickhouse-chauffeur](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2 / phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov / clickhouse-PHP-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -15,6 +15,7 @@ toc_title: "\u30AF\u30E9\u30A4\u30A2\u30F3\u30C8"
- [clickhouse-ドライバ](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-クライアント](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-クライアント](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -77,17 +77,19 @@ ORDER BY expr
- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные):
- `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage).
- `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage).
- `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage).
- `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage).
- `min_index_granularity_bytes` — минимально допустимый размер гранул данных в байтах. Значение по умолчанию — 1024b. Для обеспечения защиты от случайного создания таблиц с очень низким значением `index_granularity_bytes`. Смотрите [Хранение данных](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage).
- `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
- `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
**Пример задания секций**

View File

@ -13,6 +13,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -811,23 +811,27 @@ log_query_threads=1
## max_compress_block_size {#max-compress-block-size}
Максимальный размер блоков не сжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативки. Как правило, не имеет смысла менять эту настройку.
Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативной памяти.
!!! note "Предупреждение"
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse.
Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы).
## min_compress_block_size {#min-compress-block-size}
Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536.
Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше `min_compress_block_size`. По умолчанию - 65 536.
Реальный размер блока, если несжатых данных меньше max_compress_block_size, будет не меньше этого значения и не меньше объёма данных на одну засечку.
Реальный размер блока, если несжатых данных меньше `max_compress_block_size`, будет не меньше этого значения и не меньше объёма данных на одну засечку.
Рассмотрим пример. Пусть index_granularity, указанная при создании таблицы - 8192.
Рассмотрим пример. Пусть `index_granularity`, указанная при создании таблицы - 8192.
Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как min_compress_block_size = 65 536, сжатый блок будет сформирован на каждые две засечки.
Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как `min_compress_block_size` = 65 536, сжатый блок будет сформирован на каждые две засечки.
Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 строк, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных.
Как правило, не имеет смысла менять эту настройку.
!!! note "Предупреждение"
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse.
## max_query_size {#settings-max_query_size}
@ -2339,6 +2343,45 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
Значение по умолчанию: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Включает или отключает перезапись всех агрегатных функций в запросе, с добавлением к ним суффикса [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull). Включите для совместимости со стандартом SQL.
Реализуется с помощью перезаписи запросов (аналогично настройке [count_distinct_implementation](#settings-count_distinct_implementation)), чтобы получить согласованные результаты для распределенных запросов.
Возможные значения:
- 0 — выключена.
- 1 — включена.
Значение по умолчанию: 0.
**Пример**
Рассмотрим запрос с агрегирующими функциями:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
Результат запроса с настройкой `aggregate_functions_null_for_empty = 0`:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
Результат запроса с настройкой `aggregate_functions_null_for_empty = 1`:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode}
Устанавливает режим объединения результатов `SELECT` запросов. Настройка используется только при совместном использовании с [UNION](../../sql-reference/statements/select/union.md) без явного указания `UNION ALL` или `UNION DISTINCT`.
@ -2353,6 +2396,7 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md).
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Включает особую логику выполнения слияний на репликах.

View File

@ -8,7 +8,7 @@
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — значение метрики.
- `description` ([String](../../sql-reference/data-types/string.md)) — описание метрики.
Список поддержанных метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
Список поддерживаемых метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
**Пример**

View File

@ -267,7 +267,7 @@ void executeQuery(
**9.** 多行注释的开头和结尾不得有空行(关闭多行注释的行除外)。
**10.** 要注释掉代码,请使用基本注释,而不是«记录»注释。
**10.** 要注释掉代码,请使用基本注释,而不是“文档”注释。
**11.** 在提交之前删除代码的无效注释部分。
@ -335,7 +335,7 @@ template <bool without_www>
struct ExtractDomain
```
**7.** 对于抽象类(接口),用 `I` 前缀。
**7.** 对于抽象类(接口),用 `I` 前缀。
``` cpp
class IBlockInputStream
@ -349,7 +349,7 @@ class IBlockInputStream
bool info_successfully_loaded = false;
```
**9.** `define` 和全局常量的名称使用带下划线的 `ALL_CAPS`
**9.** `define` 和全局常量的名称使用全大写带下划线的形式,如 `ALL_CAPS`
``` cpp
#define MAX_SRC_TABLE_NAMES_TO_STORE 1000
@ -357,14 +357,14 @@ bool info_successfully_loaded = false;
**10.** 文件名应使用与其内容相同的样式。
如果文件包含单个类,则以与该类名称相同的方式命名该文件。
如果文件包含单个类,则以与该类名称相同的方式命名该文件CamelCase
如果文件包含单个函数,则以与函数名称相同的方式命名文件。
如果文件包含单个函数,则以与函数名称相同的方式命名文件camelCase
**11.** 如果名称包含缩写,则:
- 对于变量名,缩写应使用小写字母 `mysql_connection`(不是 `mySQL_connection` )。
- 对于类和函数的名称,请将大写字母保留在缩写 `MySQLConnection`(不是 `MySqlConnection`
- 对于类和函数的名称,请将大写字母保留在缩写 `MySQLConnection`(不是 `MySqlConnection`
**12.** 仅用于初始化类成员的构造方法参数的命名方式应与类成员相同,但最后使用下划线。
@ -411,7 +411,7 @@ enum class CompressionMethod
如果缩短版本是常用的,则可以接受不完整的单词。
如果注释中旁边包含全名,您也可以使用缩写。
如果旁边有注释包含全名,您也可以使用缩写。
**17.** C++ 源码文件名称必须为 `.cpp` 拓展名。 头文件必须为 `.h` 拓展名。
@ -441,7 +441,7 @@ enum class CompressionMethod
在离线数据处理应用程序中,通常可以接受不捕获异常。
在处理用户请求的服务器中,通常足以捕获连接处理程序顶层的异常。
在处理用户请求的服务器中,捕获连接处理程序顶层的异常通常就足够了
在线程函数中,你应该在 `join` 之后捕获并保留所有异常以在主线程中重新抛出它们。
@ -548,7 +548,7 @@ Fork不用于并行化。
**10.** 常量。
使用 const 引用,指向常量的指针,`const_iterator`和 const 指针
使用 const 引用、指针,指向常量、`const_iterator`和 const 方法
`const` 视为默认值,仅在必要时使用非 `const`
@ -560,7 +560,7 @@ Fork不用于并行化。
**12.** 数值类型。
使用 `UInt8` `UInt16` `UInt32` `UInt64` `Int8` `Int16` `Int32` 以及 `Int64` `size_t` `ssize_t` 还有 `ptrdiff_t`
使用 `UInt8` `UInt16` `UInt32` `UInt64` `Int8` `Int16` `Int32``Int64`,同样还有 `size_t` `ssize_t` `ptrdiff_t`
不要使用这些类型:`signed / unsigned long``long long``short``signed / unsigned char``char`。
@ -732,11 +732,11 @@ CPU指令集是我们服务器中支持的最小集合。 目前它是SSE 4.2
**8.** 尽可能经常地进行提交,即使代码只是部分准备好了。
目的明确的功能,使用分支。
为了这种目的可以创建分支。
如果 `master` 分支中的代码尚不可构建,`push` 之前将其从构建中排除。您需要在几天内完成或删除它。
如果您的代码在 `master` 分支中尚不可构建,在 `push` 之前需要将其从构建中排除。您需要在几天内完成或删除它。
**9.** 对于不重要的更改,请使用分支并在服务器上发布它们。
**9.** 对于非一般的更改,请使用分支并在服务器上发布它们。
**10.** 未使用的代码将从 repo 中删除。

View File

@ -13,6 +13,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -22,9 +22,35 @@ toc_title: "\u7CFB\u7EDF\u8868"
大多数系统表将数据存储在RAM中。 ClickHouse服务器在开始时创建此类系统表。
与其他系统表不同,系统表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改则ClickHouse会重命名当前表并创建一个新表。
与其他系统表不同,系统日志表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log), [part_log](../../operations/system-tables/part_log.md#system.part_log), crash_log and text_log 默认采用[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改则ClickHouse会重命名当前表并创建一个新表。
默认情况下,表增长是无限的。 要控制表的大小,可以使用 [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。
用户可以通过在`/etc/clickhouse-server/config.d/`下创建与系统表同名的配置文件, 或者在`/etc/clickhouse-server/config.xml`中设置相应配置项,来自定义系统日志表的结构。可以自定义的配置项如下:
- `database`: 系统日志表所在的数据库。这个选项目前已经废弃。所有的系统日表都位于`system`库中。
- `table`: 系统日志表名。
- `partition_by`: 指定[PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md)表达式。
- `ttl`: 指定系统日志表TTL选项。
- `flush_interval_milliseconds`: 指定系统日志表数据落盘时间。
- `engine`: 指定完整的表引擎定义。(以`ENGINE = `开始)。 这个选项与`partition_by`以及`ttl`冲突。如果两者一起设置,服务启动时会抛出异常并且退出。
一个配置定义的例子如下:
```
<yandex>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
<!--
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
```
默认情况下,表增长是无限的。 要控制表的大小,可以使用 TTL 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。
## 系统指标的来源 {#system-tables-sources-of-system-metrics}

View File

@ -216,7 +216,13 @@ if (CLICKHOUSE_SPLIT_BINARY)
install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse)
else ()
add_executable (clickhouse main.cpp)
target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils)
# A library that prevent usage of several functions from libc.
if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID)
set (HARMFUL_LIB harmful)
endif ()
target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB})
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_CLICKHOUSE_SERVER)
@ -312,6 +318,10 @@ else ()
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
endif()
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
endif()
endif ()
if (ENABLE_TESTS AND USE_GTEST)

View File

@ -59,7 +59,9 @@
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <DataStreams/InternalTextLogsRowOutputStream.h>
#include <DataStreams/NullBlockOutputStream.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ASTInsertQuery.h>
@ -110,6 +112,7 @@ namespace ErrorCodes
extern const int INVALID_USAGE_OF_INPUT;
extern const int DEADLOCK_AVOIDED;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int SYNTAX_ERROR;
}
@ -136,6 +139,9 @@ private:
bool stdin_is_a_tty = false; /// stdin is a terminal.
bool stdout_is_a_tty = false; /// stdout is a terminal.
/// If not empty, queries will be read from these files
std::vector<std::string> queries_files;
std::unique_ptr<Connection> connection; /// Connection to DB.
String full_query; /// Current query as it was given to the client.
@ -478,10 +484,10 @@ private:
/// - stdin is not a terminal. In this case queries are read from it.
/// - -qf (--queries-file) command line option is present.
/// The value of the option is used as file with query (or of multiple queries) to execute.
if (!stdin_is_a_tty || config().has("query") || config().has("queries-file"))
if (!stdin_is_a_tty || config().has("query") || !queries_files.empty())
is_interactive = false;
if (config().has("query") && config().has("queries-file"))
if (config().has("query") && !queries_files.empty())
{
throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS);
}
@ -696,14 +702,8 @@ private:
auto query_id = config().getString("query_id", "");
if (!query_id.empty())
context.setCurrentQueryId(query_id);
if (query_fuzzer_runs)
{
nonInteractiveWithFuzzing();
}
else
{
nonInteractive();
}
nonInteractive();
/// If exception code isn't zero, we should return non-zero return code anyway.
if (last_exception_received_from_server)
@ -794,15 +794,22 @@ private:
{
String text;
if (config().has("queries-file"))
if (!queries_files.empty())
{
ReadBufferFromFile in(config().getString("queries-file"));
readStringUntilEOF(text, in);
processMultiQuery(text);
for (const auto & queries_file : queries_files)
{
connection->setDefaultDatabase(connection_parameters.default_database);
ReadBufferFromFile in(queries_file);
readStringUntilEOF(text, in);
if (!processMultiQuery(text))
break;
}
return;
}
else if (config().has("query"))
{
text = config().getRawString("query"); /// Poco configuration should not process substitutions in form of ${...} inside query.
}
else
{
/// If 'query' parameter is not set, read a query from stdin.
@ -811,113 +818,10 @@ private:
readStringUntilEOF(text, in);
}
processQueryText(text);
}
void nonInteractiveWithFuzzing()
{
if (config().has("query"))
{
// Poco configuration should not process substitutions in form of
// ${...} inside query
processWithFuzzing(config().getRawString("query"));
return;
}
// Try to stream the queries from stdin, without reading all of them
// into memory. The interface of the parser does not support streaming,
// in particular, it can't distinguish the end of partial input buffer
// and the final end of input file. This means we have to try to split
// the input into separate queries here. Two patterns of input are
// especially interesting:
// 1) multiline query:
// select 1
// from system.numbers;
//
// 2) csv insert with in-place data:
// insert into t format CSV 1;2
//
// (1) means we can't split on new line, and (2) means we can't split on
// semicolon. Solution: split on ';\n'. This sequence is frequent enough
// in the SQL tests which are our principal input for fuzzing. Now we
// have another interesting case:
// 3) escaped semicolon followed by newline, e.g.
// select ';
// '
//
// To handle (3), parse until we can, and read more data if the parser
// complains. Hopefully this should be enough...
ReadBufferFromFileDescriptor in(STDIN_FILENO);
std::string text;
while (!in.eof())
{
// Read until separator.
while (!in.eof())
{
char * next_separator = find_first_symbols<';'>(in.position(),
in.buffer().end());
if (next_separator < in.buffer().end())
{
next_separator++;
if (next_separator < in.buffer().end()
&& *next_separator == '\n')
{
// Found ';\n', append it to the query text and try to
// parse.
next_separator++;
text.append(in.position(), next_separator - in.position());
in.position() = next_separator;
break;
}
}
// Didn't find the semicolon and reached the end of buffer.
text.append(in.position(), next_separator - in.position());
in.position() = next_separator;
if (text.size() > 1024 * 1024)
{
// We've read a lot of text and still haven't seen a separator.
// Likely some pathological input, just fall through to prevent
// too long loops.
break;
}
}
// Parse and execute what we've read.
const auto * new_end = processWithFuzzing(text);
if (new_end > &text[0])
{
const auto rest_size = text.size() - (new_end - &text[0]);
memcpy(&text[0], new_end, rest_size);
text.resize(rest_size);
}
else
{
// We didn't read enough text to parse a query. Will read more.
}
// Ensure that we're still connected to the server. If the server died,
// the reconnect is going to fail with an exception, and the fuzzer
// will exit. The ping() would be the best match here, but it's
// private, probably for a good reason that the protocol doesn't allow
// pings at any possible moment.
// Don't forget to reset the default database which might have changed.
connection->setDefaultDatabase("");
connection->forceConnected(connection_parameters.timeouts);
if (text.size() > 4 * 1024)
{
// Some pathological situation where the text is larger than 4kB
// and we still cannot parse a single query in it. Abort.
std::cerr << "Read too much text and still can't parse a query."
" Aborting." << std::endl;
exit(1);
}
}
if (query_fuzzer_runs)
processWithFuzzing(text);
else
processQueryText(text);
}
bool processQueryText(const String & text)
@ -945,7 +849,8 @@ private:
{
const bool test_mode = config().has("testmode");
{ /// disable logs if expects errors
{
/// disable logs if expects errors
TestHint test_hint(test_mode, all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
@ -1019,7 +924,7 @@ private:
if (hint.clientError() != e.code())
{
if (hint.clientError())
e.addMessage("\nExpected clinet error: " + std::to_string(hint.clientError()));
e.addMessage("\nExpected client error: " + std::to_string(hint.clientError()));
throw;
}
@ -1078,40 +983,51 @@ private:
expected_client_error = test_hint.clientError();
expected_server_error = test_hint.serverError();
try
if (query_fuzzer_runs)
{
processParsedSingleQuery();
if (insert_ast && insert_ast->data)
{
// For VALUES format: use the end of inline data as reported
// by the format parser (it is saved in sendData()). This
// allows us to handle queries like:
// insert into t values (1); select 1
//, where the inline data is delimited by semicolon and not
// by a newline.
this_query_end = parsed_query->as<ASTInsertQuery>()->end;
}
}
catch (...)
{
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
received_exception_from_server = true;
}
if (!test_hint.checkActual(actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server))
connection->forceConnected(connection_parameters.timeouts);
if (received_exception_from_server && !ignore_error)
{
if (is_interactive)
break;
else
if (!processWithFuzzing(full_query))
return false;
}
else
{
try
{
processParsedSingleQuery();
if (insert_ast && insert_ast->data)
{
// For VALUES format: use the end of inline data as reported
// by the format parser (it is saved in sendData()). This
// allows us to handle queries like:
// insert into t values (1); select 1
//, where the inline data is delimited by semicolon and not
// by a newline.
this_query_end = parsed_query->as<ASTInsertQuery>()->end;
}
}
catch (...)
{
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
received_exception_from_server = true;
}
if (!test_hint.checkActual(
actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server))
{
connection->forceConnected(connection_parameters.timeouts);
}
if (received_exception_from_server && !ignore_error)
{
if (is_interactive)
break;
else
return false;
}
}
this_query_begin = this_query_end;
}
@ -1120,165 +1036,148 @@ private:
}
// Returns the last position we could parse.
const char * processWithFuzzing(const String & text)
/// Returns false when server is not available.
bool processWithFuzzing(const String & text)
{
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
ASTPtr orig_ast;
const char * begin = text.data();
const char * end = begin + text.size();
while (begin < end)
try
{
// Skip whitespace before the query
while (isWhitespaceASCII(*begin) || *begin == ';')
const char * begin = text.data();
orig_ast = parseQuery(begin, begin + text.size(), true);
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR)
throw;
}
if (!orig_ast)
{
// Can't continue after a parsing error
return true;
}
// Don't repeat inserts, the tables grow too big. Also don't repeat
// creates because first we run the unmodified query, it will succeed,
// and the subsequent queries will fail. When we run out of fuzzer
// errors, it may be interesting to add fuzzing of create queries that
// wraps columns into LowCardinality or Nullable. Also there are other
// kinds of create queries such as CREATE DICTIONARY, we could fuzz
// them as well. Also there is no point fuzzing DROP queries.
size_t this_query_runs = query_fuzzer_runs;
if (orig_ast->as<ASTInsertQuery>() || orig_ast->as<ASTCreateQuery>() || orig_ast->as<ASTDropQuery>())
{
this_query_runs = 1;
}
ASTPtr fuzz_base = orig_ast;
for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step)
{
fmt::print(stderr, "Fuzzing step {} out of {}\n",
fuzz_step, this_query_runs);
ASTPtr ast_to_process;
try
{
++begin;
WriteBufferFromOwnString dump_before_fuzz;
fuzz_base->dumpTree(dump_before_fuzz);
auto base_before_fuzz = fuzz_base->formatForErrorMessage();
ast_to_process = fuzz_base->clone();
WriteBufferFromOwnString dump_of_cloned_ast;
ast_to_process->dumpTree(dump_of_cloned_ast);
// Run the original query as well.
if (fuzz_step > 0)
{
fuzzer.fuzzMain(ast_to_process);
}
auto base_after_fuzz = fuzz_base->formatForErrorMessage();
// Debug AST cloning errors.
if (base_before_fuzz != base_after_fuzz)
{
fmt::print(stderr,
"Base before fuzz: {}\n"
"Base after fuzz: {}\n",
base_before_fuzz, base_after_fuzz);
fmt::print(stderr, "Dump before fuzz:\n{}\n", dump_before_fuzz.str());
fmt::print(stderr, "Dump of cloned AST:\n{}\n", dump_of_cloned_ast.str());
fmt::print(stderr, "Dump after fuzz:\n");
WriteBufferFromOStream cerr_buf(std::cerr, 4096);
fuzz_base->dumpTree(cerr_buf);
cerr_buf.next();
fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly.");
assert(false);
}
auto fuzzed_text = ast_to_process->formatForErrorMessage();
if (fuzz_step > 0 && fuzzed_text == base_before_fuzz)
{
fmt::print(stderr, "Got boring AST\n");
continue;
}
parsed_query = ast_to_process;
query_to_send = parsed_query->formatForErrorMessage();
processParsedSingleQuery();
}
catch (...)
{
// Some functions (e.g. protocol parsers) don't throw, but
// set last_exception instead, so we'll also do it here for
// uniformity.
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
received_exception_from_server = true;
}
const auto * this_query_begin = begin;
ASTPtr orig_ast = parseQuery(begin, end, true);
if (!orig_ast)
if (received_exception_from_server)
{
// Can't continue after a parsing error
return begin;
fmt::print(stderr, "Error on processing query '{}': {}\n",
ast_to_process->formatForErrorMessage(),
last_exception_received_from_server->message());
}
auto * as_insert = orig_ast->as<ASTInsertQuery>();
if (as_insert && as_insert->data)
if (!connection->isConnected())
{
// INSERT data is ended by newline
as_insert->end = find_first_symbols<'\n'>(as_insert->data, end);
begin = as_insert->end;
// Probably the server is dead because we found an assertion
// failure. Fail fast.
fmt::print(stderr, "Lost connection to the server\n");
return false;
}
full_query = text.substr(this_query_begin - text.data(),
begin - text.data());
// Don't repeat inserts, the tables grow too big. Also don't repeat
// creates because first we run the unmodified query, it will succeed,
// and the subsequent queries will fail. When we run out of fuzzer
// errors, it may be interesting to add fuzzing of create queries that
// wraps columns into LowCardinality or Nullable. Also there are other
// kinds of create queries such as CREATE DICTIONARY, we could fuzz
// them as well.
int this_query_runs = query_fuzzer_runs;
if (as_insert
|| orig_ast->as<ASTCreateQuery>())
// The server is still alive so we're going to continue fuzzing.
// Determine what we're going to use as the starting AST.
if (received_exception_from_server)
{
this_query_runs = 1;
// Query completed with error, keep the previous starting AST.
// Also discard the exception that we now know to be non-fatal,
// so that it doesn't influence the exit code.
last_exception_received_from_server.reset(nullptr);
received_exception_from_server = false;
}
ASTPtr fuzz_base = orig_ast;
for (int fuzz_step = 0; fuzz_step < this_query_runs; fuzz_step++)
else if (ast_to_process->formatForErrorMessage().size() > 500)
{
fprintf(stderr, "fuzzing step %d out of %d for query at pos %zd\n",
fuzz_step, this_query_runs, this_query_begin - text.data());
ASTPtr ast_to_process;
try
{
WriteBufferFromOwnString dump_before_fuzz;
fuzz_base->dumpTree(dump_before_fuzz);
auto base_before_fuzz = fuzz_base->formatForErrorMessage();
ast_to_process = fuzz_base->clone();
WriteBufferFromOwnString dump_of_cloned_ast;
ast_to_process->dumpTree(dump_of_cloned_ast);
// Run the original query as well.
if (fuzz_step > 0)
{
fuzzer.fuzzMain(ast_to_process);
}
auto base_after_fuzz = fuzz_base->formatForErrorMessage();
// Debug AST cloning errors.
if (base_before_fuzz != base_after_fuzz)
{
fprintf(stderr, "base before fuzz: %s\n"
"base after fuzz: %s\n", base_before_fuzz.c_str(),
base_after_fuzz.c_str());
fprintf(stderr, "dump before fuzz:\n%s\n",
dump_before_fuzz.str().c_str());
fprintf(stderr, "dump of cloned ast:\n%s\n",
dump_of_cloned_ast.str().c_str());
fprintf(stderr, "dump after fuzz:\n");
WriteBufferFromOStream cerr_buf(std::cerr, 4096);
fuzz_base->dumpTree(cerr_buf);
cerr_buf.next();
fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly.");
assert(false);
}
auto fuzzed_text = ast_to_process->formatForErrorMessage();
if (fuzz_step > 0 && fuzzed_text == base_before_fuzz)
{
fprintf(stderr, "got boring ast\n");
continue;
}
parsed_query = ast_to_process;
query_to_send = parsed_query->formatForErrorMessage();
processParsedSingleQuery();
}
catch (...)
{
// Some functions (e.g. protocol parsers) don't throw, but
// set last_exception instead, so we'll also do it here for
// uniformity.
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
received_exception_from_server = true;
}
if (received_exception_from_server)
{
fmt::print(stderr, "Error on processing query '{}': {}\n",
ast_to_process->formatForErrorMessage(),
last_exception_received_from_server->message());
}
if (!connection->isConnected())
{
// Probably the server is dead because we found an assertion
// failure. Fail fast.
fmt::print(stderr, "Lost connection to the server\n");
return begin;
}
// The server is still alive so we're going to continue fuzzing.
// Determine what we're going to use as the starting AST.
if (received_exception_from_server)
{
// Query completed with error, keep the previous starting AST.
// Also discard the exception that we now know to be non-fatal,
// so that it doesn't influence the exit code.
last_exception_received_from_server.reset(nullptr);
received_exception_from_server = false;
}
else if (ast_to_process->formatForErrorMessage().size() > 500)
{
// ast too long, start from original ast
fprintf(stderr, "Current AST is too long, discarding it and using the original AST as a start\n");
fuzz_base = orig_ast;
}
else
{
// fuzz starting from this successful query
fprintf(stderr, "Query succeeded, using this AST as a start\n");
fuzz_base = ast_to_process;
}
// ast too long, start from original ast
fmt::print(stderr, "Current AST is too long, discarding it and using the original AST as a start\n");
fuzz_base = orig_ast;
}
else
{
// fuzz starting from this successful query
fmt::print(stderr, "Query succeeded, using this AST as a start\n");
fuzz_base = ast_to_process;
}
}
return begin;
return true;
}
void processTextAsSingleQuery(const String & text_)
@ -1891,6 +1790,13 @@ private:
{
if (!block_out_stream)
{
/// Ignore all results when fuzzing as they can be huge.
if (query_fuzzer_runs)
{
block_out_stream = std::make_shared<NullBlockOutputStream>(block);
return;
}
WriteBuffer * out_buf = nullptr;
String pager = config().getString("pager", "");
if (!pager.empty())
@ -2348,7 +2254,8 @@ public:
"Suggestion limit for how many databases, tables and columns to fetch.")
("multiline,m", "multiline")
("multiquery,n", "multiquery")
("queries-file", po::value<std::string>(), "file path with queries to execute")
("queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("format,f", po::value<std::string>(), "default output format")
("testmode,T", "enable test hints in comments")
("ignore-error", "do not stop processing in multiquery mode")
@ -2478,12 +2385,11 @@ public:
if (options.count("query"))
config().setString("query", options["query"].as<std::string>());
if (options.count("queries-file"))
config().setString("queries-file", options["queries-file"].as<std::string>());
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("database"))
config().setString("database", options["database"].as<std::string>());
if (options.count("pager"))
config().setString("pager", options["pager"].as<std::string>());
if (options.count("port") && !options["port"].defaulted())
config().setInt("port", options["port"].as<int>());
if (options.count("secure"))
@ -2537,7 +2443,6 @@ public:
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
// TODO stop using parseQuery.
config().setBool("ignore-error", true);
ignore_error = true;
}

View File

@ -21,6 +21,7 @@
#include <Common/OpenSSLHelpers.h>
#include <Common/hex.h>
#include <common/getResource.h>
#include <common/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
@ -763,7 +764,7 @@ namespace
fmt::print("Server started\n");
break;
}
::sleep(1);
sleepForSeconds(1);
}
if (try_num == num_tries)
@ -875,7 +876,7 @@ namespace
fmt::print("Server stopped\n");
break;
}
::sleep(1);
sleepForSeconds(1);
}
if (try_num == num_tries)

View File

@ -273,11 +273,12 @@ try
global_context->setCurrentDatabase(default_database);
applyCmdOptions(*global_context);
String path = global_context->getPath();
if (!path.empty())
if (config().has("path"))
{
String path = global_context->getPath();
/// Lock path directory before read
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);
status.emplace(path + "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path);
Poco::File(path + "data/").createDirectories();
@ -288,7 +289,7 @@ try
DatabaseCatalog::instance().loadDatabases();
LOG_DEBUG(log, "Loaded metadata.");
}
else
else if (!config().has("no-system-tables"))
{
attachSystemTables(*global_context);
}
@ -540,6 +541,7 @@ void LocalServer::init(int argc, char ** argv)
("logger.log", po::value<std::string>(), "Log file name")
("logger.level", po::value<std::string>(), "Log level")
("ignore-error", "do not stop processing if a query failed")
("no-system-tables", "do not attach system tables (better startup time)")
("version,V", "print version information and exit")
;
@ -602,6 +604,8 @@ void LocalServer::init(int argc, char ** argv)
config().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("ignore-error"))
config().setBool("ignore-error", true);
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
std::vector<std::string> arguments;
for (int arg_num = 1; arg_num < argc; ++arg_num)

View File

@ -18,6 +18,7 @@
#endif
#include <Common/StringUtils/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
#include <common/phdr_cache.h>
#include <ext/scope_guard.h>
@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
#endif
int mainEntryClickHouseHashBinary(int, char **)
{
/// Intentionally without newline. So you can run:
/// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse
std::cout << getHashOfLoadedBinaryHex();
return 0;
}
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
namespace
@ -110,6 +119,7 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
{"status", mainEntryClickHouseStatus},
{"restart", mainEntryClickHouseRestart},
#endif
{"hash-binary", mainEntryClickHouseHashBinary},
};

View File

@ -65,6 +65,8 @@
#include <Server/TCPHandlerFactory.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
#include <Server/ProtocolServerAdapter.h>
@ -184,6 +186,7 @@ namespace ErrorCodes
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
}
@ -436,7 +439,44 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if defined(OS_LINUX)
std::string executable_path = getExecutablePath();
if (executable_path.empty())
if (!executable_path.empty())
{
/// Integrity check based on checksum of the executable code.
/// Note: it is not intended to protect from malicious party,
/// because the reference checksum can be easily modified as well.
/// And we don't involve asymmetric encryption with PKI yet.
/// It's only intended to protect from faulty hardware.
/// Note: it is only based on machine code.
/// But there are other sections of the binary (e.g. exception handling tables)
/// that are interpreted (not executed) but can alter the behaviour of the program as well.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (stored_binary_hash.empty())
{
LOG_WARNING(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
}
else if (calculated_binary_hash == stored_binary_hash)
{
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file {2} was changed just after startup;"
" - the file {2} is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, stored_binary_hash, executable_path);
}
}
else
executable_path = "/usr/bin/clickhouse"; /// It is used for information messages.
/// After full config loaded

View File

@ -204,7 +204,7 @@
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
<!-- dhparams are optional. You can delete the <dhParamsFile> element.
To generate dhparams, use the following command:
openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096
openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096
Only file format with BEGIN DH PARAMETERS is supported.
-->
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
@ -432,7 +432,7 @@
<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.tech/docs/en/operations/table_engines/distributed/
-->
<remote_servers incl="clickhouse_remote_servers" >
<remote_servers>
<!-- Test only shard config for testing distributed storage -->
<test_shard_localhost>
<!-- Inter-server per-cluster secret for Distributed queries
@ -566,17 +566,37 @@
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
Optional. If you don't use replicated tables, you could omit that.
See https://clickhouse.yandex/docs/en/table_engines/replication/
See https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/
-->
<zookeeper incl="zookeeper-servers" optional="true" />
<!--
<zookeeper>
<node>
<host>example1</host>
<port>2181</port>
</node>
<node>
<host>example2</host>
<port>2181</port>
</node>
<node>
<host>example3</host>
<port>2181</port>
</node>
</zookeeper>
-->
<!-- Substitutions for parameters of replicated tables.
Optional. If you don't use replicated tables, you could omit that.
See https://clickhouse.yandex/docs/en/table_engines/replication/#creating-replicated-tables
See https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/#creating-replicated-tables
-->
<macros incl="macros" optional="true" />
<!--
<macros>
<shard>01</shard>
<replica>example01-01-1</replica>
</macros>
-->
<!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. -->
@ -656,7 +676,7 @@
<database>system</database>
<table>query_log</table>
<!--
PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/mergetree-family/custom_partitioning_key/
Example:
event_date
toMonday(event_date)
@ -810,8 +830,8 @@
<!-- Uncomment if you want data to be compressed 30-100% better.
Don't do that if you just started using ClickHouse.
-->
<compression incl="clickhouse_compression">
<!--
<compression>
<!- - Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. - ->
<case>
@ -822,8 +842,8 @@
<!- - What compression method to use. - ->
<method>zstd</method>
</case>
-->
</compression>
-->
<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->

View File

@ -20,7 +20,7 @@
<default>
<password></password>
<networks incl="networks" replace="replace">
<networks>
<ip>::/0</ip>
</networks>

View File

@ -77,7 +77,7 @@
Strongly recommended that regexp is ends with $
All results of DNS requests are cached till server restart.
-->
<networks incl="networks" replace="replace">
<networks>
<ip>::/0</ip>
</networks>

View File

@ -188,13 +188,13 @@ public:
if (!limit_num_elems)
{
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
}
else
{
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
if (elems_to_insert)
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.begin() + elems_to_insert, arena);
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
}
}

View File

@ -53,17 +53,35 @@ class AggregateFunctionIfNullUnary final
private:
size_t num_arguments;
/// The name of the nested function, including combinators (i.e. *If)
///
/// getName() from the nested_function cannot be used because in case of *If combinator
/// with Nullable argument nested_function will point to the function w/o combinator.
/// (I.e. sumIf(Nullable, 1) -> sum()), and distributed query processing will fail.
///
/// And nested_function cannot point to the function with *If since
/// due to optimization in the add() which pass only one column with the result,
/// and so AggregateFunctionIf::add() cannot be called this way
/// (it write to the last argument -- num_arguments-1).
///
/// And to avoid extra level of indirection, the name of function is cached:
///
/// AggregateFunctionIfNullUnary::add -> [ AggregateFunctionIf::add -> ] AggregateFunctionSum::add
String name;
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
public:
String getName() const override
{
return Base::getName();
return name;
}
AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size())
AggregateFunctionIfNullUnary(const String & name_, AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: Base(std::move(nested_function_), arguments, params)
, num_arguments(arguments.size())
, name(name_)
{
if (num_arguments == 0)
throw Exception("Aggregate function " + getName() + " require at least one argument",
@ -174,14 +192,14 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter(
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullUnary<true, true>>(nested_func, arguments, params);
return std::make_shared<AggregateFunctionIfNullUnary<true, true>>(nested_function->getName(), nested_func, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionIfNullUnary<false, true>>(nested_func, arguments, params);
return std::make_shared<AggregateFunctionIfNullUnary<false, true>>(nested_function->getName(), nested_func, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullUnary<false, false>>(nested_func, arguments, params);
return std::make_shared<AggregateFunctionIfNullUnary<false, false>>(nested_function->getName(), nested_func, arguments, params);
}
}
else

View File

@ -66,8 +66,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments)
values_types.push_back(array_type->getNestedType());
}
return std::tuple{std::move(keys_type), std::move(values_types),
tuple_argument};
return std::tuple{std::move(keys_type), std::move(values_types), tuple_argument};
}
// This function instantiates a particular overload of the sumMap family of

View File

@ -54,6 +54,8 @@ struct AggregateFunctionMapData
* ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20])
*
* minMap and maxMap share the same idea, but calculate min and max correspondingly.
*
* NOTE: The implementation of these functions are "amateur grade" - not efficient and low quality.
*/
template <typename T, typename Derived, typename Visitor, bool overflow, bool tuple_argument, bool compact>
@ -72,7 +74,8 @@ public:
const DataTypes & values_types_, const DataTypes & argument_types_)
: Base(argument_types_, {} /* parameters */), keys_type(keys_type_),
values_types(values_types_)
{}
{
}
DataTypePtr getReturnType() const override
{
@ -81,13 +84,26 @@ public:
for (const auto & value_type : values_types)
{
if constexpr (std::is_same_v<Visitor, FieldVisitorSum>)
{
if (!value_type->isSummable())
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Values for {} cannot be summed, passed type {}",
getName(), value_type->getName()};
}
DataTypePtr result_type;
if constexpr (overflow)
{
if (value_type->onlyNull())
throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Cannot calculate {} of type {}",
getName(), value_type->getName()};
// Overflow, meaning that the returned type is the same as
// the input type.
result_type = value_type;
// the input type. Nulls are skipped.
result_type = removeNullable(value_type);
}
else
{
@ -120,9 +136,9 @@ public:
}
}
void add(AggregateDataPtr place, const IColumn** _columns, const size_t row_num, Arena *) const override
void add(AggregateDataPtr place, const IColumn ** columns_, const size_t row_num, Arena *) const override
{
const auto & columns = getArgumentColumns(_columns);
const auto & columns = getArgumentColumns(columns_);
// Column 0 contains array of keys of known type
const ColumnArray & array_column0 = assert_cast<const ColumnArray &>(*columns[0]);
@ -148,16 +164,13 @@ public:
// Insert column values for all keys
for (size_t i = 0; i < keys_vec_size; ++i)
{
auto value = value_column.operator[](values_vec_offset + i);
auto key = key_column.operator[](keys_vec_offset + i).get<T>();
auto value = value_column[values_vec_offset + i];
auto key = key_column[keys_vec_offset + i].get<T>();
if (!keepKey(key))
continue;
if (value.isNull())
continue;
typename std::decay_t<decltype(merged_maps)>::iterator it;
decltype(merged_maps.begin()) it;
if constexpr (IsDecimalNumber<T>)
{
// FIXME why is storing NearestFieldType not enough, and we
@ -170,17 +183,20 @@ public:
if (it != merged_maps.end())
{
applyVisitor(Visitor(value), it->second[col]);
if (!value.isNull())
{
if (it->second[col].isNull())
it->second[col] = value;
else
applyVisitor(Visitor(value), it->second[col]);
}
}
else
{
// Create a value array for this key
Array new_values;
new_values.resize(values_types.size());
for (size_t k = 0; k < new_values.size(); ++k)
{
new_values[k] = (k == col) ? value : values_types[k]->getDefault();
}
new_values.resize(size);
new_values[col] = value;
if constexpr (IsDecimalNumber<T>)
{
@ -207,7 +223,8 @@ public:
if (it != merged_maps.end())
{
for (size_t col = 0; col < values_types.size(); ++col)
applyVisitor(Visitor(elem.second[col]), it->second[col]);
if (!elem.second[col].isNull())
applyVisitor(Visitor(elem.second[col]), it->second[col]);
}
else
merged_maps[elem.first] = elem.second;
@ -253,6 +270,8 @@ public:
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{
size_t num_columns = values_types.size();
// Final step does compaction of keys that have zero values, this mutates the state
auto & merged_maps = this->data(place).merged_maps;
@ -263,9 +282,9 @@ public:
{
// Key is not compacted if it has at least one non-zero value
bool erase = true;
for (size_t col = 0; col < values_types.size(); ++col)
for (size_t col = 0; col < num_columns; ++col)
{
if (it->second[col] != values_types[col]->getDefault())
if (!it->second[col].isNull() && it->second[col] != values_types[col]->getDefault())
{
erase = false;
break;
@ -290,7 +309,7 @@ public:
to_keys_offsets.push_back(to_keys_offsets.back() + size);
to_keys_col.reserve(size);
for (size_t col = 0; col < values_types.size(); ++col)
for (size_t col = 0; col < num_columns; ++col)
{
auto & to_values_arr = assert_cast<ColumnArray &>(to_tuple.getColumn(col + 1));
auto & to_values_offsets = to_values_arr.getOffsets();
@ -305,10 +324,13 @@ public:
to_keys_col.insert(elem.first);
// Write 0..n arrays of values
for (size_t col = 0; col < values_types.size(); ++col)
for (size_t col = 0; col < num_columns; ++col)
{
auto & to_values_col = assert_cast<ColumnArray &>(to_tuple.getColumn(col + 1)).getData();
to_values_col.insert(elem.second[col]);
if (elem.second[col].isNull())
to_values_col.insertDefault();
else
to_values_col.insert(elem.second[col]);
}
}
}

View File

@ -19,12 +19,12 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
struct ComparePairFirst final
struct ComparePair final
{
template <typename T1, typename T2>
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
{
return lhs.first < rhs.first;
return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first;
}
};
@ -33,8 +33,8 @@ template <typename T>
struct AggregateFunctionWindowFunnelData
{
using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArray<TimestampEvent, 64>;
using Comparator = ComparePairFirst;
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
using Comparator = ComparePair;
bool sorted = true;
TimestampEvents events_list;
@ -47,8 +47,13 @@ struct AggregateFunctionWindowFunnelData
void add(T timestamp, UInt8 event)
{
// Since most events should have already been sorted by timestamp.
if (sorted && events_list.size() > 0 && events_list.back().first > timestamp)
sorted = false;
if (sorted && events_list.size() > 0)
{
if (events_list.back().first == timestamp)
sorted = events_list.back().second <= event;
else
sorted = events_list.back().first <= timestamp;
}
events_list.emplace_back(timestamp, event);
}

View File

@ -393,6 +393,12 @@ size_t ColumnAggregateFunction::byteSize() const
+ (my_arena ? my_arena->size() : 0);
}
size_t ColumnAggregateFunction::byteSizeAt(size_t) const
{
/// Lower estimate as aggregate function can allocate more data in Arena.
return sizeof(data[0]) + func->sizeOfData();
}
/// Like in byteSize(), the size is underestimated.
size_t ColumnAggregateFunction::allocatedBytes() const
{

View File

@ -163,6 +163,8 @@ public:
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;

View File

@ -403,6 +403,21 @@ size_t ColumnArray::byteSize() const
}
size_t ColumnArray::byteSizeAt(size_t n) const
{
const auto & offsets_data = getOffsets();
size_t pos = offsets_data[n - 1];
size_t end = offsets_data[n];
size_t res = sizeof(offsets_data[0]);
for (; pos < end; ++pos)
res += getData().byteSizeAt(pos);
return res;
}
size_t ColumnArray::allocatedBytes() const
{
return getData().allocatedBytes() + getOffsets().allocated_bytes();

View File

@ -84,6 +84,7 @@ public:
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;

View File

@ -187,6 +187,11 @@ public:
return data->byteSize() + sizeof(s);
}
size_t byteSizeAt(size_t) const override
{
return data->byteSizeAt(0);
}
size_t allocatedBytes() const override
{
return data->allocatedBytes() + sizeof(s);

View File

@ -87,6 +87,7 @@ public:
size_t size() const override { return data.size(); }
size_t byteSize() const override { return data.size() * sizeof(data[0]); }
size_t byteSizeAt(size_t) const override { return sizeof(data[0]); }
size_t allocatedBytes() const override { return data.allocated_bytes(); }
void protect() override { data.protect(); }
void reserve(size_t n) override { data.reserve(n); }

View File

@ -57,6 +57,11 @@ public:
return chars.size() + sizeof(n);
}
size_t byteSizeAt(size_t) const override
{
return n;
}
size_t allocatedBytes() const override
{
return chars.allocated_bytes() + sizeof(n);

View File

@ -140,6 +140,15 @@ size_t ColumnFunction::byteSize() const
return total_size;
}
size_t ColumnFunction::byteSizeAt(size_t n) const
{
size_t total_size = 0;
for (const auto & column : captured_columns)
total_size += column.column->byteSizeAt(n);
return total_size;
}
size_t ColumnFunction::allocatedBytes() const
{
size_t total_size = 0;

View File

@ -47,6 +47,7 @@ public:
void getExtremes(Field &, Field &) const override {}
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void appendArguments(const ColumnsWithTypeAndName & columns);

View File

@ -151,6 +151,7 @@ public:
void reserve(size_t n) override { idx.reserve(n); }
size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
void forEachSubcolumn(ColumnCallback callback) override

View File

@ -211,6 +211,11 @@ size_t ColumnMap::byteSize() const
return nested->byteSize();
}
size_t ColumnMap::byteSizeAt(size_t n) const
{
return nested->byteSizeAt(n);
}
size_t ColumnMap::allocatedBytes() const
{
return nested->allocatedBytes();

View File

@ -77,6 +77,7 @@ public:
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;

View File

@ -495,6 +495,11 @@ size_t ColumnNullable::byteSize() const
return getNestedColumn().byteSize() + getNullMapColumn().byteSize();
}
size_t ColumnNullable::byteSizeAt(size_t n) const
{
return sizeof(getNullMapData()[0]) + getNestedColumn().byteSizeAt(n);
}
size_t ColumnNullable::allocatedBytes() const
{
return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes();

View File

@ -101,6 +101,7 @@ public:
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;

View File

@ -71,6 +71,12 @@ public:
return chars.size() + offsets.size() * sizeof(offsets[0]);
}
size_t byteSizeAt(size_t n) const override
{
assert(n < size());
return sizeAt(n) + sizeof(offsets[0]);
}
size_t allocatedBytes() const override
{
return chars.allocated_bytes() + offsets.allocated_bytes();

View File

@ -424,6 +424,14 @@ size_t ColumnTuple::byteSize() const
return res;
}
size_t ColumnTuple::byteSizeAt(size_t n) const
{
size_t res = 0;
for (const auto & column : columns)
res += column->byteSizeAt(n);
return res;
}
size_t ColumnTuple::allocatedBytes() const
{
size_t res = 0;

View File

@ -83,6 +83,7 @@ public:
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;

View File

@ -88,6 +88,10 @@ public:
bool isNumeric() const override { return column_holder->isNumeric(); }
size_t byteSize() const override { return column_holder->byteSize(); }
size_t byteSizeAt(size_t n) const override
{
return getNestedColumn()->byteSizeAt(n);
}
void protect() override { column_holder->protect(); }
size_t allocatedBytes() const override
{

View File

@ -178,6 +178,11 @@ public:
return data.size() * sizeof(data[0]);
}
size_t byteSizeAt(size_t) const override
{
return sizeof(data[0]);
}
size_t allocatedBytes() const override
{
if constexpr (is_POD)

View File

@ -333,6 +333,9 @@ public:
/// Size of column data in memory (may be approximate) - for profiling. Zero, if could not be determined.
virtual size_t byteSize() const = 0;
/// Size of single value in memory (for accounting purposes)
virtual size_t byteSizeAt(size_t /*n*/) const = 0;
/// Size of memory, allocated for column.
/// This is greater or equals to byteSize due to memory reservation in containers.
/// Zero, if could not be determined.

View File

@ -33,6 +33,7 @@ public:
void insertDefault() override { ++s; }
void popBack(size_t n) override { s -= n; }
size_t byteSize() const override { return 0; }
size_t byteSizeAt(size_t) const override { return 0; }
size_t allocatedBytes() const override { return 0; }
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override

View File

@ -0,0 +1,41 @@
#include <Common/DirectorySyncGuard.h>
#include <Common/Exception.h>
#include <Disks/IDisk.h>
#include <fcntl.h> // O_RDWR
/// OSX does not have O_DIRECTORY
#ifndef O_DIRECTORY
#define O_DIRECTORY O_RDWR
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_FSYNC;
}
DirectorySyncGuard::DirectorySyncGuard(const DiskPtr & disk_, const String & path)
: disk(disk_)
, fd(disk_->open(path, O_DIRECTORY))
{}
DirectorySyncGuard::~DirectorySyncGuard()
{
try
{
#if defined(OS_DARWIN)
if (fcntl(fd, F_FULLFSYNC, 0))
throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC);
#endif
disk->sync(fd);
disk->close(fd);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}

View File

@ -1,36 +1,26 @@
#pragma once
#include <Disks/IDisk.h>
#include <string>
#include <memory>
namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
/// Helper class, that receives file descriptor and does fsync for it in destructor.
/// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end.
/// Guaranties of sequence 'close-reopen-fsync' may depend on kernel version.
/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
class FileSyncGuard
class DirectorySyncGuard
{
public:
/// NOTE: If you have already opened descriptor, it's preferred to use
/// this constructor instead of constructor with path.
FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
FileSyncGuard(const DiskPtr & disk_, const String & path)
: disk(disk_), fd(disk_->open(path, O_RDWR)) {}
~FileSyncGuard()
{
try
{
disk->sync(fd);
disk->close(fd);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
DirectorySyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
DirectorySyncGuard(const DiskPtr & disk_, const std::string & path);
~DirectorySyncGuard();
private:
DiskPtr disk;

View File

@ -151,6 +151,15 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size)
}
String Elf::getBinaryHash() const
{
if (auto section = findSectionByName(".note.ClickHouse.hash"))
return {section->begin(), section->end()};
else
return {};
}
const char * Elf::Section::name() const
{
if (!elf.section_names)

Some files were not shown because too many files have changed in this diff Show More