Merge branch 'master' into kssenii-pg2ch

This commit is contained in:
Alexey Milovidov 2021-01-15 17:33:19 +03:00
commit a19e7edd14
361 changed files with 5138 additions and 2346 deletions

View File

@ -1,6 +1,6 @@
--- ---
name: Question name: Question
about: Ask question about ClickHouse about: Ask a question about ClickHouse
title: '' title: ''
labels: question labels: question
assignees: '' assignees: ''

View File

@ -1,6 +1,6 @@
--- ---
name: Unexpected behaviour name: Unexpected behaviour
about: Create a report to help us improve ClickHouse about: Some feature is working in a non-obvious way
title: '' title: ''
labels: unexpected behaviour labels: unexpected behaviour
assignees: '' assignees: ''

View File

@ -0,0 +1,30 @@
---
name: Incomplete implementation
about: Implementation of existing feature is not finished
title: ''
labels: unfinished code
assignees: ''
---
(you don't have to strictly follow this form)
**Describe the unexpected behaviour**
A clear and concise description of what does not work as it is supposed to.
**How to reproduce**
* Which ClickHouse server version to use
* Which interface to use, if matters
* Non-default settings, if any
* `CREATE TABLE` statements for all tables involved
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Queries to run that lead to an unexpected result
**Expected behavior**
A clear and concise description of what you expected to happen.
**Error message and/or stacktrace**
If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.

View File

@ -1,6 +1,6 @@
--- ---
name: Usability issue name: Usability issue
about: Create a report to help us improve ClickHouse about: Report something that can be made more convenient to use
title: '' title: ''
labels: usability labels: usability
assignees: '' assignees: ''

View File

@ -1,6 +1,6 @@
--- ---
name: Backward compatibility issue name: Backward compatibility issue
about: Create a report to help us improve ClickHouse about: Report the case when the behaviour of a new version can break existing use cases
title: '' title: ''
labels: backward compatibility labels: backward compatibility
assignees: '' assignees: ''

View File

@ -0,0 +1,16 @@
---
name: Assertion found via fuzzing
about: Potential issue has been found via Fuzzer or Stress tests
title: ''
labels: fuzz
assignees: ''
---
(you don't have to strictly follow this form)
**Describe the bug**
A link to the report
**How to reproduce**
Try to reproduce the report and copy the tables and queries involved.

View File

@ -220,6 +220,13 @@ if (LINKER_NAME MATCHES "lld$")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
endif () endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
set (USE_BINARY_HASH 1)
endif ()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -56,6 +56,9 @@
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
#include <Common/MemorySanitizer.h> #include <Common/MemorySanitizer.h>
#include <Common/SymbolIndex.h> #include <Common/SymbolIndex.h>
#include <Common/getExecutablePath.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include <Common/config_version.h> # include <Common/config_version.h>
@ -80,28 +83,6 @@ static void call_default_signal_handler(int sig)
raise(sig); raise(sig);
} }
// Apparently strsignal is not instrumented by MemorySanitizer, so we
// have to unpoison it to avoid msan reports inside fmt library when we
// print it.
const char * msan_strsignal(int sig)
{
// no glibc in osx/freebsd
#if !defined(__GLIBC_PREREQ)
#define __GLIBC_PREREQ(x, y) 0
#endif
// glibc 2.32+ deprecates sys_siglist[]
// newer glibc is a problem only for unbundled build.
#if __GLIBC_PREREQ(2, 32)
const char * signal_name = sigdescr_np(sig);
#else
const char * signal_name = sys_siglist[sig];
#endif
__msan_unpoison_string(signal_name);
return signal_name;
}
static constexpr size_t max_query_id_size = 127; static constexpr size_t max_query_id_size = 127;
static const size_t signal_pipe_buf_size = static const size_t signal_pipe_buf_size =
@ -131,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)
/** Signal handler for HUP / USR1 */ /** Signal handler for HUP / USR1 */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *) static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{ {
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig); writeSignalIDtoSignalPipe(sig);
} }
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
{ {
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig); writeSignalIDtoSignalPipe(sig);
} }
@ -144,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
*/ */
static void signalHandler(int sig, siginfo_t * info, void * context) static void signalHandler(int sig, siginfo_t * info, void * context)
{ {
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno; /// We must restore previous value of errno in signal handler. auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
char buf[signal_pipe_buf_size]; char buf[signal_pipe_buf_size];
@ -306,13 +290,13 @@ private:
{ {
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, msan_strsignal(sig), sig); thread_num, strsignal(sig), sig);
} }
else else
{ {
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})", LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
thread_num, query_id, msan_strsignal(sig), sig); thread_num, query_id, strsignal(sig), sig);
} }
String error_message; String error_message;
@ -340,6 +324,32 @@ private:
/// Write symbolized stack trace line by line for better grep-ability. /// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
#if defined(__linux__)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
{
LOG_FATAL(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
}
else if (calculated_binary_hash == daemon.stored_binary_hash)
{
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file was changed just after startup;"
" - the file is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, daemon.stored_binary_hash);
}
#endif
/// Write crash to system.crash_log table if available. /// Write crash to system.crash_log table if available.
if (collectCrashLog) if (collectCrashLog)
collectCrashLog(sig, thread_num, query_id, stack_trace); collectCrashLog(sig, thread_num, query_id, stack_trace);
@ -493,8 +503,9 @@ void BaseDaemon::kill()
{ {
dumpCoverageReportIfPossible(); dumpCoverageReportIfPossible();
pid_file.reset(); pid_file.reset();
if (::raise(SIGKILL) != 0) /// Exit with the same code as it is usually set by shell when process is terminated by SIGKILL.
throw Poco::SystemException("cannot kill process"); /// It's better than doing 'raise' or 'kill', because they have no effect for 'init' process (with pid = 1, usually in Docker).
_exit(128 + SIGKILL);
} }
std::string BaseDaemon::getDefaultCorePath() const std::string BaseDaemon::getDefaultCorePath() const
@ -799,6 +810,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
#else #else
build_id_info = "no build id"; build_id_info = "no build id";
#endif #endif
#if defined(__linux__)
std::string executable_path = getExecutablePath();
if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getBinaryHash();
#endif
} }
void BaseDaemon::logRevision() const void BaseDaemon::logRevision() const
@ -858,13 +876,13 @@ void BaseDaemon::handleSignal(int signal_id)
onInterruptSignals(signal_id); onInterruptSignals(signal_id);
} }
else else
throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0); throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
} }
void BaseDaemon::onInterruptSignals(int signal_id) void BaseDaemon::onInterruptSignals(int signal_id)
{ {
is_cancelled = true; is_cancelled = true;
LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id)); LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
if (sigint_signals_counter >= 2) if (sigint_signals_counter >= 2)
{ {
@ -1010,3 +1028,9 @@ void BaseDaemon::setupWatchdog()
#endif #endif
} }
} }
/// Returns a copy of the reference hash of the binary kept in `stored_binary_hash`.
/// That member is filled in initializeTerminationAndSignalProcessing() on Linux, from
/// DB::Elf(executable_path).getBinaryHash(), when the executable path is non-empty.
/// NOTE(review): presumably the result is an empty string when no hash was stored - confirm.
String BaseDaemon::getStoredBinaryHash() const
{
return stored_binary_hash;
}

View File

@ -60,7 +60,7 @@ public:
static void terminate(); static void terminate();
/// Forceful shutdown /// Forceful shutdown
void kill(); [[noreturn]] void kill();
/// Cancellation request has been received. /// Cancellation request has been received.
bool isCancelled() const bool isCancelled() const
@ -121,6 +121,9 @@ public:
/// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly). /// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly).
void shouldSetupWatchdog(char * argv0_); void shouldSetupWatchdog(char * argv0_);
/// Hash of the binary for integrity checks.
String getStoredBinaryHash() const;
protected: protected:
virtual void logRevision() const; virtual void logRevision() const;
@ -168,6 +171,7 @@ protected:
Poco::Util::AbstractConfiguration * last_configuration = nullptr; Poco::Util::AbstractConfiguration * last_configuration = nullptr;
String build_id_info; String build_id_info;
String stored_binary_hash;
std::vector<int> handled_signals; std::vector<int> handled_signals;

View File

@ -0,0 +1,125 @@
#include <signal.h>
#include <string.h>
/* sigmap(x) converts a platform signal number into the position of its
 * description in the string table used by strsignal().
 *
 * When the platform numbers signals 1..31 exactly in that table's order,
 * sigmap() is the identity. A name that is not defined as a macro evaluates
 * to 0 inside #if, so the comparison fails and the lookup table is used. */
#if (SIGHUP == 1) && (SIGINT == 2) && (SIGQUIT == 3) && (SIGILL == 4) \
&& (SIGTRAP == 5) && (SIGABRT == 6) && (SIGBUS == 7) && (SIGFPE == 8) \
&& (SIGKILL == 9) && (SIGUSR1 == 10) && (SIGSEGV == 11) && (SIGUSR2 == 12) \
&& (SIGPIPE == 13) && (SIGALRM == 14) && (SIGTERM == 15) && (SIGSTKFLT == 16) \
&& (SIGCHLD == 17) && (SIGCONT == 18) && (SIGSTOP == 19) && (SIGTSTP == 20) \
&& (SIGTTIN == 21) && (SIGTTOU == 22) && (SIGURG == 23) && (SIGXCPU == 24) \
&& (SIGXFSZ == 25) && (SIGVTALRM == 26) && (SIGPROF == 27) && (SIGWINCH == 28) \
&& (SIGPOLL == 29) && (SIGPWR == 30) && (SIGSYS == 31)
#define sigmap(x) x
#else
/* Translation table indexed by the platform's signal number; each value is
 * the canonical position (1..31) of that signal's description. Slots that
 * get no initializer are zero-initialized. */
static const char map[] = {
[SIGHUP] = 1,
[SIGINT] = 2,
[SIGQUIT] = 3,
[SIGILL] = 4,
[SIGTRAP] = 5,
[SIGABRT] = 6,
[SIGBUS] = 7,
[SIGFPE] = 8,
[SIGKILL] = 9,
[SIGUSR1] = 10,
[SIGSEGV] = 11,
[SIGUSR2] = 12,
[SIGPIPE] = 13,
[SIGALRM] = 14,
[SIGTERM] = 15,
/* Position 16 belongs to SIGSTKFLT where it exists, otherwise to SIGEMT. */
#if defined(SIGSTKFLT)
[SIGSTKFLT] = 16,
#elif defined(SIGEMT)
[SIGEMT] = 16,
#endif
[SIGCHLD] = 17,
[SIGCONT] = 18,
[SIGSTOP] = 19,
[SIGTSTP] = 20,
[SIGTTIN] = 21,
[SIGTTOU] = 22,
[SIGURG] = 23,
[SIGXCPU] = 24,
[SIGXFSZ] = 25,
[SIGVTALRM] = 26,
[SIGPROF] = 27,
[SIGWINCH] = 28,
[SIGPOLL] = 29,
[SIGPWR] = 30,
[SIGSYS] = 31
};
/* Numbers at or beyond the end of the table are passed through unchanged. */
#define sigmap(x) ((x) >= sizeof map ? (x) : map[(x)])
#endif
/* Signal descriptions packed into a single array as consecutive
 * NUL-terminated strings. Entry 0 is the fallback "Unknown signal"; entry k
 * is the description for the signal whose sigmap() value is k. strsignal()
 * finds an entry by skipping k terminators, so the order of the entries
 * must stay in step with the numbering used by sigmap(). Entries from 32
 * upwards are named "RT<number>"; the block up to RT128 is only compiled in
 * when _NSIG is greater than 65. */
static const char strings[] =
"Unknown signal\0"
"Hangup\0"
"Interrupt\0"
"Quit\0"
"Illegal instruction\0"
"Trace/breakpoint trap\0"
"Aborted\0"
"Bus error\0"
"Arithmetic exception\0"
"Killed\0"
"User defined signal 1\0"
"Segmentation fault\0"
"User defined signal 2\0"
"Broken pipe\0"
"Alarm clock\0"
"Terminated\0"
/* Entry 16 depends on which of SIGSTKFLT / SIGEMT the platform defines. */
#if defined(SIGSTKFLT)
"Stack fault\0"
#elif defined(SIGEMT)
"Emulator trap\0"
#else
"Unknown signal\0"
#endif
"Child process status\0"
"Continued\0"
"Stopped (signal)\0"
"Stopped\0"
"Stopped (tty input)\0"
"Stopped (tty output)\0"
"Urgent I/O condition\0"
"CPU time limit exceeded\0"
"File size limit exceeded\0"
"Virtual timer expired\0"
"Profiling timer expired\0"
"Window changed\0"
"I/O possible\0"
"Power failure\0"
"Bad system call\0"
"RT32"
"\0RT33\0RT34\0RT35\0RT36\0RT37\0RT38\0RT39\0RT40"
"\0RT41\0RT42\0RT43\0RT44\0RT45\0RT46\0RT47\0RT48"
"\0RT49\0RT50\0RT51\0RT52\0RT53\0RT54\0RT55\0RT56"
"\0RT57\0RT58\0RT59\0RT60\0RT61\0RT62\0RT63\0RT64"
#if _NSIG > 65
"\0RT65\0RT66\0RT67\0RT68\0RT69\0RT70\0RT71\0RT72"
"\0RT73\0RT74\0RT75\0RT76\0RT77\0RT78\0RT79\0RT80"
"\0RT81\0RT82\0RT83\0RT84\0RT85\0RT86\0RT87\0RT88"
"\0RT89\0RT90\0RT91\0RT92\0RT93\0RT94\0RT95\0RT96"
"\0RT97\0RT98\0RT99\0RT100\0RT101\0RT102\0RT103\0RT104"
"\0RT105\0RT106\0RT107\0RT108\0RT109\0RT110\0RT111\0RT112"
"\0RT113\0RT114\0RT115\0RT116\0RT117\0RT118\0RT119\0RT120"
"\0RT121\0RT122\0RT123\0RT124\0RT125\0RT126\0RT127\0RT128"
#endif
"";
/*
 * Return the description of signal `signum`.
 *
 * The result points into the constant `strings` table, so no shared buffer
 * is written. Any number that does not fall in 1 .. _NSIG-1 after sigmap()
 * yields the first table entry, "Unknown signal".
 */
char *strsignal(int signum)
{
	const char *entry = strings;
	int remaining = sigmap(signum);

	/* The subtraction is done in unsigned arithmetic, so zero and negative
	 * values wrap around to huge numbers and are rejected by the same test
	 * that rejects values that are too large. */
	if (remaining - 1U >= _NSIG-1)
		remaining = 0;

	/* Step over `remaining` NUL-terminated entries. */
	while (remaining-- != 0) {
		while (*entry != '\0')
			entry++;
		entry++;
	}

	return (char *)entry;
}

View File

@ -142,7 +142,7 @@ TRAP(qecvt)
TRAP(qfcvt) TRAP(qfcvt)
TRAP(register_printf_function) TRAP(register_printf_function)
TRAP(seed48) TRAP(seed48)
TRAP(setenv) //TRAP(setenv)
TRAP(setfsent) TRAP(setfsent)
TRAP(setgrent) TRAP(setgrent)
TRAP(sethostent) TRAP(sethostent)
@ -164,7 +164,7 @@ TRAP(sigsuspend)
TRAP(sleep) TRAP(sleep)
TRAP(srand48) TRAP(srand48)
//TRAP(strerror) // Used by RocksDB and many other libraries, unfortunately. //TRAP(strerror) // Used by RocksDB and many other libraries, unfortunately.
TRAP(strsignal) //TRAP(strsignal) // This function is imported from Musl and is thread safe.
TRAP(strtok) TRAP(strtok)
TRAP(tcflow) TRAP(tcflow)
TRAP(tcsendbreak) TRAP(tcsendbreak)

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh: # This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54444) SET(VERSION_REVISION 54445)
SET(VERSION_MAJOR 20) SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 13) SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1) SET(VERSION_PATCH 1)
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471) SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
SET(VERSION_DESCRIBE v20.13.1.1-prestable) SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
SET(VERSION_STRING 20.13.1.1) SET(VERSION_STRING 21.1.1.5646)
# end of autochange # end of autochange

View File

@ -32,12 +32,21 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
# 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash # debian (debhelper) set SOURCE_DATE_EPOCH environment variable, that is
# of the manifest) and debian will extract these from d/changelog, and # filled from the debian/changelog or current time.
# makes cache of ccache unusable
# #
# FIXME: once sloppiness will be introduced for this this can be removed. # - 4.0+ ccache always includes this environment variable into the hash
if (CCACHE_VERSION VERSION_GREATER "4.0") # of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
#
# So for:
# - 4.2+ time_macros sloppiness is used,
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "Use time_macros sloppiness for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")

View File

@ -1,5 +1,4 @@
# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope if (NOT ARCH_ARM AND OPENSSL_FOUND)
if (NOT ARCH_ARM AND NOT OS_FREEBSD AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES}) option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND) elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")

View File

@ -1,2 +1,2 @@
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1

View File

@ -18,7 +18,11 @@ if (WITH_COVERAGE)
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
separate_arguments(WITHOUT_COVERAGE_LIST) separate_arguments(WITHOUT_COVERAGE_LIST)
# disable coverage for contrib files and build with optimisations # disable coverage for contrib files and build with optimisations
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) if (COMPILER_CLANG)
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
else()
add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST})
endif()
endif() endif()
if (SANITIZE STREQUAL "undefined") if (SANITIZE STREQUAL "undefined")

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 95650a0db4399ee871d5fd698ad12384fe9fa964 Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7

View File

@ -5,6 +5,8 @@ set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
set(SRCS set(SRCS
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp ${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
${LIBCXX_SOURCE_DIR}/src/any.cpp ${LIBCXX_SOURCE_DIR}/src/any.cpp
${LIBCXX_SOURCE_DIR}/src/atomic.cpp
${LIBCXX_SOURCE_DIR}/src/barrier.cpp
${LIBCXX_SOURCE_DIR}/src/bind.cpp ${LIBCXX_SOURCE_DIR}/src/bind.cpp
${LIBCXX_SOURCE_DIR}/src/charconv.cpp ${LIBCXX_SOURCE_DIR}/src/charconv.cpp
${LIBCXX_SOURCE_DIR}/src/chrono.cpp ${LIBCXX_SOURCE_DIR}/src/chrono.cpp
@ -20,6 +22,7 @@ ${LIBCXX_SOURCE_DIR}/src/functional.cpp
${LIBCXX_SOURCE_DIR}/src/future.cpp ${LIBCXX_SOURCE_DIR}/src/future.cpp
${LIBCXX_SOURCE_DIR}/src/hash.cpp ${LIBCXX_SOURCE_DIR}/src/hash.cpp
${LIBCXX_SOURCE_DIR}/src/ios.cpp ${LIBCXX_SOURCE_DIR}/src/ios.cpp
${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp
${LIBCXX_SOURCE_DIR}/src/iostream.cpp ${LIBCXX_SOURCE_DIR}/src/iostream.cpp
${LIBCXX_SOURCE_DIR}/src/locale.cpp ${LIBCXX_SOURCE_DIR}/src/locale.cpp
${LIBCXX_SOURCE_DIR}/src/memory.cpp ${LIBCXX_SOURCE_DIR}/src/memory.cpp
@ -28,6 +31,7 @@ ${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
${LIBCXX_SOURCE_DIR}/src/new.cpp ${LIBCXX_SOURCE_DIR}/src/new.cpp
${LIBCXX_SOURCE_DIR}/src/optional.cpp ${LIBCXX_SOURCE_DIR}/src/optional.cpp
${LIBCXX_SOURCE_DIR}/src/random.cpp ${LIBCXX_SOURCE_DIR}/src/random.cpp
${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp
${LIBCXX_SOURCE_DIR}/src/regex.cpp ${LIBCXX_SOURCE_DIR}/src/regex.cpp
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp ${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp ${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit 1ebc83af4c06dbcd56b4d166c1314a7d4c1173f9 Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076

View File

@ -11,7 +11,6 @@ ${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp ${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp ${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp ${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp ${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp ${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp ${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp

View File

@ -83,7 +83,8 @@
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400) #if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
#define _TTHREAD_EMULATE_TIMESPEC_GET_ #define _TTHREAD_EMULATE_TIMESPEC_GET_
#endif #endif
#elif defined(__FreeBSD__)
#define HAVE_PTHREAD_SETNAME_FREEBSD 1
#else #else
// pthread_setname_gnu // pthread_setname_gnu
#define HAVE_PTHREAD_SETNAME_GNU 1 #define HAVE_PTHREAD_SETNAME_GNU 1

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 8b966f0ca298fc1475bd09d9775f32dff0fdce0a Subproject commit 54a0decabbcf4c0bb5cf7befa9c597f28289bff5

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.13.1.1) unstable; urgency=low clickhouse (21.1.0) unstable; urgency=low
* Modified source code * Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300 -- Alexey Milovidov <milovidov@yandex-team.ru> Mon, 11 Jan 2021 03:51:08 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.* ARG version=21.1.0
RUN apt-get update \ RUN apt-get update \
&& apt-get install --yes --no-install-recommends \ && apt-get install --yes --no-install-recommends \

View File

@ -45,7 +45,8 @@
"name": "yandex/clickhouse-stateless-test", "name": "yandex/clickhouse-stateless-test",
"dependent": [ "dependent": [
"docker/test/stateful", "docker/test/stateful",
"docker/test/coverage" "docker/test/coverage",
"docker/test/unit"
] ]
}, },
"docker/test/stateless_pytest": { "docker/test/stateless_pytest": {
@ -134,7 +135,9 @@
"name": "yandex/clickhouse-test-base", "name": "yandex/clickhouse-test-base",
"dependent": [ "dependent": [
"docker/test/stateless", "docker/test/stateless",
"docker/test/stateless_pytest" "docker/test/stateless_unbundled",
"docker/test/stateless_pytest",
"docker/test/integration/base"
] ]
}, },
"docker/packager/unbundled": { "docker/packager/unbundled": {
@ -151,5 +154,9 @@
"docker/test/integration/kerberized_hadoop": { "docker/test/integration/kerberized_hadoop": {
"name": "yandex/clickhouse-kerberized-hadoop", "name": "yandex/clickhouse-kerberized-hadoop",
"dependent": [] "dependent": []
},
"docker/test/sqlancer": {
"name": "yandex/clickhouse-sqlancer-test",
"dependent": []
} }
} }

View File

@ -82,7 +82,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& rm -rf cctools-port && rm -rf cctools-port
# Download toolchain for Darwin # Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Download toolchain for ARM # Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling. # It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.

View File

@ -3,7 +3,7 @@
set -x -e set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64 mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
mkdir -p build/cmake/toolchain/linux-aarch64 mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

View File

@ -4,5 +4,5 @@ alpine-root/install/*
# docs (looks useless) # docs (looks useless)
alpine-root/usr/share/doc/* alpine-root/usr/share/doc/*
# packages, etc. (used by prepare.sh) # packages, etc. (used by alpine-build.sh)
alpine-root/tgz-packages/* tgz-packages/*

View File

@ -1 +1,2 @@
alpine-root/* alpine-root/*
tgz-packages/*

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04 FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.* ARG version=21.1.0
ARG gosu_ver=1.10 ARG gosu_ver=1.10
RUN apt-get update \ RUN apt-get update \

View File

@ -16,7 +16,7 @@ RUN addgroup clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \ && chown root:clickhouse /var/log/clickhouse-server \
&& chmod 775 /var/log/clickhouse-server \ && chmod 775 /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \ && chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec && apk add --no-cache su-exec bash
EXPOSE 9000 8123 9009 EXPOSE 9000 8123 9009

View File

@ -4,6 +4,7 @@ set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}" VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"
# where original files live # where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here # we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# where to put downloaded tgz # clean up the root from old runs, it's reconstructed each time
TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
# clean up the root from old runs
rm -rf "$CONTAINER_ROOT_FOLDER" rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER" mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo # download tars from the repo
for package in "${PACKAGES[@]}" for package in "${PACKAGES[@]}"
do do
wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done done
# unpack tars # unpack tars
@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/lib64" "${CONTAINER_ROOT_FOLDER}/lib64"
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place ## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04 docker pull ubuntu:20.04
@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,152 +0,0 @@
#!/bin/sh
#set -x

# Entrypoint of the Alpine-based ClickHouse server image.
#
# Steps performed, in order:
#   1. decide which uid/gid the server runs as (supports `docker run --user`);
#   2. create the directories named in the server config and check/fix ownership;
#   3. optionally generate a users.d override for CLICKHOUSE_USER / CLICKHOUSE_PASSWORD;
#   4. optionally start a localhost-only server, create CLICKHOUSE_DB and run the
#      scripts found in /docker-entrypoint-initdb.d/, then stop that server;
#   5. exec the real server, or the command given to `docker run`.
#
# The script is run by /bin/sh, so only POSIX shell constructs are used
# (no arrays, no `[[ ]]`, no `printf -v`).

DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
    DO_CHOWN=0
fi

CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"

# support --user
if [ "$(id -u)" = "0" ]; then
    USER=$CLICKHOUSE_UID
    GROUP=$CLICKHOUSE_GID
    # busybox has setuidgid & chpst buildin
    gosu="su-exec $USER:$GROUP"
else
    # Not root: we cannot switch users or chown, so run everything as the current user.
    USER="$(id -u)"
    GROUP="$(id -g)"
    gosu=""
    DO_CHOWN=0
fi

# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"

# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"

# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"

CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"

# Run clickhouse-client as the configured user, forwarding any extra arguments.
# A function is used instead of a command string so that the user name and the
# password are passed as single, correctly quoted arguments.
run_client() {
    if [ -n "$CLICKHOUSE_PASSWORD" ]; then
        clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" "$@"
    else
        clickhouse-client --multiquery -u "$CLICKHOUSE_USER" "$@"
    fi
}

for dir in "$DATA_DIR" \
    "$ERROR_LOG_DIR" \
    "$LOG_DIR" \
    "$TMP_DIR" \
    "$USER_PATH" \
    "$FORMAT_SCHEMA_PATH"
do
    # check if variable not empty
    [ -z "$dir" ] && continue
    # ensure directories exist
    if ! mkdir -p "$dir"; then
        echo "Couldn't create necessary directory: $dir"
        exit 1
    fi

    if [ "$DO_CHOWN" = "1" ]; then
        # ensure proper directories permissions
        chown -R "$USER:$GROUP" "$dir"
    elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
        echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
        exit 1
    fi
done

# if clickhouse user is defined - create it (user "default" already exists out of box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
    echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
    # NOTE(review): the user name and password are written into the XML verbatim;
    # values containing XML metacharacters (<, >, &) would produce an invalid file.
    cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
<default remove="remove">
</default>
<${CLICKHOUSE_USER}>
<profile>default</profile>
<networks>
<ip>::/0</ip>
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
</${CLICKHOUSE_USER}>
</users>
</yandex>
EOT
fi

if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
    # Listen only on localhost until the initialization is done
    $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
    pid="$!"

    # check if clickhouse is ready to accept connections
    # will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
    tries=6
    while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
        if [ "$tries" -le "0" ]; then
            echo >&2 'ClickHouse init process failed.'
            exit 1
        fi
        tries=$(( tries-1 ))
        sleep 1
    done

    # Kept for init scripts that are sourced below and may expand this variable
    # unquoted. The password is deliberately not embedded: POSIX sh has no safe
    # way to quote it into a command string. The entrypoint itself uses run_client.
    clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER "

    # create default database, if defined
    if [ -n "$CLICKHOUSE_DB" ]; then
        echo "$0: create database '$CLICKHOUSE_DB'"
        run_client -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
    fi

    for f in /docker-entrypoint-initdb.d/*; do
        case "$f" in
            *.sh)
                # executable scripts run in their own process, others are sourced
                if [ -x "$f" ]; then
                    echo "$0: running $f"
                    "$f"
                else
                    echo "$0: sourcing $f"
                    . "$f"
                fi
                ;;
            *.sql)    echo "$0: running $f"; run_client < "$f" ; echo ;;
            *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | run_client; echo ;;
            *)        echo "$0: ignoring $f" ;;
        esac
        echo
    done

    # stop the temporary localhost-only server before starting the real one
    if ! kill -s TERM "$pid" || ! wait "$pid"; then
        echo >&2 'Finishing of ClickHouse init process failed.'
        exit 1
    fi
fi

# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
start_server=0
if [ "$#" -lt 1 ]; then
    start_server=1
else
    case "$1" in
        --*) start_server=1 ;;
    esac
fi
if [ "$start_server" = "1" ]; then
    exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi

# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
exec "$@"

71
docker/server/entrypoint.sh Normal file → Executable file
View File

@ -1,7 +1,10 @@
#!/bin/bash #!/bin/bash
set -eo pipefail
shopt -s nullglob
DO_CHOWN=1 DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
DO_CHOWN=0 DO_CHOWN=0
fi fi
@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user # support --user
if [ x"$UID" == x0 ]; then if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID GROUP=$CLICKHOUSE_GID
gosu="gosu $USER:$GROUP" if command -v gosu &> /dev/null; then
gosu="gosu $USER:$GROUP"
elif command -v su-exec &> /dev/null; then
gosu="su-exec $USER:$GROUP"
else
echo "No gosu/su-exec detected!"
exit 1
fi
else else
USER="$(id -u)" USER="$(id -u)"
GROUP="$(id -g)" GROUP="$(id -g)"
@ -23,18 +33,23 @@ fi
# set some vars # set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# Fail early, with a message that names the file, when the user the server will
# run as cannot read the configuration file.
if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
    echo "Configuration file '$CLICKHOUSE_CONFIG' isn't readable by user with id '$USER'"
    exit 1
fi
# port is needed to check if clickhouse-server is ready for connections # port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# get CH directories locations # get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
LOG_DIR="$(dirname $LOG_PATH || true)" LOG_DIR="$(dirname "$LOG_PATH" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -58,8 +73,8 @@ do
if [ "$DO_CHOWN" = "1" ]; then if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directories permissions # ensure proper directories permissions
chown -R "$USER:$GROUP" "$dir" chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'" echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
exit 1 exit 1
fi fi
done done
@ -90,21 +105,22 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done # Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 & $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!" pid="$!"
# check if clickhouse is ready to accept connections # check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay) # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
echo >&2 'ClickHouse init process failed.' while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
exit 1 if [ "$tries" -le "0" ]; then
fi echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
echo echo
@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
"$f" "$f"
else else
echo "$0: sourcing $f" echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f" . "$f"
fi fi
;; ;;
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;; *.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;; *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;; *) echo "$0: ignoring $f" ;;
esac esac
@ -140,7 +157,7 @@ fi
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments # if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@" exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi fi
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.* ARG version=21.1.0
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \ apt-get install -y apt-transport-https dirmngr && \

View File

@ -329,6 +329,7 @@ function run_tests
# nc - command not found # nc - command not found
01601_proxy_protocol 01601_proxy_protocol
01622_defaults_for_url_engine
) )
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od
ENV TZ=Europe/Moscow ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -0,0 +1,13 @@
# docker build -t yandex/clickhouse-sqlancer-test .
FROM ubuntu:20.04

# Tools needed to download and build SQLancer (JDK + Maven).
# The apt package lists are removed in the same layer so they do not end up in the image.
RUN apt-get update --yes \
    && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven --yes --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

# Fetch the SQLancer sources and unpack them to /sqlancer/sqlancer-master.
# Download, unpack and archive removal share one layer so the zip is not kept in the image.
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip \
    && mkdir /sqlancer \
    && cd /sqlancer \
    && unzip /sqlancer.zip \
    && rm /sqlancer.zip

# Pre-build SQLancer; its own tests are skipped at image build time.
RUN cd /sqlancer/sqlancer-master && mvn package -DskipTests

COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

15
docker/test/sqlancer/run.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
# Installs the ClickHouse packages found in package_folder/, starts the server,
# runs the SQLancer ClickHouse test and copies the surefire report to /test_output.
set -e -x

dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb

# give the server a few seconds to start before the test connects to it
service clickhouse-server start && sleep 5

cd /sqlancer/sqlancer-master

# Remember the test status instead of letting `set -e` abort here: the report
# below is most useful precisely when the test run fails.
test_status=0
CLICKHOUSE_AVAILABLE=true mvn -Dtest=TestClickHouse test || test_status=$?

cp /sqlancer/sqlancer-master/target/surefire-reports/TEST-sqlancer.dbms.TestClickHouse.xml /test_output/result.xml

# propagate the test result to the caller
exit "$test_status"

View File

@ -66,3 +66,6 @@ function run_tests()
export -f run_tests export -f run_tests
timeout "$MAX_RUN_TIME" bash -c run_tests ||: timeout "$MAX_RUN_TIME" bash -c run_tests ||:
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

View File

@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh / COPY run.sh /
CMD ["/bin/bash", "/run.sh"] CMD ["/bin/bash", "/run.sh"]

View File

@ -7,3 +7,4 @@ RUN apt-get install gdb
CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \ CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt

View File

@ -42,9 +42,9 @@ Also, we need to download macOS X SDK into the working tree.
``` bash ``` bash
cd ClickHouse cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz' wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
``` ```
## Build ClickHouse {#build-clickhouse} ## Build ClickHouse {#build-clickhouse}

View File

@ -99,6 +99,8 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes). - `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage). - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
- `max_parts_in_total` — Maximum number of parts in all partitions. - `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
**Example of Sections Setting** **Example of Sections Setting**

View File

@ -25,10 +25,27 @@ The Distributed engine accepts parameters:
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
It also accepts the following settings:
- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk.
- `fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc).
!!! note "Note"
**Durability settings** (`fsync_...`):
- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`), when data is first stored on the initiator node disk and later asynchronously sent to shards.
- May significantly decrease the inserts' performance
- Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings`
Example: Example:
``` sql ``` sql
Distributed(logs, default, hits[, sharding_key[, policy_name]]) Distributed(logs, default, hits[, sharding_key[, policy_name]])
SETTINGS
fsync_after_insert=0,
fsync_directories=0;
``` ```
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster. Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.

View File

@ -13,6 +13,7 @@ toc_title: Client Libraries
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client) - [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -844,23 +844,27 @@ Higher values will lead to higher memory usage.
## max_compress_block_size {#max-compress-block-size} ## max_compress_block_size {#max-compress-block-size}
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isnt any reason to change this setting. The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
!!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
Dont confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table). Dont confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
## min_compress_block_size {#min-compress-block-size} ## min_compress_block_size {#min-compress-block-size}
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least min_compress_block_size. By default, 65,536. For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536.
The actual size of the block, if the uncompressed data is less than max_compress_block_size, is no less than this value and no less than the volume of data for one mark. The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark.
Lets look at an example. Assume that index_granularity was set to 8192 during table creation. Lets look at an example. Assume that `index_granularity` was set to 8192 during table creation.
We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks. We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data wont be decompressed. We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data wont be decompressed.
There usually isnt any reason to change this setting. !!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
## max_query_size {#settings-max_query_size} ## max_query_size {#settings-max_query_size}
@ -2470,6 +2474,45 @@ Possible values:
Default value: `0`. Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
**Example**
Consider the following query with aggregate functions:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
With `aggregate_functions_null_for_empty = 0` it would produce:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
With `aggregate_functions_null_for_empty = 1` the result would be:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode} ## union_default_mode {#union-default-mode}
Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`. Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
@ -2484,6 +2527,7 @@ Default value: `''`.
See examples in [UNION](../../sql-reference/statements/select/union.md). See examples in [UNION](../../sql-reference/statements/select/union.md).
## data_type_default_nullable {#data_type_default_nullable} ## data_type_default_nullable {#data_type_default_nullable}
Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -2495,6 +2539,7 @@ Possible values:
Default value: `0`. Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas. Enables special logic to perform merges on replicas.

View File

@ -0,0 +1,67 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
Contains information about distributed DDL queries (queries with the `ON CLUSTER` clause) that were executed on a cluster.
Columns:
- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port.
- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Status of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) - Node that executed the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution in milliseconds.
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper.
**Example**
``` sql
SELECT *
FROM system.distributed_ddl_queue
WHERE cluster = 'test_cluster'
LIMIT 2
FORMAT Vertical
Query id: f544e72a-6641-43f1-836b-24baa1c9632a
Row 1:
──────
entry: query-0000000000
host_name: clickhouse01
host_address: 172.23.0.11
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
Row 2:
──────
entry: query-0000000000
host_name: clickhouse02
host_address: 172.23.0.12
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue) <!--hide-->

View File

@ -20,7 +20,33 @@ System tables:
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or by setting the corresponding elements in `/etc/clickhouse-server/config.xml`. The elements that can be customized are:
- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`.
- `table`: table to insert data.
- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression.
- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression.
- `flush_interval_milliseconds`: interval of flushing data to disk.
- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option conflicts with `partition_by` and `ttl`. If they are set together, the server raises an exception and exits.
An example:
```
<yandex>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
<!--
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
```
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables. By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.

View File

@ -11,6 +11,7 @@ This table contains the following columns (the column type is shown in brackets)
- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. - `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). - `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md).
- `supports_deduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. - `supports_deduplication` (UInt8) — Flag that indicates if table engine supports data deduplication.
- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting).
Example: Example:
@ -21,11 +22,11 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
``` ```
``` text ``` text
┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐ ┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┬─supports_parallel_insert─
Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ 1 │
MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │
│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ │ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘ └───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┴──────────────────────────
``` ```
**See also** **See also**

View File

@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query. - For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default. - Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: For other sources (ODBC, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
- The dictionary table must have a field that always changes when the source data is updated. - The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md). - The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).

View File

@ -583,7 +583,7 @@ Example of settings:
or or
``` sql ``` sql
SOURCE(MONGO( SOURCE(MONGODB(
host 'localhost' host 'localhost'
port 27017 port 27017
user '' user ''

View File

@ -1290,22 +1290,65 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference
## arrayMin(\[func,\] arr1, …) {#array-min} ## arrayMin(\[func,\] arr1, …) {#array-min}
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements. Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements.
Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arrayMin([1, 2, 4]) AS res
┌─res─┐
│ 1 │
└─────┘
SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res
┌─res─┐
│ -4 │
└─────┘
```
## arrayMax(\[func,\] arr1, …) {#array-max} ## arrayMax(\[func,\] arr1, …) {#array-max}
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements. Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements.
Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arrayMax([1, 2, 4]) AS res
┌─res─┐
│ 4 │
└─────┘
SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res
┌─res─┐
│ -1 │
└─────┘
```
## arraySum(\[func,\] arr1, …) {#array-sum} ## arraySum(\[func,\] arr1, …) {#array-sum}
Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements. Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
Examples:
```sql
SELECT arraySum([2,3]) AS res
┌─res─┐
│ 5 │
└─────┘
SELECT arraySum(x -> x*x, [2, 3]) AS res
┌─res─┐
│ 13 │
└─────┘
```
## arrayAvg(\[func,\] arr1, …) {#array-avg} ## arrayAvg(\[func,\] arr1, …) {#array-avg}
Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements. Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.

View File

@ -23,6 +23,7 @@ The following actions are supported:
- [CLEAR COLUMN](#alter_clear-column) — Resets column values. - [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. - [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes columns type, default expression and TTL. - [MODIFY COLUMN](#alter_modify-column) — Changes columns type, default expression and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
These actions are described in detail below. These actions are described in detail below.
@ -145,6 +146,26 @@ The `ALTER` query is atomic. For MergeTree tables it is also lock-free.
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously. The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.
## MODIFY COLUMN REMOVE {#modify-remove}
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
- [REMOVE TTL](ttl.md).
## Limitations {#alter-query-limitations} ## Limitations {#alter-query-limitations}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot. The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.

View File

@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. - As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`. - As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. - In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

View File

@ -3,10 +3,83 @@ toc_priority: 44
toc_title: TTL toc_title: TTL
--- ---
### Manipulations with Table TTL {#manipulations-with-table-ttl} # Manipulations with Table TTL {#manipulations-with-table-ttl}
## MODIFY TTL {#modify-ttl}
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form: You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:
``` sql ``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression ALTER TABLE table_name MODIFY TTL ttl_expression;
``` ```
## REMOVE TTL {#remove-ttl}
The TTL property can be removed from a table with the following query:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Example**
Consider the table with table `TTL`:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Run `OPTIMIZE` to force `TTL` cleanup:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The second row was deleted from the table.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Now remove table `TTL` with the following query:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The `TTL` is no longer there, so the second row is not deleted:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### See Also
- More about the [TTL-expression](../../../sql-reference/statements/create/table.md#ttl-expression).
- Modify column [with TTL](../../../sql-reference/statements/alter/column.md#alter_modify-column).

View File

@ -13,9 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
``` ```
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax. You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
For example, consider the table: For example, consider the table:
@ -23,9 +21,8 @@ For example, consider the table:
SHOW CREATE insert_select_testtable; SHOW CREATE insert_select_testtable;
``` ```
``` ```text
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ CREATE TABLE insert_select_testtable
│ CREATE TABLE insert_select_testtable
( (
`a` Int8, `a` Int8,
`b` String, `b` String,
@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
) )
ENGINE = MergeTree() ENGINE = MergeTree()
ORDER BY a ORDER BY a
SETTINGS index_granularity = 8192 │ SETTINGS index_granularity = 8192
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
``` ```
``` sql ``` sql

View File

@ -0,0 +1,21 @@
---
toc_title: ALL
---
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception is thrown.
`ALL` can also be specified inside an aggregate function with the same effect (no-op), for instance:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
is equivalent to
```sql
SELECT sum(number) FROM numbers(10);
```

View File

@ -18,10 +18,6 @@ It is possible to obtain the same result by applying [GROUP BY](../../../sql-ref
- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read. - When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish running. - Data blocks are output as they are processed, without waiting for the entire query to finish running.
## Limitations {#limitations}
`DISTINCT` is not supported if `SELECT` has at least one array column.
## Examples {#examples} ## Examples {#examples}
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause. ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.

View File

@ -44,9 +44,9 @@ Además, necesitamos descargar macOS X SDK en el árbol de trabajo.
``` bash ``` bash
cd ClickHouse cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz' wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
``` ```
# Construir ClickHouse {#build-clickhouse} # Construir ClickHouse {#build-clickhouse}

View File

@ -13,6 +13,7 @@ toc_title: Client Libraries
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client) - [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -44,9 +44,9 @@ En outre, nous devons télécharger macOS X SDK dans l'arbre de travail.
``` bash ``` bash
cd ClickHouse cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz' wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
``` ```
# Construire ClickHouse {#build-clickhouse} # Construire ClickHouse {#build-clickhouse}

View File

@ -15,6 +15,7 @@ toc_title: "Biblioth\xE8ques Clientes"
- [clickhouse-chauffeur](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-chauffeur](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client) - [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2 / phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2 / phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov / clickhouse-PHP-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov / clickhouse-PHP-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -45,9 +45,9 @@ make install
``` bash ``` bash
cd ClickHouse cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz' wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
``` ```
# ビルドClickHouse {#build-clickhouse} # ビルドClickHouse {#build-clickhouse}

View File

@ -15,6 +15,7 @@ toc_title: "\u30AF\u30E9\u30A4\u30A2\u30F3\u30C8"
- [clickhouse-ドライバ](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-ドライバ](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-クライアント](https://github.com/yurial/clickhouse-client) - [clickhouse-クライアント](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-クライアント](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov/clickhouse-php-クライアント](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -133,7 +133,7 @@ ClickHouse имеет сильную типизацию, поэтому нет
## Агрегатные функции {#aggregate-functions} ## Агрегатные функции {#aggregate-functions}
Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь один человек `UInt64` значение) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`). Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь одна переменная типа `UInt64`) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`).
Состояния распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении запроса `GROUP BY` высокой кардинальности (большим числом уникальных данных). Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами аллоцировать дополнительную память. Потому к созданию и уничтожению состояний, правильной передаче владения и порядку уничтожения следует уделять больше внимание. Состояния распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении запроса `GROUP BY` высокой кардинальности (большим числом уникальных данных). Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами аллоцировать дополнительную память. Потому к созданию и уничтожению состояний, правильной передаче владения и порядку уничтожения следует уделять больше внимание.

View File

@ -77,17 +77,19 @@ ORDER BY expr
- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные): - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные):
- `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage). - `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage).
- `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage). - `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage).
- `min_index_granularity_bytes` — минимально допустимый размер гранул данных в байтах. Значение по умолчанию — 1024b. Для обеспечения защиты от случайного создания таблиц с очень низким значением `index_granularity_bytes`. Смотрите [Хранение данных](#mergetree-data-storage). - `min_index_granularity_bytes` — минимально допустимый размер гранул данных в байтах. Значение по умолчанию — 1024b. Для обеспечения защиты от случайного создания таблиц с очень низким значением `index_granularity_bytes`. Смотрите [Хранение данных](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`. - `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера". - `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов. - `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день). - <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её. - `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192. - `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes). - `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage). - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage).
- `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
- `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
**Пример задания секций** **Пример задания секций**

View File

@ -13,6 +13,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client) - [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -811,23 +811,27 @@ log_query_threads=1
## max_compress_block_size {#max-compress-block-size} ## max_compress_block_size {#max-compress-block-size}
Максимальный размер блоков не сжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативки. Как правило, не имеет смысла менять эту настройку. Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативной памяти.
!!! note "Предупреждение"
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с ClickHouse.
Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы). Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы).
## min_compress_block_size {#min-compress-block-size} ## min_compress_block_size {#min-compress-block-size}
Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536. Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше `min_compress_block_size`. По умолчанию - 65 536.
Реальный размер блока, если несжатых данных меньше max_compress_block_size, будет не меньше этого значения и не меньше объёма данных на одну засечку. Реальный размер блока, если несжатых данных меньше `max_compress_block_size`, будет не меньше этого значения и не меньше объёма данных на одну засечку.
Рассмотрим пример. Пусть index_granularity, указанная при создании таблицы - 8192. Рассмотрим пример. Пусть `index_granularity`, указанная при создании таблицы - 8192.
Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как min_compress_block_size = 65 536, сжатый блок будет сформирован на каждые две засечки. Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как `min_compress_block_size` = 65 536, сжатый блок будет сформирован на каждые две засечки.
Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 байт, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных. Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 байт, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных.
Как правило, не имеет смысла менять эту настройку. !!! note "Предупреждение"
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с ClickHouse.
## max_query_size {#settings-max_query_size} ## max_query_size {#settings-max_query_size}
@ -2339,6 +2343,45 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
Значение по умолчанию: `0`. Значение по умолчанию: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Включает или отключает перезапись всех агрегатных функций в запросе, с добавлением к ним суффикса [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull). Включите для совместимости со стандартом SQL.
Реализуется с помощью перезаписи запросов (аналогично настройке [count_distinct_implementation](#settings-count_distinct_implementation)), чтобы получить согласованные результаты для распределенных запросов.
Возможные значения:
- 0 — выключена.
- 1 — включена.
Значение по умолчанию: 0.
**Пример**
Рассмотрим запрос с агрегирующими функциями:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
Результат запроса с настройкой `aggregate_functions_null_for_empty = 0`:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
Результат запроса с настройкой `aggregate_functions_null_for_empty = 1`:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode} ## union_default_mode {#union-default-mode}
Устанавливает режим объединения результатов `SELECT` запросов. Настройка используется только при совместном использовании с [UNION](../../sql-reference/statements/select/union.md) без явного указания `UNION ALL` или `UNION DISTINCT`. Устанавливает режим объединения результатов `SELECT` запросов. Настройка используется только при совместном использовании с [UNION](../../sql-reference/statements/select/union.md) без явного указания `UNION ALL` или `UNION DISTINCT`.
@ -2353,6 +2396,7 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md). Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md).
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Включает особую логику выполнения слияний на репликах. Включает особую логику выполнения слияний на репликах.

View File

@ -8,7 +8,7 @@
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — значение метрики. - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — значение метрики.
- `description` ([String](../../sql-reference/data-types/string.md)) — описание метрики. - `description` ([String](../../sql-reference/data-types/string.md)) — описание метрики.
Список поддержанных метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp). Список поддерживаемых метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
**Пример** **Пример**

View File

@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md):
> - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. > - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется.
> - Для таблиц типа MyISAM, время модификации проверяется запросом `SHOW TABLE STATUS`. > - Для MySQL источника, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`.
> - Словари из других источников по умолчанию обновляются каждый раз. > - Словари из других источников по умолчанию обновляются каждый раз.
Для источников MySQL (InnoDB), ODBC и ClickHouse можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: Для других источников (ODBC, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия:
> - В таблице словаря должно быть поле, которое гарантированно изменяется при обновлении данных в источнике. > - В таблице словаря должно быть поле, которое гарантированно изменяется при обновлении данных в источнике.
> - В настройках источника указывается запрос, который получает изменяющееся поле. Результат запроса сервер ClickHouse интерпретирует как строку и если эта строка изменилась по отношению к предыдущему состоянию, то словарь обновляется. Запрос следует указывать в поле `<invalidate_query>` настроек [источника](external-dicts-dict-sources.md). > - В настройках источника указывается запрос, который получает изменяющееся поле. Результат запроса сервер ClickHouse интерпретирует как строку и если эта строка изменилась по отношению к предыдущему состоянию, то словарь обновляется. Запрос следует указывать в поле `<invalidate_query>` настроек [источника](external-dicts-dict-sources.md).

View File

@ -12,6 +12,7 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
- [CLEAR COLUMN](#alter_clear-column) — сбрасывает все значения в столбце для заданной партиции; - [CLEAR COLUMN](#alter_clear-column) — сбрасывает все значения в столбце для заданной партиции;
- [COMMENT COLUMN](#alter_comment-column) — добавляет комментарий к столбцу; - [COMMENT COLUMN](#alter_comment-column) — добавляет комментарий к столбцу;
- [MODIFY COLUMN](#alter_modify-column) — изменяет тип столбца, выражение для значения по умолчанию и TTL. - [MODIFY COLUMN](#alter_modify-column) — изменяет тип столбца, выражение для значения по умолчанию и TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — удаляет какое-либо из свойств столбца.
Подробное описание для каждого действия приведено ниже. Подробное описание для каждого действия приведено ниже.
@ -135,6 +136,28 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Запрос `ALTER` на изменение столбцов реплицируется. Соответствующие инструкции сохраняются в ZooKeeper, и затем каждая реплика их применяет. Все запросы `ALTER` выполняются в одном и том же порядке. Запрос ждёт выполнения соответствующих действий на всех репликах. Но при этом, запрос на изменение столбцов в реплицируемой таблице можно прервать, и все действия будут осуществлены асинхронно. Запрос `ALTER` на изменение столбцов реплицируется. Соответствующие инструкции сохраняются в ZooKeeper, и затем каждая реплика их применяет. Все запросы `ALTER` выполняются в одном и том же порядке. Запрос ждёт выполнения соответствующих действий на всех репликах. Но при этом, запрос на изменение столбцов в реплицируемой таблице можно прервать, и все действия будут осуществлены асинхронно.
## MODIFY COLUMN REMOVE {#modify-remove}
Удаляет какое-либо из свойств столбца: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Синтаксис:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Пример**
Удаление свойства TTL:
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## Смотрите также
- [REMOVE TTL](ttl.md).
## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter} ## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter}
Запрос `ALTER` позволяет создавать и удалять отдельные элементы (столбцы) вложенных структур данных, но не вложенные структуры данных целиком. Для добавления вложенной структуры данных, вы можете добавить столбцы с именем вида `name.nested_name` и типом `Array(T)` - вложенная структура данных полностью эквивалентна нескольким столбцам-массивам с именем, имеющим одинаковый префикс до точки. Запрос `ALTER` позволяет создавать и удалять отдельные элементы (столбцы) вложенных структур данных, но не вложенные структуры данных целиком. Для добавления вложенной структуры данных, вы можете добавить столбцы с именем вида `name.nested_name` и типом `Array(T)` - вложенная структура данных полностью эквивалентна нескольким столбцам-массивам с именем, имеющим одинаковый префикс до точки.

View File

@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
Чтобы задать нужную партицию в запросах `ALTER ... PARTITION`, можно использовать: Чтобы задать нужную партицию в запросах `ALTER ... PARTITION`, можно использовать:
- Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../../../operations/system-tables/parts.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`. - Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../../../operations/system-tables/parts.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`.
- Произвольное выражение из столбцов исходной таблицы. Также поддерживаются константы и константные выражения. Например, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`. - Кортеж из выражений или констант, совпадающий (в типах) с кортежем партиционирования. В случае ключа партиционирования из одного элемента, выражение следует обернуть в функцию `tuple(...)`. Например, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Строковый идентификатор партиции. Идентификатор партиции используется для именования кусков партиции на файловой системе и в ZooKeeper. В запросах `ALTER` идентификатор партиции нужно указывать в секции `PARTITION ID`, в одинарных кавычках. Например, `ALTER TABLE visits DETACH PARTITION ID '201901'`. - Строковый идентификатор партиции. Идентификатор партиции используется для именования кусков партиции на файловой системе и в ZooKeeper. В запросах `ALTER` идентификатор партиции нужно указывать в секции `PARTITION ID`, в одинарных кавычках. Например, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковой литерал со значением из столбца `name` системной таблицы [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. - Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковой литерал со значением из столбца `name` системной таблицы [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

View File

@ -5,10 +5,82 @@ toc_title: TTL
# Манипуляции с TTL таблицы {#manipuliatsii-s-ttl-tablitsy} # Манипуляции с TTL таблицы {#manipuliatsii-s-ttl-tablitsy}
## MODIFY TTL {#modify-ttl}
Вы можете изменить [TTL для таблицы](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) запросом следующего вида: Вы можете изменить [TTL для таблицы](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) запросом следующего вида:
``` sql ``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression ALTER TABLE table-name MODIFY TTL ttl-expression
``` ```
## REMOVE TTL {#remove-ttl}
Удалить табличный TTL можно запросом следующего вида:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Пример**
Создадим таблицу с табличным `TTL` и заполним её данными:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Выполним `OPTIMIZE` для принудительной очистки по `TTL`:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
В результате видно, что вторая строка удалена.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Удаляем табличный `TTL`:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Заново вставляем удаленную строку и снова принудительно запускаем очистку по `TTL` с помощью `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
`TTL` больше нет, поэтому данные не удаляются:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### Смотрите также
- Подробнее о [свойстве TTL](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide--> [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->

View File

@ -13,9 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
``` ```
Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `COLUMNS(c1,c2,c3)`. Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`.
В качестве примера рассмотрим таблицу: В качестве примера рассмотрим таблицу:

View File

@ -18,10 +18,6 @@ toc_title: DISTINCT
- Когда секция [ORDER BY](order-by.md) опущена, а секция [LIMIT](limit.md) присутствует, запрос прекращает выполнение сразу после считывания необходимого количества различных строк. - Когда секция [ORDER BY](order-by.md) опущена, а секция [LIMIT](limit.md) присутствует, запрос прекращает выполнение сразу после считывания необходимого количества различных строк.
- Блоки данных выводятся по мере их обработки, не дожидаясь завершения выполнения всего запроса. - Блоки данных выводятся по мере их обработки, не дожидаясь завершения выполнения всего запроса.
## Ограничения {#limitations}
`DISTINCT` не поддерживается, если `SELECT` имеет по крайней мере один столбец-массив.
## Примеры {#examples} ## Примеры {#examples}
ClickHouse поддерживает использование секций `DISTINCT` и `ORDER BY` для разных столбцов в одном запросе. Секция `DISTINCT` выполняется до секции `ORDER BY`. ClickHouse поддерживает использование секций `DISTINCT` и `ORDER BY` для разных столбцов в одном запросе. Секция `DISTINCT` выполняется до секции `ORDER BY`.

View File

@ -33,8 +33,8 @@ cd cctools-port/cctools
make install make install
cd ${CCTOOLS} cd ${CCTOOLS}
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz tar xJf MacOSX10.15.sdk.tar.xz
``` ```
# 编译 ClickHouse {#bian-yi-clickhouse} # 编译 ClickHouse {#bian-yi-clickhouse}
@ -46,7 +46,7 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \ -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \ -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \ -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \
-DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk -DSDK_PATH=${CCTOOLS}/MacOSX10.15.sdk
ninja -C build-osx ninja -C build-osx
``` ```

View File

@ -13,6 +13,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client) - [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient) - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
- [asynch](https://github.com/long2ice/asynch)
- PHP - PHP
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)

View File

@ -22,9 +22,35 @@ toc_title: "\u7CFB\u7EDF\u8868"
大多数系统表将数据存储在RAM中。 ClickHouse服务器在开始时创建此类系统表。 大多数系统表将数据存储在RAM中。 ClickHouse服务器在开始时创建此类系统表。
与其他系统表不同,系统表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改则ClickHouse会重命名当前表并创建一个新表。 与其他系统表不同,系统日志表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log), [part_log](../../operations/system-tables/part_log.md#system.part_log), crash_log and text_log 默认采用[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改则ClickHouse会重命名当前表并创建一个新表。
默认情况下,表增长是无限的。 要控制表的大小,可以使用 [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。 用户可以通过在`/etc/clickhouse-server/config.d/`下创建与系统表同名的配置文件, 或者在`/etc/clickhouse-server/config.xml`中设置相应配置项,来自定义系统日志表的结构。可以自定义的配置项如下:
- `database`: 系统日志表所在的数据库。这个选项目前已经废弃。所有的系统日志表都位于`system`库中。
- `table`: 系统日志表名。
- `partition_by`: 指定[PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md)表达式。
- `ttl`: 指定系统日志表TTL选项。
- `flush_interval_milliseconds`: 指定系统日志表数据落盘时间。
- `engine`: 指定完整的表引擎定义。(以`ENGINE = `开始)。 这个选项与`partition_by`以及`ttl`冲突。如果两者一起设置,服务启动时会抛出异常并且退出。
一个配置定义的例子如下:
```
<yandex>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
<!--
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
```
默认情况下,表增长是无限的。 要控制表的大小,可以使用 TTL 设置来删除过期的日志记录。 你也可以使用 `MergeTree` 引擎表的分区功能。
## 系统指标的来源 {#system-tables-sources-of-system-metrics} ## 系统指标的来源 {#system-tables-sources-of-system-metrics}

View File

@ -29,7 +29,7 @@ SELECT 1 - 0.9
- 当一行行阅读浮点数的时候,浮点数的结果可能不是机器最近显示的数值。 - 当一行行阅读浮点数的时候,浮点数的结果可能不是机器最近显示的数值。
## 和Inf {#data_type-float-nan-inf} ## NaN和Inf {#data_type-float-nan-inf}
与标准SQL相比ClickHouse 支持以下类别的浮点数: 与标准SQL相比ClickHouse 支持以下类别的浮点数:

View File

@ -318,6 +318,10 @@ else ()
if (USE_GDB_ADD_INDEX) if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
endif() endif()
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
endif()
endif () endif ()
if (ENABLE_TESTS AND USE_GTEST) if (ENABLE_TESTS AND USE_GTEST)

View File

@ -801,7 +801,8 @@ private:
connection->setDefaultDatabase(connection_parameters.default_database); connection->setDefaultDatabase(connection_parameters.default_database);
ReadBufferFromFile in(queries_file); ReadBufferFromFile in(queries_file);
readStringUntilEOF(text, in); readStringUntilEOF(text, in);
processMultiQuery(text); if (!processMultiQuery(text))
break;
} }
return; return;
} }
@ -984,7 +985,8 @@ private:
if (query_fuzzer_runs) if (query_fuzzer_runs)
{ {
processWithFuzzing(full_query); if (!processWithFuzzing(full_query))
return false;
} }
else else
{ {
@ -1034,7 +1036,8 @@ private:
} }
void processWithFuzzing(const String & text) /// Returns false when server is not available.
bool processWithFuzzing(const String & text)
{ {
ASTPtr orig_ast; ASTPtr orig_ast;
@ -1052,7 +1055,7 @@ private:
if (!orig_ast) if (!orig_ast)
{ {
// Can't continue after a parsing error // Can't continue after a parsing error
return; return true;
} }
// Don't repeat inserts, the tables grow too big. Also don't repeat // Don't repeat inserts, the tables grow too big. Also don't repeat
@ -1147,7 +1150,7 @@ private:
// Probably the server is dead because we found an assertion // Probably the server is dead because we found an assertion
// failure. Fail fast. // failure. Fail fast.
fmt::print(stderr, "Lost connection to the server\n"); fmt::print(stderr, "Lost connection to the server\n");
return; return false;
} }
// The server is still alive so we're going to continue fuzzing. // The server is still alive so we're going to continue fuzzing.
@ -1173,6 +1176,8 @@ private:
fuzz_base = ast_to_process; fuzz_base = ast_to_process;
} }
} }
return true;
} }
void processTextAsSingleQuery(const String & text_) void processTextAsSingleQuery(const String & text_)

View File

@ -273,11 +273,12 @@ try
global_context->setCurrentDatabase(default_database); global_context->setCurrentDatabase(default_database);
applyCmdOptions(*global_context); applyCmdOptions(*global_context);
String path = global_context->getPath(); if (config().has("path"))
if (!path.empty())
{ {
String path = global_context->getPath();
/// Lock path directory before read /// Lock path directory before read
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info); status.emplace(path + "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path); LOG_DEBUG(log, "Loading metadata from {}", path);
Poco::File(path + "data/").createDirectories(); Poco::File(path + "data/").createDirectories();
@ -288,7 +289,7 @@ try
DatabaseCatalog::instance().loadDatabases(); DatabaseCatalog::instance().loadDatabases();
LOG_DEBUG(log, "Loaded metadata."); LOG_DEBUG(log, "Loaded metadata.");
} }
else else if (!config().has("no-system-tables"))
{ {
attachSystemTables(*global_context); attachSystemTables(*global_context);
} }
@ -540,6 +541,7 @@ void LocalServer::init(int argc, char ** argv)
("logger.log", po::value<std::string>(), "Log file name") ("logger.log", po::value<std::string>(), "Log file name")
("logger.level", po::value<std::string>(), "Log level") ("logger.level", po::value<std::string>(), "Log level")
("ignore-error", "do not stop processing if a query failed") ("ignore-error", "do not stop processing if a query failed")
("no-system-tables", "do not attach system tables (better startup time)")
("version,V", "print version information and exit") ("version,V", "print version information and exit")
; ;
@ -602,6 +604,8 @@ void LocalServer::init(int argc, char ** argv)
config().setString("logger.level", options["logger.level"].as<std::string>()); config().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("ignore-error")) if (options.count("ignore-error"))
config().setBool("ignore-error", true); config().setBool("ignore-error", true);
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
std::vector<std::string> arguments; std::vector<std::string> arguments;
for (int arg_num = 1; arg_num < argc; ++arg_num) for (int arg_num = 1; arg_num < argc; ++arg_num)

View File

@ -18,6 +18,7 @@
#endif #endif
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
#include <common/phdr_cache.h> #include <common/phdr_cache.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv); int mainEntryClickHouseRestart(int argc, char ** argv);
#endif #endif
/// Entry point registered under the `hash-binary` application name:
/// writes the result of getHashOfLoadedBinaryHex() to standard output.
int mainEntryClickHouseHashBinary(int, char **)
{
    /// No trailing newline is printed on purpose, so the output can be fed straight into objcopy:
    /// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse
    const auto hash_hex = getHashOfLoadedBinaryHex();
    std::cout << hash_hex;
    return 0;
}
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
namespace namespace
@ -110,6 +119,7 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
{"status", mainEntryClickHouseStatus}, {"status", mainEntryClickHouseStatus},
{"restart", mainEntryClickHouseRestart}, {"restart", mainEntryClickHouseRestart},
#endif #endif
{"hash-binary", mainEntryClickHouseHashBinary},
}; };

View File

@ -65,6 +65,8 @@
#include <Server/TCPHandlerFactory.h> #include <Server/TCPHandlerFactory.h>
#include <Common/SensitiveDataMasker.h> #include <Common/SensitiveDataMasker.h>
#include <Common/ThreadFuzzer.h> #include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/Elf.h>
#include <Server/MySQLHandlerFactory.h> #include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h> #include <Server/PostgreSQLHandlerFactory.h>
#include <Server/ProtocolServerAdapter.h> #include <Server/ProtocolServerAdapter.h>
@ -184,6 +186,7 @@ namespace ErrorCodes
extern const int FAILED_TO_GETPWUID; extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR; extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
} }
@ -436,7 +439,44 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if defined(OS_LINUX) #if defined(OS_LINUX)
std::string executable_path = getExecutablePath(); std::string executable_path = getExecutablePath();
if (executable_path.empty())
if (!executable_path.empty())
{
/// Integrity check based on checksum of the executable code.
/// Note: it is not intended to protect from malicious party,
/// because the reference checksum can be easily modified as well.
/// And we don't involve asymmetric encryption with PKI yet.
/// It's only intended to protect from faulty hardware.
/// Note: it is only based on machine code.
/// But there are other sections of the binary (e.g. exception handling tables)
/// that are interpreted (not executed) but can alter the behaviour of the program as well.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (stored_binary_hash.empty())
{
LOG_WARNING(log, "Calculated checksum of the binary: {}."
" There is no information about the reference checksum.", calculated_binary_hash);
}
else if (calculated_binary_hash == stored_binary_hash)
{
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file {2} was changed just after startup;"
" - the file {2} is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, stored_binary_hash, executable_path);
}
}
else
executable_path = "/usr/bin/clickhouse"; /// It is used for information messages. executable_path = "/usr/bin/clickhouse"; /// It is used for information messages.
/// After full config loaded /// After full config loaded

View File

@ -676,7 +676,7 @@
<database>system</database> <database>system</database>
<table>query_log</table> <table>query_log</table>
<!-- <!--
PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/mergetree-family/custom_partitioning_key/
Example: Example:
event_date event_date
toMonday(event_date) toMonday(event_date)

View File

@ -287,7 +287,7 @@
</div> </div>
<div id="run_div"> <div id="run_div">
<button class="shadow" id="run">Run</button> <button class="shadow" id="run">Run</button>
<span class="hint">&nbsp;(Ctrl+Enter)</span> <span class="hint">&nbsp;(Ctrl/Cmd+Enter)</span>
<span id="hourglass"></span> <span id="hourglass"></span>
<span id="check-mark"></span> <span id="check-mark"></span>
<span id="stats"></span> <span id="stats"></span>
@ -424,10 +424,10 @@
post(); post();
} }
document.onkeypress = function(event) document.onkeydown = function(event)
{ {
/// Firefox has code 13 for Enter and Chromium has code 10. /// Firefox has code 13 for Enter and Chromium has code 10.
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) { if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) {
post(); post();
} }
} }

View File

@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
{ {
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>; using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit; static constexpr bool limit_num_elems = Trait::has_limit;
DataTypePtr & data_type;
UInt64 max_elems; UInt64 max_elems;
UInt64 seed; UInt64 seed;
@ -121,7 +120,6 @@ public:
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456) const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>( : IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {}) {data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_) , max_elems(max_elems_)
, seed(seed_) , seed(seed_)
{ {
@ -129,7 +127,7 @@ public:
String getName() const override { return getNameByTrait<Trait>(); } String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); } DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }
void insert(Data & a, const T & v, Arena * arena) const void insert(Data & a, const T & v, Arena * arena) const
{ {

View File

@ -168,7 +168,7 @@ public:
{ {
for (const auto & x : small) for (const auto & x : small)
{ {
if (rb->contains(static_cast<Value>(x.getValue()))) if (r1.rb->contains(static_cast<Value>(x.getValue())))
buffer.push_back(x.getValue()); buffer.push_back(x.getValue());
} }
@ -264,7 +264,7 @@ public:
{ {
for (const auto & x : small) for (const auto & x : small)
{ {
if (rb->contains(static_cast<Value>(x.getValue()))) if (r1.rb->contains(static_cast<Value>(x.getValue())))
++ret; ++ret;
} }
} }
@ -419,7 +419,7 @@ public:
if (isSmall()) if (isSmall())
return small.find(x) != small.end(); return small.find(x) != small.end();
else else
return rb->contains(x); return rb->contains(static_cast<Value>(x));
} }
/** /**
@ -613,7 +613,7 @@ public:
/** /**
* Replace value * Replace value
*/ */
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num) void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{ {
if (isSmall()) if (isSmall())
toLarge(); toLarge();
@ -622,9 +622,9 @@ public:
{ {
if (from_vals[i] == to_vals[i]) if (from_vals[i] == to_vals[i])
continue; continue;
bool changed = rb->removeChecked(from_vals[i]); bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
if (changed) if (changed)
rb->add(to_vals[i]); rb->add(static_cast<Value>(to_vals[i]));
} }
} }
}; };

View File

@ -56,7 +56,7 @@ public:
DataTypePtr getReturnType() const override DataTypePtr getReturnType() const override
{ {
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>()); return std::make_shared<DataTypeArray>(this->argument_types[0]);
} }
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override

View File

@ -19,12 +19,12 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
struct ComparePairFirst final struct ComparePair final
{ {
template <typename T1, typename T2> template <typename T1, typename T2>
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
{ {
return lhs.first < rhs.first; return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first;
} }
}; };
@ -33,8 +33,8 @@ template <typename T>
struct AggregateFunctionWindowFunnelData struct AggregateFunctionWindowFunnelData
{ {
using TimestampEvent = std::pair<T, UInt8>; using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArray<TimestampEvent, 64>; using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
using Comparator = ComparePairFirst; using Comparator = ComparePair;
bool sorted = true; bool sorted = true;
TimestampEvents events_list; TimestampEvents events_list;
@ -47,8 +47,13 @@ struct AggregateFunctionWindowFunnelData
void add(T timestamp, UInt8 event) void add(T timestamp, UInt8 event)
{ {
// Since most events should have already been sorted by timestamp. // Since most events should have already been sorted by timestamp.
if (sorted && events_list.size() > 0 && events_list.back().first > timestamp) if (sorted && events_list.size() > 0)
sorted = false; {
if (events_list.back().first == timestamp)
sorted = events_list.back().second <= event;
else
sorted = events_list.back().first <= timestamp;
}
events_list.emplace_back(timestamp, event); events_list.emplace_back(timestamp, event);
} }

View File

@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
{ {
} }
/// Produces a column with exactly `size` rows based on this one.
/// - size == 0: the result of cloneEmpty().
/// - size <= current row count: a view (createView()) whose `data` keeps only the first `size` entries.
/// - size > current row count: a new column with all current rows followed by insertDefault() padding.
MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
{
    if (size == 0)
        return cloneEmpty();

    const size_t current_rows = data.size();

    if (size > current_rows)
    {
        /// Growing: build a new column, copy every existing row, then pad the tail with defaults.
        MutableColumnPtr grown = cloneEmpty();
        auto * grown_impl = typeid_cast<ColumnAggregateFunction *>(grown.get());

        grown_impl->insertRangeFrom(*this, 0, current_rows);
        for (size_t missing = size - current_rows; missing > 0; --missing)
            grown_impl->insertDefault();

        return grown;
    }

    /// Same size or shrinking: start from a view and keep only the leading `size` entries.
    auto view = createView();
    view->data.assign(data.begin(), data.begin() + size);
    return view;
}
} }

View File

@ -215,7 +215,7 @@ public:
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn &) const override; bool structureEquals(const IColumn &) const override;
MutableColumnPtr cloneResized(size_t size) const override;
}; };
} }

View File

@ -0,0 +1,41 @@
#include <Common/DirectorySyncGuard.h>
#include <Common/Exception.h>
#include <Disks/IDisk.h>
#include <fcntl.h> // O_RDWR
/// Fallback for platforms whose <fcntl.h> does not provide O_DIRECTORY (the original note names OSX).
/// The substitute has to be a read-only access mode: POSIX open() fails with EISDIR when a
/// directory is opened with O_WRONLY or O_RDWR, so a write mode could never yield a directory descriptor.
#ifndef O_DIRECTORY
#define O_DIRECTORY O_RDONLY
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_FSYNC;
}
/// Stores `disk_` and opens `path` through it with the O_DIRECTORY flag,
/// keeping the returned descriptor in `fd`. The destructor in this file syncs and closes that descriptor.
DirectorySyncGuard::DirectorySyncGuard(const DiskPtr & disk_, const String & path)
: disk(disk_)
, fd(disk_->open(path, O_DIRECTORY))
{}
/// Syncs the held descriptor through the disk and then closes it.
/// Never throws: any failure is logged via tryLogCurrentException.
/// Syncing and closing are guarded separately so that a failed sync
/// (or a failed F_FULLFSYNC on Darwin) does not skip the close and leak the descriptor.
DirectorySyncGuard::~DirectorySyncGuard()
{
    try
    {
#if defined(OS_DARWIN)
        if (fcntl(fd, F_FULLFSYNC, 0))
            throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC);
#endif
        disk->sync(fd);
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }

    /// Always attempt to release the descriptor, even when the sync step above failed.
    try
    {
        disk->close(fd);
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
}

View File

@ -1,36 +1,26 @@
#pragma once #pragma once
#include <Disks/IDisk.h> #include <string>
#include <memory>
namespace DB namespace DB
{ {
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
/// Helper class, that receives file descriptor and does fsync for it in destructor. /// Helper class, that receives file descriptor and does fsync for it in destructor.
/// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end. /// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end.
/// Guarantees of sequence 'close-reopen-fsync' may depend on kernel version. /// Guarantees of sequence 'close-reopen-fsync' may depend on kernel version.
/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496 /// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
class FileSyncGuard class DirectorySyncGuard
{ {
public: public:
/// NOTE: If you have already opened descriptor, it's preferred to use /// NOTE: If you have already opened descriptor, it's preferred to use
/// this constructor instead of constructor with path. /// this constructor instead of constructor with path.
FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {} DirectorySyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
DirectorySyncGuard(const DiskPtr & disk_, const std::string & path);
FileSyncGuard(const DiskPtr & disk_, const String & path) ~DirectorySyncGuard();
: disk(disk_), fd(disk_->open(path, O_RDWR)) {}
~FileSyncGuard()
{
try
{
disk->sync(fd);
disk->close(fd);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
private: private:
DiskPtr disk; DiskPtr disk;

Some files were not shown because too many files have changed in this diff Show More