mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge branch 'master' into kssenii-pg2ch
This commit is contained in:
commit
a19e7edd14
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Question
|
||||
about: Ask question about ClickHouse
|
||||
about: Ask a question about ClickHouse
|
||||
title: ''
|
||||
labels: question
|
||||
assignees: ''
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Unexpected behaviour
|
||||
about: Create a report to help us improve ClickHouse
|
||||
about: Some feature is working in a non-obvious way
|
||||
title: ''
|
||||
labels: unexpected behaviour
|
||||
assignees: ''
|
30
.github/ISSUE_TEMPLATE/35_incomplete_implementation.md
vendored
Normal file
30
.github/ISSUE_TEMPLATE/35_incomplete_implementation.md
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
name: Incomplete implementation
|
||||
about: Implementation of an existing feature is not finished
|
||||
title: ''
|
||||
labels: unfinished code
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Describe the unexpected behaviour**
|
||||
A clear and concise description of what does not work as it is supposed to.
|
||||
|
||||
**How to reproduce**
|
||||
* Which ClickHouse server version to use
|
||||
* Which interface to use, if it matters
|
||||
* Non-default settings, if any
|
||||
* `CREATE TABLE` statements for all tables involved
|
||||
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
|
||||
* Queries to run that lead to an unexpected result
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Error message and/or stacktrace**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Usability issue
|
||||
about: Create a report to help us improve ClickHouse
|
||||
about: Report something that can be made more convenient to use
|
||||
title: ''
|
||||
labels: usability
|
||||
assignees: ''
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Backward compatibility issue
|
||||
about: Create a report to help us improve ClickHouse
|
||||
about: Report the case when the behaviour of a new version can break existing use cases
|
||||
title: ''
|
||||
labels: backward compatibility
|
||||
assignees: ''
|
16
.github/ISSUE_TEMPLATE/90_fuzzing-report.md
vendored
Normal file
16
.github/ISSUE_TEMPLATE/90_fuzzing-report.md
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
---
|
||||
name: Assertion found via fuzzing
|
||||
about: A potential issue has been found via Fuzzer or Stress tests
|
||||
title: ''
|
||||
labels: fuzz
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
(you don't have to strictly follow this form)
|
||||
|
||||
**Describe the bug**
|
||||
A link to the report
|
||||
|
||||
**How to reproduce**
|
||||
Try to reproduce the report and copy the tables and queries involved.
|
@ -220,6 +220,13 @@ if (LINKER_NAME MATCHES "lld$")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1")
|
||||
endif ()
|
||||
|
||||
# Add a section with the hash of the compiled machine code for integrity checks.
|
||||
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
|
||||
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
|
||||
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
|
||||
set (USE_BINARY_HASH 1)
|
||||
endif ()
|
||||
|
||||
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
|
||||
|
||||
|
||||
|
@ -56,6 +56,9 @@
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/getExecutablePath.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/Elf.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config_version.h>
|
||||
@ -80,28 +83,6 @@ static void call_default_signal_handler(int sig)
|
||||
raise(sig);
|
||||
}
|
||||
|
||||
// Apparently strsignal is not instrumented by MemorySanitizer, so we
|
||||
// have to unpoison it to avoid msan reports inside fmt library when we
|
||||
// print it.
|
||||
const char * msan_strsignal(int sig)
|
||||
{
|
||||
// no glibc in osx/freebsd
|
||||
#if !defined(__GLIBC_PREREQ)
|
||||
#define __GLIBC_PREREQ(x, y) 0
|
||||
#endif
|
||||
|
||||
// glibc 2.32+ deprecates sys_siglist[]
|
||||
// newer glibc is a problem only for unbundled build.
|
||||
#if __GLIBC_PREREQ(2, 32)
|
||||
const char * signal_name = sigdescr_np(sig);
|
||||
#else
|
||||
const char * signal_name = sys_siglist[sig];
|
||||
#endif
|
||||
|
||||
__msan_unpoison_string(signal_name);
|
||||
return signal_name;
|
||||
}
|
||||
|
||||
static constexpr size_t max_query_id_size = 127;
|
||||
|
||||
static const size_t signal_pipe_buf_size =
|
||||
@ -131,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)
|
||||
/** Signal handler for HUP / USR1 */
|
||||
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
/// NOTE(review): presumably forbids memory allocation until the end of this scope - confirm against the macro definition.
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// The handler does no work itself: it only hands the signal number over; the siginfo_t and context arguments are unused.
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
/// Signal handler that only forwards the signal number via writeSignalIDtoSignalPipe().
/// NOTE(review): judging by the name it is installed for termination signals - confirm where it is registered.
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
/// NOTE(review): presumably forbids memory allocation until the end of this scope - confirm against the macro definition.
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// The siginfo_t and context arguments are unused; only the signal number is passed on.
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
@ -144,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
*/
|
||||
static void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
@ -306,13 +290,13 @@ private:
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
|
||||
thread_num, msan_strsignal(sig), sig);
|
||||
thread_num, strsignal(sig), sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
|
||||
thread_num, query_id, msan_strsignal(sig), sig);
|
||||
thread_num, query_id, strsignal(sig), sig);
|
||||
}
|
||||
|
||||
String error_message;
|
||||
@ -340,6 +324,32 @@ private:
|
||||
/// Write symbolized stack trace line by line for better grep-ability.
|
||||
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
|
||||
|
||||
#if defined(__linux__)
|
||||
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
|
||||
String calculated_binary_hash = getHashOfLoadedBinaryHex();
|
||||
if (daemon.stored_binary_hash.empty())
|
||||
{
|
||||
LOG_FATAL(log, "Calculated checksum of the binary: {}."
|
||||
" There is no information about the reference checksum.", calculated_binary_hash);
|
||||
}
|
||||
else if (calculated_binary_hash == daemon.stored_binary_hash)
|
||||
{
|
||||
LOG_FATAL(log, "Checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "Calculated checksum of the ClickHouse binary ({0}) does not correspond"
|
||||
" to the reference checksum stored in the binary ({1})."
|
||||
" It may indicate one of the following:"
|
||||
" - the file was changed just after startup;"
|
||||
" - the file is damaged on disk due to faulty hardware;"
|
||||
" - the loaded executable is damaged in memory due to faulty hardware;"
|
||||
" - the file was intentionally modified;"
|
||||
" - logical error in code."
|
||||
, calculated_binary_hash, daemon.stored_binary_hash);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Write crash to system.crash_log table if available.
|
||||
if (collectCrashLog)
|
||||
collectCrashLog(sig, thread_num, query_id, stack_trace);
|
||||
@ -493,8 +503,9 @@ void BaseDaemon::kill()
|
||||
{
|
||||
dumpCoverageReportIfPossible();
|
||||
pid_file.reset();
|
||||
if (::raise(SIGKILL) != 0)
|
||||
throw Poco::SystemException("cannot kill process");
|
||||
/// Exit with the same code as it is usually set by shell when process is terminated by SIGKILL.
|
||||
/// It's better than doing 'raise' or 'kill', because they have no effect for 'init' process (with pid = 1, usually in Docker).
|
||||
_exit(128 + SIGKILL);
|
||||
}
|
||||
|
||||
std::string BaseDaemon::getDefaultCorePath() const
|
||||
@ -799,6 +810,13 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
#else
|
||||
build_id_info = "no build id";
|
||||
#endif
|
||||
|
||||
#if defined(__linux__)
|
||||
std::string executable_path = getExecutablePath();
|
||||
|
||||
if (!executable_path.empty())
|
||||
stored_binary_hash = DB::Elf(executable_path).getBinaryHash();
|
||||
#endif
|
||||
}
|
||||
|
||||
void BaseDaemon::logRevision() const
|
||||
@ -858,13 +876,13 @@ void BaseDaemon::handleSignal(int signal_id)
|
||||
onInterruptSignals(signal_id);
|
||||
}
|
||||
else
|
||||
throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0);
|
||||
throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
|
||||
}
|
||||
|
||||
void BaseDaemon::onInterruptSignals(int signal_id)
|
||||
{
|
||||
is_cancelled = true;
|
||||
LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id));
|
||||
LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
|
||||
|
||||
if (sigint_signals_counter >= 2)
|
||||
{
|
||||
@ -1010,3 +1028,9 @@ void BaseDaemon::setupWatchdog()
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Returns a copy of stored_binary_hash, the reference hash used for integrity checks.
/// The string is empty when no reference hash has been recorded.
String BaseDaemon::getStoredBinaryHash() const
|
||||
{
|
||||
return stored_binary_hash;
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
static void terminate();
|
||||
|
||||
/// Forceful shutdown
|
||||
void kill();
|
||||
[[noreturn]] void kill();
|
||||
|
||||
/// Cancellation request has been received.
|
||||
bool isCancelled() const
|
||||
@ -121,6 +121,9 @@ public:
|
||||
/// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly).
|
||||
void shouldSetupWatchdog(char * argv0_);
|
||||
|
||||
/// Hash of the binary for integrity checks.
|
||||
String getStoredBinaryHash() const;
|
||||
|
||||
protected:
|
||||
virtual void logRevision() const;
|
||||
|
||||
@ -168,6 +171,7 @@ protected:
|
||||
Poco::Util::AbstractConfiguration * last_configuration = nullptr;
|
||||
|
||||
String build_id_info;
|
||||
String stored_binary_hash;
|
||||
|
||||
std::vector<int> handled_signals;
|
||||
|
||||
|
125
base/glibc-compatibility/musl/strsignal.c
Normal file
125
base/glibc-compatibility/musl/strsignal.c
Normal file
@ -0,0 +1,125 @@
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
|
||||
#if (SIGHUP == 1) && (SIGINT == 2) && (SIGQUIT == 3) && (SIGILL == 4) \
|
||||
&& (SIGTRAP == 5) && (SIGABRT == 6) && (SIGBUS == 7) && (SIGFPE == 8) \
|
||||
&& (SIGKILL == 9) && (SIGUSR1 == 10) && (SIGSEGV == 11) && (SIGUSR2 == 12) \
|
||||
&& (SIGPIPE == 13) && (SIGALRM == 14) && (SIGTERM == 15) && (SIGSTKFLT == 16) \
|
||||
&& (SIGCHLD == 17) && (SIGCONT == 18) && (SIGSTOP == 19) && (SIGTSTP == 20) \
|
||||
&& (SIGTTIN == 21) && (SIGTTOU == 22) && (SIGURG == 23) && (SIGXCPU == 24) \
|
||||
&& (SIGXFSZ == 25) && (SIGVTALRM == 26) && (SIGPROF == 27) && (SIGWINCH == 28) \
|
||||
&& (SIGPOLL == 29) && (SIGPWR == 30) && (SIGSYS == 31)
|
||||
|
||||
#define sigmap(x) x
|
||||
|
||||
#else
|
||||
|
||||
static const char map[] = {
|
||||
[SIGHUP] = 1,
|
||||
[SIGINT] = 2,
|
||||
[SIGQUIT] = 3,
|
||||
[SIGILL] = 4,
|
||||
[SIGTRAP] = 5,
|
||||
[SIGABRT] = 6,
|
||||
[SIGBUS] = 7,
|
||||
[SIGFPE] = 8,
|
||||
[SIGKILL] = 9,
|
||||
[SIGUSR1] = 10,
|
||||
[SIGSEGV] = 11,
|
||||
[SIGUSR2] = 12,
|
||||
[SIGPIPE] = 13,
|
||||
[SIGALRM] = 14,
|
||||
[SIGTERM] = 15,
|
||||
#if defined(SIGSTKFLT)
|
||||
[SIGSTKFLT] = 16,
|
||||
#elif defined(SIGEMT)
|
||||
[SIGEMT] = 16,
|
||||
#endif
|
||||
[SIGCHLD] = 17,
|
||||
[SIGCONT] = 18,
|
||||
[SIGSTOP] = 19,
|
||||
[SIGTSTP] = 20,
|
||||
[SIGTTIN] = 21,
|
||||
[SIGTTOU] = 22,
|
||||
[SIGURG] = 23,
|
||||
[SIGXCPU] = 24,
|
||||
[SIGXFSZ] = 25,
|
||||
[SIGVTALRM] = 26,
|
||||
[SIGPROF] = 27,
|
||||
[SIGWINCH] = 28,
|
||||
[SIGPOLL] = 29,
|
||||
[SIGPWR] = 30,
|
||||
[SIGSYS] = 31
|
||||
};
|
||||
|
||||
#define sigmap(x) ((x) >= sizeof map ? (x) : map[(x)])
|
||||
|
||||
#endif
|
||||
|
||||
static const char strings[] =
|
||||
"Unknown signal\0"
|
||||
"Hangup\0"
|
||||
"Interrupt\0"
|
||||
"Quit\0"
|
||||
"Illegal instruction\0"
|
||||
"Trace/breakpoint trap\0"
|
||||
"Aborted\0"
|
||||
"Bus error\0"
|
||||
"Arithmetic exception\0"
|
||||
"Killed\0"
|
||||
"User defined signal 1\0"
|
||||
"Segmentation fault\0"
|
||||
"User defined signal 2\0"
|
||||
"Broken pipe\0"
|
||||
"Alarm clock\0"
|
||||
"Terminated\0"
|
||||
#if defined(SIGSTKFLT)
|
||||
"Stack fault\0"
|
||||
#elif defined(SIGEMT)
|
||||
"Emulator trap\0"
|
||||
#else
|
||||
"Unknown signal\0"
|
||||
#endif
|
||||
"Child process status\0"
|
||||
"Continued\0"
|
||||
"Stopped (signal)\0"
|
||||
"Stopped\0"
|
||||
"Stopped (tty input)\0"
|
||||
"Stopped (tty output)\0"
|
||||
"Urgent I/O condition\0"
|
||||
"CPU time limit exceeded\0"
|
||||
"File size limit exceeded\0"
|
||||
"Virtual timer expired\0"
|
||||
"Profiling timer expired\0"
|
||||
"Window changed\0"
|
||||
"I/O possible\0"
|
||||
"Power failure\0"
|
||||
"Bad system call\0"
|
||||
"RT32"
|
||||
"\0RT33\0RT34\0RT35\0RT36\0RT37\0RT38\0RT39\0RT40"
|
||||
"\0RT41\0RT42\0RT43\0RT44\0RT45\0RT46\0RT47\0RT48"
|
||||
"\0RT49\0RT50\0RT51\0RT52\0RT53\0RT54\0RT55\0RT56"
|
||||
"\0RT57\0RT58\0RT59\0RT60\0RT61\0RT62\0RT63\0RT64"
|
||||
#if _NSIG > 65
|
||||
"\0RT65\0RT66\0RT67\0RT68\0RT69\0RT70\0RT71\0RT72"
|
||||
"\0RT73\0RT74\0RT75\0RT76\0RT77\0RT78\0RT79\0RT80"
|
||||
"\0RT81\0RT82\0RT83\0RT84\0RT85\0RT86\0RT87\0RT88"
|
||||
"\0RT89\0RT90\0RT91\0RT92\0RT93\0RT94\0RT95\0RT96"
|
||||
"\0RT97\0RT98\0RT99\0RT100\0RT101\0RT102\0RT103\0RT104"
|
||||
"\0RT105\0RT106\0RT107\0RT108\0RT109\0RT110\0RT111\0RT112"
|
||||
"\0RT113\0RT114\0RT115\0RT116\0RT117\0RT118\0RT119\0RT120"
|
||||
"\0RT121\0RT122\0RT123\0RT124\0RT125\0RT126\0RT127\0RT128"
|
||||
#endif
|
||||
"";
|
||||
|
||||
/*
 * Return a human-readable description of signal number `signum`.
 *
 * The descriptions are stored in `strings` as consecutive NUL-terminated
 * entries, entry 0 being "Unknown signal". The returned pointer refers into
 * that static table; this function allocates nothing and writes nothing.
 */
char *strsignal(int signum)
|
||||
{
|
||||
const char *s = strings;
|
||||
|
||||
/* Translate the platform's signal number into an index into `strings`. */
signum = sigmap(signum);
|
||||
/* Unsigned comparison: anything outside 1.._NSIG-1 (including 0 and negative values) falls back to entry 0. */
if (signum - 1U >= _NSIG-1) signum = 0;
|
||||
|
||||
/* Skip `signum` entries: the inner loop advances to an entry's NUL, the outer s++ steps past it. */
for (; signum--; s++) for (; *s; s++);
|
||||
|
||||
/* The cast only drops const to match the declared return type. */
return (char *)s;
|
||||
}
|
@ -142,7 +142,7 @@ TRAP(qecvt)
|
||||
TRAP(qfcvt)
|
||||
TRAP(register_printf_function)
|
||||
TRAP(seed48)
|
||||
TRAP(setenv)
|
||||
//TRAP(setenv)
|
||||
TRAP(setfsent)
|
||||
TRAP(setgrent)
|
||||
TRAP(sethostent)
|
||||
@ -164,7 +164,7 @@ TRAP(sigsuspend)
|
||||
TRAP(sleep)
|
||||
TRAP(srand48)
|
||||
//TRAP(strerror) // Used by RocksDB and many other libraries, unfortunately.
|
||||
TRAP(strsignal)
|
||||
//TRAP(strsignal) // This function is imported from Musl and is thread safe.
|
||||
TRAP(strtok)
|
||||
TRAP(tcflow)
|
||||
TRAP(tcsendbreak)
|
||||
|
@ -1,9 +1,9 @@
|
||||
# These strings are autochanged by release_lib.sh:
|
||||
SET(VERSION_REVISION 54444)
|
||||
SET(VERSION_MAJOR 20)
|
||||
SET(VERSION_MINOR 13)
|
||||
SET(VERSION_REVISION 54445)
|
||||
SET(VERSION_MAJOR 21)
|
||||
SET(VERSION_MINOR 1)
|
||||
SET(VERSION_PATCH 1)
|
||||
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
|
||||
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
|
||||
SET(VERSION_STRING 20.13.1.1)
|
||||
SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
|
||||
SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
|
||||
SET(VERSION_STRING 21.1.1.5646)
|
||||
# end of autochange
|
||||
|
@ -32,12 +32,21 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
|
||||
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
|
||||
|
||||
# 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash
|
||||
# of the manifest) and debian will extract these from d/changelog, and
|
||||
# makes cache of ccache unusable
|
||||
# debian (debhelper) sets the SOURCE_DATE_EPOCH environment variable, which is
|
||||
# filled from the debian/changelog or current time.
|
||||
#
|
||||
# FIXME: once sloppiness will be introduced for this this can be removed.
|
||||
if (CCACHE_VERSION VERSION_GREATER "4.0")
|
||||
# - 4.0+ ccache always includes this environment variable into the hash
|
||||
# of the manifest, which does not allow reusing the previous cache,
|
||||
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
|
||||
#
|
||||
# So for:
|
||||
# - 4.2+ time_macros sloppiness is used,
|
||||
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
|
||||
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
|
||||
message(STATUS "Use time_macros sloppiness for ccache")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
|
||||
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
|
||||
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||
|
@ -1,5 +1,4 @@
|
||||
# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope
|
||||
if (NOT ARCH_ARM AND NOT OS_FREEBSD AND OPENSSL_FOUND)
|
||||
if (NOT ARCH_ARM AND OPENSSL_FOUND)
|
||||
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
|
||||
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")
|
||||
|
@ -1,2 +1,2 @@
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
|
||||
tar xJf MacOSX10.14.sdk.tar.xz --strip-components=1
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
|
||||
tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1
|
||||
|
6
contrib/CMakeLists.txt
vendored
6
contrib/CMakeLists.txt
vendored
@ -18,7 +18,11 @@ if (WITH_COVERAGE)
|
||||
set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE})
|
||||
separate_arguments(WITHOUT_COVERAGE_LIST)
|
||||
# disable coverage for contrib files and build with optimisations
|
||||
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
|
||||
if (COMPILER_CLANG)
|
||||
add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST})
|
||||
else()
|
||||
add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (SANITIZE STREQUAL "undefined")
|
||||
|
2
contrib/libcxx
vendored
2
contrib/libcxx
vendored
@ -1 +1 @@
|
||||
Subproject commit 95650a0db4399ee871d5fd698ad12384fe9fa964
|
||||
Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7
|
@ -5,6 +5,8 @@ set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
|
||||
set(SRCS
|
||||
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/any.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/atomic.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/barrier.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/bind.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/charconv.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/chrono.cpp
|
||||
@ -20,6 +22,7 @@ ${LIBCXX_SOURCE_DIR}/src/functional.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/future.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/hash.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/ios.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/iostream.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/locale.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/memory.cpp
|
||||
@ -28,6 +31,7 @@ ${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/new.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/optional.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/random.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/regex.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
|
||||
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp
|
||||
|
2
contrib/libcxxabi
vendored
2
contrib/libcxxabi
vendored
@ -1 +1 @@
|
||||
Subproject commit 1ebc83af4c06dbcd56b4d166c1314a7d4c1173f9
|
||||
Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
|
@ -11,7 +11,6 @@ ${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
|
||||
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp
|
||||
|
@ -83,7 +83,8 @@
|
||||
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
|
||||
#define _TTHREAD_EMULATE_TIMESPEC_GET_
|
||||
#endif
|
||||
|
||||
#elif defined(__FreeBSD__)
|
||||
#define HAVE_PTHREAD_SETNAME_FREEBSD 1
|
||||
#else
|
||||
// pthread_setname_gnu
|
||||
#define HAVE_PTHREAD_SETNAME_GNU 1
|
||||
|
2
contrib/rocksdb
vendored
2
contrib/rocksdb
vendored
@ -1 +1 @@
|
||||
Subproject commit 8b966f0ca298fc1475bd09d9775f32dff0fdce0a
|
||||
Subproject commit 54a0decabbcf4c0bb5cf7befa9c597f28289bff5
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@ -1,5 +1,5 @@
|
||||
clickhouse (20.13.1.1) unstable; urgency=low
|
||||
clickhouse (21.1.0) unstable; urgency=low
|
||||
|
||||
* Modified source code
|
||||
|
||||
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
|
||||
-- Alexey Milovidov <milovidov@yandex-team.ru> Mon, 11 Jan 2021 03:51:08 +0300
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.13.1.*
|
||||
ARG version=21.1.0
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
|
@ -45,7 +45,8 @@
|
||||
"name": "yandex/clickhouse-stateless-test",
|
||||
"dependent": [
|
||||
"docker/test/stateful",
|
||||
"docker/test/coverage"
|
||||
"docker/test/coverage",
|
||||
"docker/test/unit"
|
||||
]
|
||||
},
|
||||
"docker/test/stateless_pytest": {
|
||||
@ -134,7 +135,9 @@
|
||||
"name": "yandex/clickhouse-test-base",
|
||||
"dependent": [
|
||||
"docker/test/stateless",
|
||||
"docker/test/stateless_pytest"
|
||||
"docker/test/stateless_unbundled",
|
||||
"docker/test/stateless_pytest",
|
||||
"docker/test/integration/base"
|
||||
]
|
||||
},
|
||||
"docker/packager/unbundled": {
|
||||
@ -151,5 +154,9 @@
|
||||
"docker/test/integration/kerberized_hadoop": {
|
||||
"name": "yandex/clickhouse-kerberized-hadoop",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/sqlancer": {
|
||||
"name": "yandex/clickhouse-sqlancer-test",
|
||||
"dependent": []
|
||||
}
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
|
||||
&& rm -rf cctools-port
|
||||
|
||||
# Download toolchain for Darwin
|
||||
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
|
||||
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
|
||||
|
||||
# Download toolchain for ARM
|
||||
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.
|
||||
|
@ -3,7 +3,7 @@
|
||||
set -x -e
|
||||
|
||||
mkdir -p build/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX10.14.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
|
||||
mkdir -p build/cmake/toolchain/linux-aarch64
|
||||
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1
|
||||
|
@ -4,5 +4,5 @@ alpine-root/install/*
|
||||
# docs (looks useless)
|
||||
alpine-root/usr/share/doc/*
|
||||
|
||||
# packages, etc. (used by prepare.sh)
|
||||
alpine-root/tgz-packages/*
|
||||
# packages, etc. (used by alpine-build.sh)
|
||||
tgz-packages/*
|
||||
|
3
docker/server/.gitignore
vendored
3
docker/server/.gitignore
vendored
@ -1 +1,2 @@
|
||||
alpine-root/*
|
||||
alpine-root/*
|
||||
tgz-packages/*
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.13.1.*
|
||||
ARG version=21.1.0
|
||||
ARG gosu_ver=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
|
@ -16,7 +16,7 @@ RUN addgroup clickhouse \
|
||||
&& chown root:clickhouse /var/log/clickhouse-server \
|
||||
&& chmod 775 /var/log/clickhouse-server \
|
||||
&& chmod +x /entrypoint.sh \
|
||||
&& apk add --no-cache su-exec
|
||||
&& apk add --no-cache su-exec bash
|
||||
|
||||
EXPOSE 9000 8123 9009
|
||||
|
||||
|
@ -4,6 +4,7 @@ set -x
|
||||
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
|
||||
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
|
||||
VERSION="${VERSION:-20.9.3.45}"
|
||||
DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"
|
||||
|
||||
# where original files live
|
||||
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
|
||||
@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
|
||||
# we will create root for our image here
|
||||
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
|
||||
|
||||
# where to put downloaded tgz
|
||||
TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
|
||||
|
||||
# clean up the root from old runs
|
||||
# clean up the root from old runs, it's reconstructed each time
|
||||
rm -rf "$CONTAINER_ROOT_FOLDER"
|
||||
mkdir -p "$CONTAINER_ROOT_FOLDER"
|
||||
|
||||
# where to put downloaded tgz
|
||||
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
|
||||
mkdir -p "$TGZ_PACKAGES_FOLDER"
|
||||
|
||||
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
|
||||
@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
|
||||
# download tars from the repo
|
||||
for package in "${PACKAGES[@]}"
|
||||
do
|
||||
wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
|
||||
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
|
||||
done
|
||||
|
||||
# unpack tars
|
||||
@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
|
||||
"${CONTAINER_ROOT_FOLDER}/lib64"
|
||||
|
||||
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
|
||||
cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
|
||||
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
|
||||
|
||||
## get glibc components from ubuntu 20.04 and put them to expected place
|
||||
docker pull ubuntu:20.04
|
||||
@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
|
||||
|
||||
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
|
||||
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
|
||||
rm -rf "$CONTAINER_ROOT_FOLDER"
|
||||
|
@ -1,152 +0,0 @@
|
||||
#!/bin/sh
|
||||
#set -x
|
||||
|
||||
DO_CHOWN=1
|
||||
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
|
||||
DO_CHOWN=0
|
||||
fi
|
||||
|
||||
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
|
||||
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
|
||||
|
||||
# support --user
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
USER=$CLICKHOUSE_UID
|
||||
GROUP=$CLICKHOUSE_GID
|
||||
# busybox has setuidgid & chpst buildin
|
||||
gosu="su-exec $USER:$GROUP"
|
||||
else
|
||||
USER="$(id -u)"
|
||||
GROUP="$(id -g)"
|
||||
gosu=""
|
||||
DO_CHOWN=0
|
||||
fi
|
||||
|
||||
# set some vars
|
||||
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
|
||||
|
||||
# port is needed to check if clickhouse-server is ready for connections
|
||||
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"
|
||||
|
||||
# get CH directories locations
|
||||
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
|
||||
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
|
||||
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
|
||||
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
|
||||
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
|
||||
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
|
||||
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
|
||||
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"
|
||||
|
||||
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
|
||||
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
|
||||
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
|
||||
|
||||
for dir in "$DATA_DIR" \
|
||||
"$ERROR_LOG_DIR" \
|
||||
"$LOG_DIR" \
|
||||
"$TMP_DIR" \
|
||||
"$USER_PATH" \
|
||||
"$FORMAT_SCHEMA_PATH"
|
||||
do
|
||||
# check if variable not empty
|
||||
[ -z "$dir" ] && continue
|
||||
# ensure directories exist
|
||||
if ! mkdir -p "$dir"; then
|
||||
echo "Couldn't create necessary directory: $dir"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$DO_CHOWN" = "1" ]; then
|
||||
# ensure proper directories permissions
|
||||
chown -R "$USER:$GROUP" "$dir"
|
||||
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
|
||||
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# if clickhouse user is defined - create it (user "default" already exists out of box)
|
||||
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
|
||||
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
|
||||
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
|
||||
<yandex>
|
||||
<!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
|
||||
<users>
|
||||
<!-- Remove default user -->
|
||||
<default remove="remove">
|
||||
</default>
|
||||
|
||||
<${CLICKHOUSE_USER}>
|
||||
<profile>default</profile>
|
||||
<networks>
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<password>${CLICKHOUSE_PASSWORD}</password>
|
||||
<quota>default</quota>
|
||||
</${CLICKHOUSE_USER}>
|
||||
</users>
|
||||
</yandex>
|
||||
EOT
|
||||
fi
|
||||
|
||||
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
|
||||
# Listen only on localhost until the initialization is done
|
||||
$gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
|
||||
pid="$!"
|
||||
|
||||
# check if clickhouse is ready to accept connections
|
||||
# will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
|
||||
tries=6
|
||||
while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
|
||||
if [ "$tries" -le "0" ]; then
|
||||
echo >&2 'ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
tries=$(( tries-1 ))
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if [ -n "$CLICKHOUSE_PASSWORD" ]; then
|
||||
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
|
||||
fi
|
||||
|
||||
clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD "
|
||||
|
||||
# create default database, if defined
|
||||
if [ -n "$CLICKHOUSE_DB" ]; then
|
||||
echo "$0: create database '$CLICKHOUSE_DB'"
|
||||
"$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
|
||||
fi
|
||||
|
||||
for f in /docker-entrypoint-initdb.d/*; do
|
||||
case "$f" in
|
||||
*.sh)
|
||||
if [ -x "$f" ]; then
|
||||
echo "$0: running $f"
|
||||
"$f"
|
||||
else
|
||||
echo "$0: sourcing $f"
|
||||
. "$f"
|
||||
fi
|
||||
;;
|
||||
*.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
|
||||
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
|
||||
*) echo "$0: ignoring $f" ;;
|
||||
esac
|
||||
echo
|
||||
done
|
||||
|
||||
if ! kill -s TERM "$pid" || ! wait "$pid"; then
|
||||
echo >&2 'Finishing of ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
|
||||
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
|
||||
fi
|
||||
|
||||
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
|
||||
exec "$@"
|
71
docker/server/entrypoint.sh
Normal file → Executable file
71
docker/server/entrypoint.sh
Normal file → Executable file
@ -1,7 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eo pipefail
|
||||
shopt -s nullglob
|
||||
|
||||
DO_CHOWN=1
|
||||
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
|
||||
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
|
||||
DO_CHOWN=0
|
||||
fi
|
||||
|
||||
@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
|
||||
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
|
||||
|
||||
# support --user
|
||||
if [ x"$UID" == x0 ]; then
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
USER=$CLICKHOUSE_UID
|
||||
GROUP=$CLICKHOUSE_GID
|
||||
gosu="gosu $USER:$GROUP"
|
||||
if command -v gosu &> /dev/null; then
|
||||
gosu="gosu $USER:$GROUP"
|
||||
elif command -v su-exec &> /dev/null; then
|
||||
gosu="su-exec $USER:$GROUP"
|
||||
else
|
||||
echo "No gosu/su-exec detected!"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
USER="$(id -u)"
|
||||
GROUP="$(id -g)"
|
||||
@ -23,18 +33,23 @@ fi
|
||||
# set some vars
|
||||
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
|
||||
|
||||
if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
|
||||
echo "Configuration file '$dir' isn't readable by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# port is needed to check if clickhouse-server is ready for connections
|
||||
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
|
||||
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
|
||||
|
||||
# get CH directories locations
|
||||
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
|
||||
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
|
||||
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
|
||||
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
|
||||
LOG_DIR="$(dirname $LOG_PATH || true)"
|
||||
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
|
||||
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
|
||||
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
|
||||
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
|
||||
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
|
||||
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
|
||||
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
|
||||
LOG_DIR="$(dirname "$LOG_PATH" || true)"
|
||||
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
|
||||
ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
|
||||
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
|
||||
|
||||
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
|
||||
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
|
||||
@ -58,8 +73,8 @@ do
|
||||
if [ "$DO_CHOWN" = "1" ]; then
|
||||
# ensure proper directories permissions
|
||||
chown -R "$USER:$GROUP" "$dir"
|
||||
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
|
||||
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
|
||||
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
|
||||
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
@ -90,21 +105,22 @@ fi
|
||||
|
||||
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
|
||||
# Listen only on localhost until the initialization is done
|
||||
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
|
||||
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
|
||||
pid="$!"
|
||||
|
||||
# check if clickhouse is ready to accept connections
|
||||
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
|
||||
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
|
||||
echo >&2 'ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
|
||||
tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
|
||||
while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
|
||||
if [ "$tries" -le "0" ]; then
|
||||
echo >&2 'ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
tries=$(( tries-1 ))
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
|
||||
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
|
||||
fi
|
||||
|
||||
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
|
||||
clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
|
||||
|
||||
echo
|
||||
|
||||
@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
|
||||
"$f"
|
||||
else
|
||||
echo "$0: sourcing $f"
|
||||
# shellcheck source=/dev/null
|
||||
. "$f"
|
||||
fi
|
||||
;;
|
||||
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
|
||||
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
|
||||
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
|
||||
*) echo "$0: ignoring $f" ;;
|
||||
esac
|
||||
@ -140,7 +157,7 @@ fi
|
||||
|
||||
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
|
||||
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
|
||||
exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
|
||||
fi
|
||||
|
||||
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
|
||||
ARG version=20.13.1.*
|
||||
ARG version=21.1.0
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y apt-transport-https dirmngr && \
|
||||
|
@ -329,6 +329,7 @@ function run_tests
|
||||
|
||||
# nc - command not found
|
||||
01601_proxy_protocol
|
||||
01622_defaults_for_url_engine
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od
|
||||
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
|
13
docker/test/sqlancer/Dockerfile
Normal file
13
docker/test/sqlancer/Dockerfile
Normal file
@ -0,0 +1,13 @@
|
||||
# docker build -t yandex/clickhouse-sqlancer-test .
|
||||
FROM ubuntu:20.04
|
||||
|
||||
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven --yes --no-install-recommends
|
||||
|
||||
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
|
||||
RUN mkdir /sqlancer && \
|
||||
cd /sqlancer && \
|
||||
unzip /sqlancer.zip
|
||||
RUN cd /sqlancer/sqlancer-master && mvn package -DskipTests
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
15
docker/test/sqlancer/run.sh
Executable file
15
docker/test/sqlancer/run.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x
|
||||
|
||||
dpkg -i package_folder/clickhouse-common-static_*.deb
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i package_folder/clickhouse-server_*.deb
|
||||
dpkg -i package_folder/clickhouse-client_*.deb
|
||||
|
||||
service clickhouse-server start && sleep 5
|
||||
|
||||
cd /sqlancer/sqlancer-master
|
||||
CLICKHOUSE_AVAILABLE=true mvn -Dtest=TestClickHouse test
|
||||
|
||||
cp /sqlancer/sqlancer-master/target/surefire-reports/TEST-sqlancer.dbms.TestClickHouse.xml /test_output/result.xml
|
@ -66,3 +66,6 @@ function run_tests()
|
||||
export -f run_tests
|
||||
|
||||
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
|
||||
|
||||
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
|
||||
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
|
||||
|
@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
||||
|
@ -7,3 +7,4 @@ RUN apt-get install gdb
|
||||
|
||||
CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
|
||||
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
|
||||
|
||||
|
@ -42,9 +42,9 @@ Also, we need to download macOS X SDK into the working tree.
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
|
||||
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
```
|
||||
|
||||
## Build ClickHouse {#build-clickhouse}
|
||||
|
@ -98,7 +98,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
|
||||
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
|
||||
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
|
||||
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
|
||||
- `max_parts_in_total` — Maximum number of parts in all partitions.
|
||||
- `max_parts_in_total` — Maximum number of parts in all partitions.
|
||||
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
|
||||
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
|
||||
|
||||
**Example of Sections Setting**
|
||||
|
||||
|
@ -25,10 +25,27 @@ The Distributed engine accepts parameters:
|
||||
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
|
||||
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
|
||||
|
||||
It also accepts the following settings:
|
||||
|
||||
- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk.
|
||||
|
||||
- `fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc).
|
||||
|
||||
!!! note "Note"
|
||||
|
||||
**Durability settings** (`fsync_...`):
|
||||
|
||||
- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`), when data is first stored on the initiator node disk and later asynchronously sent to shards.
|
||||
- May significantly decrease the inserts' performance
|
||||
- Affect writing the data stored inside Distributed table folder into the **node which accepted your insert**. If you need to have guarantees of writing data to underlying MergeTree tables - see durability settings (`...fsync...`) in `system.merge_tree_settings`
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
Distributed(logs, default, hits[, sharding_key[, policy_name]])
|
||||
SETTINGS
|
||||
fsync_after_insert=0,
|
||||
fsync_directories=0;
|
||||
```
|
||||
|
||||
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
|
||||
|
@ -13,6 +13,7 @@ toc_title: Client Libraries
|
||||
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -844,23 +844,27 @@ Higher values will lead to higher memory usage.
|
||||
|
||||
## max_compress_block_size {#max-compress-block-size}
|
||||
|
||||
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn’t any reason to change this setting.
|
||||
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to a slightly reduced compression ratio, while the compression and decompression speed increases slightly due to cache locality and memory consumption is reduced.
|
||||
|
||||
!!! note "Warning"
|
||||
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
|
||||
|
||||
Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
|
||||
|
||||
## min_compress_block_size {#min-compress-block-size}
|
||||
|
||||
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least ‘min_compress_block_size’. By default, 65,536.
|
||||
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536.
|
||||
|
||||
The actual size of the block, if the uncompressed data is less than ‘max_compress_block_size’, is no less than this value and no less than the volume of data for one mark.
|
||||
The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark.
|
||||
|
||||
Let’s look at an example. Assume that ‘index_granularity’ was set to 8192 during table creation.
|
||||
Let’s look at an example. Assume that `index_granularity` was set to 8192 during table creation.
|
||||
|
||||
We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
|
||||
|
||||
We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed.
|
||||
|
||||
There usually isn’t any reason to change this setting.
|
||||
!!! note "Warning"
|
||||
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
|
||||
|
||||
## max_query_size {#settings-max_query_size}
|
||||
|
||||
@ -2470,6 +2474,45 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
||||
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
|
||||
|
||||
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
|
||||
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the following query with aggregate functions:
|
||||
```sql
|
||||
SELECT
|
||||
SUM(-1),
|
||||
MAX(0)
|
||||
FROM system.one
|
||||
WHERE 0
|
||||
```
|
||||
|
||||
With `aggregate_functions_null_for_empty = 0` it would produce:
|
||||
```text
|
||||
┌─SUM(-1)─┬─MAX(0)─┐
|
||||
│ 0 │ 0 │
|
||||
└─────────┴────────┘
|
||||
```
|
||||
|
||||
With `aggregate_functions_null_for_empty = 1` the result would be:
|
||||
```text
|
||||
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
|
||||
│ NULL │ NULL │
|
||||
└───────────────┴──────────────┘
|
||||
```
|
||||
|
||||
|
||||
## union_default_mode {#union-default-mode}
|
||||
|
||||
Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
|
||||
@ -2484,6 +2527,7 @@ Default value: `''`.
|
||||
|
||||
See examples in [UNION](../../sql-reference/statements/select/union.md).
|
||||
|
||||
|
||||
## data_type_default_nullable {#data_type_default_nullable}
|
||||
|
||||
Determines whether data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in a column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
|
||||
@ -2495,6 +2539,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
||||
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
|
||||
|
||||
Enables special logic to perform merges on replicas.
|
||||
|
67
docs/en/operations/system-tables/distributed_ddl_queue.md
Normal file
67
docs/en/operations/system-tables/distributed_ddl_queue.md
Normal file
@ -0,0 +1,67 @@
|
||||
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
|
||||
|
||||
Contains information about distributed DDL queries (ON CLUSTER queries) that were executed on a cluster.
|
||||
|
||||
Columns:
|
||||
|
||||
- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id.
|
||||
- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname.
|
||||
- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to.
|
||||
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port.
|
||||
- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Status of the query.
|
||||
- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name.
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed.
|
||||
- `initiator` ([String](../../sql-reference/data-types/string.md)) - Node that executed the query.
|
||||
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
|
||||
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution in milliseconds.
|
||||
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM system.distributed_ddl_queue
|
||||
WHERE cluster = 'test_cluster'
|
||||
LIMIT 2
|
||||
FORMAT Vertical
|
||||
|
||||
Query id: f544e72a-6641-43f1-836b-24baa1c9632a
|
||||
|
||||
Row 1:
|
||||
──────
|
||||
entry: query-0000000000
|
||||
host_name: clickhouse01
|
||||
host_address: 172.23.0.11
|
||||
port: 9000
|
||||
status: Finished
|
||||
cluster: test_cluster
|
||||
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
|
||||
initiator: clickhouse01:9000
|
||||
query_start_time: 2020-12-30 13:07:51
|
||||
query_finish_time: 2020-12-30 13:07:51
|
||||
query_duration_ms: 6
|
||||
exception_code: ZOK
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
entry: query-0000000000
|
||||
host_name: clickhouse02
|
||||
host_address: 172.23.0.12
|
||||
port: 9000
|
||||
status: Finished
|
||||
cluster: test_cluster
|
||||
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
|
||||
initiator: clickhouse01:9000
|
||||
query_start_time: 2020-12-30 13:07:51
|
||||
query_finish_time: 2020-12-30 13:07:51
|
||||
query_duration_ms: 6
|
||||
exception_code: ZOK
|
||||
|
||||
2 rows in set. Elapsed: 0.025 sec.
|
||||
```
|
||||
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue) <!--hide-->
|
||||
|
@ -20,7 +20,33 @@ System tables:
|
||||
|
||||
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
|
||||
|
||||
Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
|
||||
Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
|
||||
|
||||
System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements that can be customized are:
|
||||
|
||||
- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`.
|
||||
- `table`: table to insert data.
|
||||
- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression.
|
||||
- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression.
|
||||
- `flush_interval_milliseconds`: interval of flushing data to disk.
|
||||
- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option conflicts with `partition_by` and `ttl`. If they are set together, the server raises an exception and exits.
|
||||
|
||||
An example:
|
||||
|
||||
```
|
||||
<yandex>
|
||||
<query_log>
|
||||
<database>system</database>
|
||||
<table>query_log</table>
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
|
||||
<!--
|
||||
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
|
||||
-->
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.
|
||||
|
||||
|
@ -11,6 +11,7 @@ This table contains the following columns (the column type is shown in brackets)
|
||||
- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
|
||||
- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md).
|
||||
- `supports_deduplication` (UInt8) — Flag that indicates if table engine supports data deduplication.
|
||||
- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting).
|
||||
|
||||
Example:
|
||||
|
||||
@ -21,11 +22,11 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐
|
||||
│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │
|
||||
│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │
|
||||
│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
|
||||
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘
|
||||
┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┬─supports_parallel_insert─┐
|
||||
│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ 1 │
|
||||
│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │
|
||||
│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
|
||||
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
|
||||
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
|
||||
|
||||
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
|
||||
- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
|
||||
- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
|
||||
- Dictionaries from other sources are updated every time by default.
|
||||
|
||||
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
|
||||
For other sources (ODBC, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
|
||||
|
||||
- The dictionary table must have a field that always changes when the source data is updated.
|
||||
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).
|
||||
|
@ -583,7 +583,7 @@ Example of settings:
|
||||
or
|
||||
|
||||
``` sql
|
||||
SOURCE(MONGO(
|
||||
SOURCE(MONGODB(
|
||||
host 'localhost'
|
||||
port 27017
|
||||
user ''
|
||||
|
@ -1290,22 +1290,65 @@ Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference
|
||||
|
||||
## arrayMin(\[func,\] arr1, …) {#array-min}
|
||||
|
||||
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements.
|
||||
Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements.
|
||||
|
||||
Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
|
||||
Examples:
|
||||
```sql
|
||||
SELECT arrayMin([1, 2, 4]) AS res
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
|
||||
|
||||
SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res
|
||||
┌─res─┐
|
||||
│ -4 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## arrayMax(\[func,\] arr1, …) {#array-max}
|
||||
|
||||
Returns the sum of the `func` values. If the function is omitted, it just returns the min of the array elements.
|
||||
Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements.
|
||||
|
||||
Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
|
||||
Examples:
|
||||
```sql
|
||||
SELECT arrayMax([1, 2, 4]) AS res
|
||||
┌─res─┐
|
||||
│ 4 │
|
||||
└─────┘
|
||||
|
||||
|
||||
SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res
|
||||
┌─res─┐
|
||||
│ -1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## arraySum(\[func,\] arr1, …) {#array-sum}
|
||||
|
||||
Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
|
||||
|
||||
Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
|
||||
Examples:
|
||||
```sql
|
||||
SELECT arraySum([2,3]) AS res
|
||||
┌─res─┐
|
||||
│ 5 │
|
||||
└─────┘
|
||||
|
||||
|
||||
SELECT arraySum(x -> x*x, [2, 3]) AS res
|
||||
┌─res─┐
|
||||
│ 13 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
|
||||
## arrayAvg(\[func,\] arr1, …) {#array-avg}
|
||||
|
||||
Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.
|
||||
|
@ -23,6 +23,7 @@ The following actions are supported:
|
||||
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
|
||||
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
|
||||
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL.
|
||||
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
|
||||
|
||||
These actions are described in detail below.
|
||||
|
||||
@ -145,6 +146,26 @@ The `ALTER` query is atomic. For MergeTree tables it is also lock-free.
|
||||
|
||||
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.
|
||||
|
||||
## MODIFY COLUMN REMOVE {#modify-remove}
|
||||
|
||||
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
|
||||
|
||||
Syntax:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
## Limitations {#alter-query-limitations}
|
||||
|
||||
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
|
||||
|
@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
|
||||
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
|
||||
|
||||
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
|
||||
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
|
||||
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
|
||||
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
|
||||
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
|
||||
|
@ -3,10 +3,83 @@ toc_priority: 44
|
||||
toc_title: TTL
|
||||
---
|
||||
|
||||
### Manipulations with Table TTL {#manipulations-with-table-ttl}
|
||||
# Manipulations with Table TTL {#manipulations-with-table-ttl}
|
||||
|
||||
## MODIFY TTL {#modify-ttl}
|
||||
|
||||
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table-name MODIFY TTL ttl-expression
|
||||
ALTER TABLE table_name MODIFY TTL ttl_expression;
|
||||
```
|
||||
|
||||
## REMOVE TTL {#remove-ttl}
|
||||
|
||||
The TTL property can be removed from a table with the following query:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name REMOVE TTL
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Consider a table with a table-level `TTL`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_ttl
|
||||
(
|
||||
event_time DateTime,
|
||||
UserID UInt64,
|
||||
Comment String
|
||||
)
|
||||
ENGINE MergeTree()
|
||||
ORDER BY tuple()
|
||||
TTL event_time + INTERVAL 3 MONTH
|
||||
SETTINGS min_bytes_for_wide_part = 0;
|
||||
|
||||
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
|
||||
|
||||
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
|
||||
```
|
||||
|
||||
Run `OPTIMIZE` to force `TTL` cleanup:
|
||||
|
||||
```sql
|
||||
OPTIMIZE TABLE table_with_ttl FINAL;
|
||||
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
|
||||
```
|
||||
The second row was deleted from the table:
|
||||
|
||||
```text
|
||||
┌─────────event_time────┬──UserID─┬─────Comment──┐
|
||||
│ 2020-12-11 12:44:57 │ 1 │ username1 │
|
||||
└───────────────────────┴─────────┴──────────────┘
|
||||
```
|
||||
|
||||
Now remove table `TTL` with the following query:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_with_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:
|
||||
|
||||
```sql
|
||||
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
|
||||
OPTIMIZE TABLE table_with_ttl FINAL;
|
||||
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
|
||||
```
|
||||
|
||||
The `TTL` is no longer there, so the second row is not deleted:
|
||||
|
||||
```text
|
||||
┌─────────event_time────┬──UserID─┬─────Comment──┐
|
||||
│ 2020-12-11 12:44:57 │ 1 │ username1 │
|
||||
│ 2020-08-11 12:44:57 │ 2 │ username2 │
|
||||
└───────────────────────┴─────────┴──────────────┘
|
||||
```
|
||||
|
||||
### See Also
|
||||
|
||||
- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression).
|
||||
- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column).
|
||||
|
@ -13,9 +13,7 @@ Basic query format:
|
||||
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
|
||||
```
|
||||
|
||||
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
|
||||
|
||||
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
|
||||
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
|
||||
|
||||
For example, consider the table:
|
||||
|
||||
@ -23,9 +21,8 @@ For example, consider the table:
|
||||
SHOW CREATE insert_select_testtable;
|
||||
```
|
||||
|
||||
```
|
||||
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CREATE TABLE insert_select_testtable
|
||||
```text
|
||||
CREATE TABLE insert_select_testtable
|
||||
(
|
||||
`a` Int8,
|
||||
`b` String,
|
||||
@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY a
|
||||
SETTINGS index_granularity = 8192 │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
``` sql
|
||||
|
21
docs/en/sql-reference/statements/select/all.md
Normal file
21
docs/en/sql-reference/statements/select/all.md
Normal file
@ -0,0 +1,21 @@
|
||||
---
|
||||
toc_title: ALL
|
||||
---
|
||||
|
||||
# ALL Clause {#select-all}
|
||||
|
||||
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
|
||||
|
||||
- If `ALL` is specified, it is ignored.
|
||||
- If both `ALL` and `DISTINCT` are specified, an exception is thrown.
|
||||
|
||||
`ALL` can also be specified inside an aggregate function with the same effect (no-op), for instance:
|
||||
|
||||
```sql
|
||||
SELECT sum(ALL number) FROM numbers(10);
|
||||
```
|
||||
is equivalent to
|
||||
|
||||
```sql
|
||||
SELECT sum(number) FROM numbers(10);
|
||||
```
|
@ -18,10 +18,6 @@ It is possible to obtain the same result by applying [GROUP BY](../../../sql-ref
|
||||
- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
|
||||
- Data blocks are output as they are processed, without waiting for the entire query to finish running.
|
||||
|
||||
## Limitations {#limitations}
|
||||
|
||||
`DISTINCT` is not supported if `SELECT` has at least one array column.
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.
|
||||
|
@ -44,9 +44,9 @@ Además, necesitamos descargar macOS X SDK en el árbol de trabajo.
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
|
||||
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
```
|
||||
|
||||
# Construir ClickHouse {#build-clickhouse}
|
||||
|
@ -13,6 +13,7 @@ toc_title: Client Libraries
|
||||
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -44,9 +44,9 @@ En outre, nous devons télécharger macOS X SDK dans l'arbre de travail.
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
|
||||
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
```
|
||||
|
||||
# Construire ClickHouse {#build-clickhouse}
|
||||
|
@ -15,6 +15,7 @@ toc_title: "Biblioth\xE8ques Clientes"
|
||||
- [clickhouse-chauffeur](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2 / phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov / clickhouse-PHP-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -45,9 +45,9 @@ make install
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
|
||||
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
|
||||
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
|
||||
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
```
|
||||
|
||||
# ビルドClickHouse {#build-clickhouse}
|
||||
|
@ -15,6 +15,7 @@ toc_title: "\u30AF\u30E9\u30A4\u30A2\u30F3\u30C8"
|
||||
- [clickhouse-ドライバ](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-クライアント](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov/clickhouse-php-クライアント](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -133,7 +133,7 @@ ClickHouse имеет сильную типизацию, поэтому нет
|
||||
|
||||
## Агрегатные функции {#aggregate-functions}
|
||||
|
||||
Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь один человек `UInt64` значение) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`).
|
||||
Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь одна переменная типа `UInt64`) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`).
|
||||
|
||||
Состояния распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении запроса `GROUP BY` высокой кардинальности (большим числом уникальных данных). Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами аллоцировать дополнительную память. Потому к созданию и уничтожению состояний, правильной передаче владения и порядку уничтожения следует уделять больше внимания.
|
||||
|
||||
|
@ -77,17 +77,19 @@ ORDER BY expr
|
||||
|
||||
- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные):
|
||||
|
||||
- `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage).
|
||||
- `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage).
|
||||
- `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage).
|
||||
- `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage).
|
||||
- `min_index_granularity_bytes` — минимально допустимый размер гранул данных в байтах. Значение по умолчанию — 1024b. Для обеспечения защиты от случайного создания таблиц с очень низким значением `index_granularity_bytes`. Смотрите [Хранение данных](#mergetree-data-storage).
|
||||
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
|
||||
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
|
||||
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
|
||||
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
|
||||
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
|
||||
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
|
||||
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
|
||||
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
|
||||
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
|
||||
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_merge_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
|
||||
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
|
||||
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
|
||||
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
|
||||
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
|
||||
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage).
|
||||
- `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
|
||||
- `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
|
||||
|
||||
**Пример задания секций**
|
||||
|
||||
|
@ -13,6 +13,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u
|
||||
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -811,23 +811,27 @@ log_query_threads=1
|
||||
|
||||
## max_compress_block_size {#max-compress-block-size}
|
||||
|
||||
Максимальный размер блоков не сжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативки. Как правило, не имеет смысла менять эту настройку.
|
||||
Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию - 1 048 576 (1 MiB). При уменьшении размера, незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности, и уменьшается потребление оперативной памяти.
|
||||
|
||||
!!! note "Предупреждение"
|
||||
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с ClickHouse.
|
||||
|
||||
Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы).
|
||||
|
||||
## min_compress_block_size {#min-compress-block-size}
|
||||
|
||||
Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min_compress_block_size. По умолчанию - 65 536.
|
||||
Для таблиц типа [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше `min_compress_block_size`. По умолчанию - 65 536.
|
||||
|
||||
Реальный размер блока, если несжатых данных меньше max_compress_block_size, будет не меньше этого значения и не меньше объёма данных на одну засечку.
|
||||
Реальный размер блока, если несжатых данных меньше `max_compress_block_size`, будет не меньше этого значения и не меньше объёма данных на одну засечку.
|
||||
|
||||
Рассмотрим пример. Пусть index_granularity, указанная при создании таблицы - 8192.
|
||||
Рассмотрим пример. Пусть `index_granularity`, указанная при создании таблицы - 8192.
|
||||
|
||||
Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как min_compress_block_size = 65 536, сжатый блок будет сформирован на каждые две засечки.
|
||||
Пусть мы записываем столбец типа UInt32 (4 байта на значение). При записи 8192 строк, будет всего 32 КБ данных. Так как `min_compress_block_size` = 65 536, сжатый блок будет сформирован на каждые две засечки.
|
||||
|
||||
Пусть мы записываем столбец URL типа String (средний размер - 60 байт на значение). При записи 8192 строк, будет, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 байт, то сжатый блок будет сформирован на каждую засечку. В этом случае, при чтении с диска данных из диапазона в одну засечку, не будет разжато лишних данных.
|
||||
|
||||
Как правило, не имеет смысла менять эту настройку.
|
||||
!!! note "Предупреждение"
|
||||
Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с ClickHouse.
|
||||
|
||||
## max_query_size {#settings-max_query_size}
|
||||
|
||||
@ -2339,6 +2343,45 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
|
||||
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
|
||||
|
||||
Включает или отключает перезапись всех агрегатных функций в запросе, с добавлением к ним суффикса [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull). Включите для совместимости со стандартом SQL.
|
||||
Реализуется с помощью перезаписи запросов (аналогично настройке [count_distinct_implementation](#settings-count_distinct_implementation)), чтобы получить согласованные результаты для распределенных запросов.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — выключена.
|
||||
- 1 — включена.
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим запрос с агрегирующими функциями:
|
||||
```sql
|
||||
SELECT
|
||||
SUM(-1),
|
||||
MAX(0)
|
||||
FROM system.one
|
||||
WHERE 0
|
||||
```
|
||||
|
||||
Результат запроса с настройкой `aggregate_functions_null_for_empty = 0`:
|
||||
```text
|
||||
┌─SUM(-1)─┬─MAX(0)─┐
|
||||
│ 0 │ 0 │
|
||||
└─────────┴────────┘
|
||||
```
|
||||
|
||||
Результат запроса с настройкой `aggregate_functions_null_for_empty = 1`:
|
||||
```text
|
||||
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
|
||||
│ NULL │ NULL │
|
||||
└───────────────┴──────────────┘
|
||||
```
|
||||
|
||||
|
||||
## union_default_mode {#union-default-mode}
|
||||
|
||||
Устанавливает режим объединения результатов `SELECT` запросов. Настройка используется только при совместном использовании с [UNION](../../sql-reference/statements/select/union.md) без явного указания `UNION ALL` или `UNION DISTINCT`.
|
||||
@ -2353,6 +2396,7 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
|
||||
|
||||
Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md).
|
||||
|
||||
|
||||
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
|
||||
|
||||
Включает особую логику выполнения слияний на репликах.
|
||||
|
@ -8,7 +8,7 @@
|
||||
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — значение метрики.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — описание метрики.
|
||||
|
||||
Список поддержанных метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
|
||||
Список поддерживаемых метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
|
||||
При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md):
|
||||
|
||||
> - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется.
|
||||
> - Для таблиц типа MyISAM, время модификации проверяется запросом `SHOW TABLE STATUS`.
|
||||
> - Для MySQL источника, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`).
|
||||
> - Словари из других источников по умолчанию обновляются каждый раз.
|
||||
|
||||
Для источников MySQL (InnoDB), ODBC и ClickHouse можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия:
|
||||
Для других источников (ODBC, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия:
|
||||
|
||||
> - В таблице словаря должно быть поле, которое гарантированно изменяется при обновлении данных в источнике.
|
||||
> - В настройках источника указывается запрос, который получает изменяющееся поле. Результат запроса сервер ClickHouse интерпретирует как строку и если эта строка изменилась по отношению к предыдущему состоянию, то словарь обновляется. Запрос следует указывать в поле `<invalidate_query>` настроек [источника](external-dicts-dict-sources.md).
|
||||
|
@ -12,6 +12,7 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
|
||||
- [CLEAR COLUMN](#alter_clear-column) — сбрасывает все значения в столбце для заданной партиции;
|
||||
- [COMMENT COLUMN](#alter_comment-column) — добавляет комментарий к столбцу;
|
||||
- [MODIFY COLUMN](#alter_modify-column) — изменяет тип столбца, выражение для значения по умолчанию и TTL.
|
||||
- [MODIFY COLUMN REMOVE](#modify-remove) — удаляет какое-либо из свойств столбца.
|
||||
|
||||
Подробное описание для каждого действия приведено ниже.
|
||||
|
||||
@ -135,6 +136,28 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
|
||||
|
||||
Запрос `ALTER` на изменение столбцов реплицируется. Соответствующие инструкции сохраняются в ZooKeeper, и затем каждая реплика их применяет. Все запросы `ALTER` выполняются в одном и том же порядке. Запрос ждёт выполнения соответствующих действий на всех репликах. Но при этом, запрос на изменение столбцов в реплицируемой таблице можно прервать, и все действия будут осуществлены асинхронно.
|
||||
|
||||
## MODIFY COLUMN REMOVE {#modify-remove}
|
||||
|
||||
Удаляет какое-либо из свойств столбца: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
|
||||
|
||||
Синтаксис:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
|
||||
```
|
||||
|
||||
**Пример**
|
||||
|
||||
Удаление свойства TTL:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
## Смотрите также
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter}
|
||||
|
||||
Запрос `ALTER` позволяет создавать и удалять отдельные элементы (столбцы) вложенных структур данных, но не вложенные структуры данных целиком. Для добавления вложенной структуры данных, вы можете добавить столбцы с именем вида `name.nested_name` и типом `Array(T)` - вложенная структура данных полностью эквивалентна нескольким столбцам-массивам с именем, имеющим одинаковый префикс до точки.
|
||||
|
@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
|
||||
Чтобы задать нужную партицию в запросах `ALTER ... PARTITION`, можно использовать:
|
||||
|
||||
- Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../../../operations/system-tables/parts.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`.
|
||||
- Произвольное выражение из столбцов исходной таблицы. Также поддерживаются константы и константные выражения. Например, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
|
||||
- Кортеж из выражений или констант, совпадающий (в типах) с кортежем партиционирования. В случае ключа партиционирования из одного элемента, выражение следует обернуть в функцию `tuple(...)`. Например, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
|
||||
- Строковый идентификатор партиции. Идентификатор партиции используется для именования кусков партиции на файловой системе и в ZooKeeper. В запросах `ALTER` идентификатор партиции нужно указывать в секции `PARTITION ID`, в одинарных кавычках. Например, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
|
||||
- Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковой литерал со значением из столбца `name` системной таблицы [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
|
||||
@ -306,4 +306,4 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
|
||||
|
||||
Примеры запросов `ALTER ... PARTITION` можно посмотреть в тестах: [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) и [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->
|
||||
|
@ -5,10 +5,82 @@ toc_title: TTL
|
||||
|
||||
# Манипуляции с TTL таблицы {#manipuliatsii-s-ttl-tablitsy}
|
||||
|
||||
## MODIFY TTL {#modify-ttl}
|
||||
|
||||
Вы можете изменить [TTL для таблицы](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) запросом следующего вида:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table-name MODIFY TTL ttl-expression
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->
|
||||
## REMOVE TTL {#remove-ttl}
|
||||
|
||||
Удалить табличный TTL можно запросом следующего вида:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name REMOVE TTL
|
||||
```
|
||||
|
||||
**Пример**
|
||||
|
||||
Создадим таблицу с табличным `TTL` и заполним её данными:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_ttl
|
||||
(
|
||||
event_time DateTime,
|
||||
UserID UInt64,
|
||||
Comment String
|
||||
)
|
||||
ENGINE MergeTree()
|
||||
ORDER BY tuple()
|
||||
TTL event_time + INTERVAL 3 MONTH
|
||||
SETTINGS min_bytes_for_wide_part = 0;
|
||||
|
||||
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
|
||||
|
||||
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
|
||||
```
|
||||
|
||||
Выполним `OPTIMIZE` для принудительной очистки по `TTL`:
|
||||
|
||||
```sql
|
||||
OPTIMIZE TABLE table_with_ttl FINAL;
|
||||
SELECT * FROM table_with_ttl;
|
||||
```
|
||||
В результате видно, что вторая строка удалена.
|
||||
|
||||
```text
|
||||
┌─────────event_time────┬──UserID─┬─────Comment──┐
|
||||
│ 2020-12-11 12:44:57 │ 1 │ username1 │
|
||||
└───────────────────────┴─────────┴──────────────┘
|
||||
```
|
||||
|
||||
Удаляем табличный `TTL`:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_with_ttl REMOVE TTL;
|
||||
```
|
||||
|
||||
Заново вставляем удаленную строку и снова принудительно запускаем очистку по `TTL` с помощью `OPTIMIZE`:
|
||||
|
||||
```sql
|
||||
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
|
||||
OPTIMIZE TABLE table_with_ttl FINAL;
|
||||
SELECT * FROM table_with_ttl;
|
||||
```
|
||||
|
||||
`TTL` больше нет, поэтому данные не удаляются:
|
||||
|
||||
```text
|
||||
┌─────────event_time────┬──UserID─┬─────Comment──┐
|
||||
│ 2020-12-11 12:44:57 │ 1 │ username1 │
|
||||
│ 2020-08-11 12:44:57 │ 2 │ username2 │
|
||||
└───────────────────────┴─────────┴──────────────┘
|
||||
```
|
||||
|
||||
### Смотрите также
|
||||
|
||||
- Подробнее о [свойстве TTL](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->
|
||||
|
@ -13,9 +13,7 @@ toc_title: INSERT INTO
|
||||
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
|
||||
```
|
||||
|
||||
Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `COLUMNS(c1,c2,c3)`.
|
||||
|
||||
Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`.
|
||||
Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`.
|
||||
|
||||
В качестве примера рассмотрим таблицу:
|
||||
|
||||
|
@ -18,10 +18,6 @@ toc_title: DISTINCT
|
||||
- Когда секция [ORDER BY](order-by.md) опущена, а секция [LIMIT](limit.md) присутствует, запрос прекращает выполнение сразу после считывания необходимого количества различных строк.
|
||||
- Блоки данных выводятся по мере их обработки, не дожидаясь завершения выполнения всего запроса.
|
||||
|
||||
## Ограничения {#limitations}
|
||||
|
||||
`DISTINCT` не поддерживается, если `SELECT` имеет по крайней мере один столбец-массив.
|
||||
|
||||
## Примеры {#examples}
|
||||
|
||||
ClickHouse поддерживает использование секций `DISTINCT` и `ORDER BY` для разных столбцов в одном запросе. Секция `DISTINCT` выполняется до секции `ORDER BY`.
|
||||
|
@ -33,8 +33,8 @@ cd cctools-port/cctools
|
||||
make install
|
||||
|
||||
cd ${CCTOOLS}
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
|
||||
tar xJf MacOSX10.14.sdk.tar.xz
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
|
||||
tar xJf MacOSX10.15.sdk.tar.xz
|
||||
```
|
||||
|
||||
# 编译 ClickHouse {#bian-yi-clickhouse}
|
||||
@ -46,7 +46,7 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
|
||||
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
|
||||
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
|
||||
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \
|
||||
-DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk
|
||||
-DSDK_PATH=${CCTOOLS}/MacOSX10.15.sdk
|
||||
ninja -C build-osx
|
||||
```
|
||||
|
||||
|
@ -13,6 +13,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
|
||||
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
- [asynch](https://github.com/long2ice/asynch)
|
||||
- PHP
|
||||
- [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
|
||||
- [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
|
||||
|
@ -22,9 +22,35 @@ toc_title: "\u7CFB\u7EDF\u8868"
|
||||
|
||||
大多数系统表将数据存储在RAM中。 ClickHouse服务器在开始时创建此类系统表。
|
||||
|
||||
与其他系统表不同,系统表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表,ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改,则ClickHouse会重命名当前表并创建一个新表。
|
||||
与其他系统表不同,系统日志表 [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log), [part_log](../../operations/system-tables/part_log.md#system.part_log), crash_log 和 text_log 默认采用[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 引擎并将其数据存储在文件系统中。 如果从文件系统中删除表,ClickHouse服务器会在下一次写入数据时再次创建空表。 如果系统表架构在新版本中发生更改,则ClickHouse会重命名当前表并创建一个新表。
|
||||
|
||||
默认情况下,表增长是无限的。 要控制表的大小,可以使用 [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) 删除过期日志记录的设置。 你也可以使用分区功能 `MergeTree`-发动机表。
|
||||
用户可以通过在`/etc/clickhouse-server/config.d/`下创建与系统表同名的配置文件, 或者在`/etc/clickhouse-server/config.xml`中设置相应配置项,来自定义系统日志表的结构。可以自定义的配置项如下:
|
||||
|
||||
- `database`: 系统日志表所在的数据库。这个选项目前已经废弃。所有的系统日志表都位于`system`库中。
|
||||
- `table`: 系统日志表名。
|
||||
- `partition_by`: 指定[PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md)表达式。
|
||||
- `ttl`: 指定系统日志表TTL选项。
|
||||
- `flush_interval_milliseconds`: 指定系统日志表数据落盘时间。
|
||||
- `engine`: 指定完整的表引擎定义。(以`ENGINE = `开始)。 这个选项与`partition_by`以及`ttl`冲突。如果两者一起设置,服务启动时会抛出异常并且退出。
|
||||
|
||||
一个配置定义的例子如下:
|
||||
|
||||
```
|
||||
<yandex>
|
||||
<query_log>
|
||||
<database>system</database>
|
||||
<table>query_log</table>
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
|
||||
<!--
|
||||
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
|
||||
-->
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
默认情况下,表增长是无限的。 要控制表的大小,可以使用 TTL 设置来删除过期的日志记录。 你也可以使用 `MergeTree` 引擎表的分区功能。
|
||||
|
||||
## 系统指标的来源 {#system-tables-sources-of-system-metrics}
|
||||
|
||||
|
@ -29,7 +29,7 @@ SELECT 1 - 0.9
|
||||
|
||||
- 当一行行阅读浮点数的时候,浮点数的结果可能不是机器最近显示的数值。
|
||||
|
||||
## 南和Inf {#data_type-float-nan-inf}
|
||||
## NaN和Inf {#data_type-float-nan-inf}
|
||||
|
||||
与标准SQL相比,ClickHouse 支持以下类别的浮点数:
|
||||
|
||||
|
@ -318,6 +318,10 @@ else ()
|
||||
if (USE_GDB_ADD_INDEX)
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
|
||||
endif()
|
||||
|
||||
if (USE_BINARY_HASH)
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (ENABLE_TESTS AND USE_GTEST)
|
||||
|
@ -801,7 +801,8 @@ private:
|
||||
connection->setDefaultDatabase(connection_parameters.default_database);
|
||||
ReadBufferFromFile in(queries_file);
|
||||
readStringUntilEOF(text, in);
|
||||
processMultiQuery(text);
|
||||
if (!processMultiQuery(text))
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -984,7 +985,8 @@ private:
|
||||
|
||||
if (query_fuzzer_runs)
|
||||
{
|
||||
processWithFuzzing(full_query);
|
||||
if (!processWithFuzzing(full_query))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1034,7 +1036,8 @@ private:
|
||||
}
|
||||
|
||||
|
||||
void processWithFuzzing(const String & text)
|
||||
/// Returns false when server is not available.
|
||||
bool processWithFuzzing(const String & text)
|
||||
{
|
||||
ASTPtr orig_ast;
|
||||
|
||||
@ -1052,7 +1055,7 @@ private:
|
||||
if (!orig_ast)
|
||||
{
|
||||
// Can't continue after a parsing error
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Don't repeat inserts, the tables grow too big. Also don't repeat
|
||||
@ -1147,7 +1150,7 @@ private:
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server\n");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// The server is still alive so we're going to continue fuzzing.
|
||||
@ -1173,6 +1176,8 @@ private:
|
||||
fuzz_base = ast_to_process;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void processTextAsSingleQuery(const String & text_)
|
||||
|
@ -273,11 +273,12 @@ try
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions(*global_context);
|
||||
|
||||
String path = global_context->getPath();
|
||||
if (!path.empty())
|
||||
if (config().has("path"))
|
||||
{
|
||||
String path = global_context->getPath();
|
||||
|
||||
/// Lock path directory before read
|
||||
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);
|
||||
status.emplace(path + "status", StatusFile::write_full_info);
|
||||
|
||||
LOG_DEBUG(log, "Loading metadata from {}", path);
|
||||
Poco::File(path + "data/").createDirectories();
|
||||
@ -288,7 +289,7 @@ try
|
||||
DatabaseCatalog::instance().loadDatabases();
|
||||
LOG_DEBUG(log, "Loaded metadata.");
|
||||
}
|
||||
else
|
||||
else if (!config().has("no-system-tables"))
|
||||
{
|
||||
attachSystemTables(*global_context);
|
||||
}
|
||||
@ -540,6 +541,7 @@ void LocalServer::init(int argc, char ** argv)
|
||||
("logger.log", po::value<std::string>(), "Log file name")
|
||||
("logger.level", po::value<std::string>(), "Log level")
|
||||
("ignore-error", "do not stop processing if a query failed")
|
||||
("no-system-tables", "do not attach system tables (better startup time)")
|
||||
("version,V", "print version information and exit")
|
||||
;
|
||||
|
||||
@ -602,6 +604,8 @@ void LocalServer::init(int argc, char ** argv)
|
||||
config().setString("logger.level", options["logger.level"].as<std::string>());
|
||||
if (options.count("ignore-error"))
|
||||
config().setBool("ignore-error", true);
|
||||
if (options.count("no-system-tables"))
|
||||
config().setBool("no-system-tables", true);
|
||||
|
||||
std::vector<std::string> arguments;
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
|
@ -18,6 +18,7 @@
|
||||
#endif
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
|
||||
#include <common/phdr_cache.h>
|
||||
#include <ext/scope_guard.h>
|
||||
@ -62,6 +63,14 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
|
||||
int mainEntryClickHouseRestart(int argc, char ** argv);
|
||||
#endif
|
||||
|
||||
int mainEntryClickHouseHashBinary(int, char **)
|
||||
{
|
||||
/// Intentionally without newline. So you can run:
|
||||
/// objcopy --add-section .note.ClickHouse.hash=<(./clickhouse hash-binary) clickhouse
|
||||
std::cout << getHashOfLoadedBinaryHex();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
|
||||
|
||||
namespace
|
||||
@ -110,6 +119,7 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
|
||||
{"status", mainEntryClickHouseStatus},
|
||||
{"restart", mainEntryClickHouseRestart},
|
||||
#endif
|
||||
{"hash-binary", mainEntryClickHouseHashBinary},
|
||||
};
|
||||
|
||||
|
||||
|
@ -65,6 +65,8 @@
|
||||
#include <Server/TCPHandlerFactory.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/Elf.h>
|
||||
#include <Server/MySQLHandlerFactory.h>
|
||||
#include <Server/PostgreSQLHandlerFactory.h>
|
||||
#include <Server/ProtocolServerAdapter.h>
|
||||
@ -184,6 +186,7 @@ namespace ErrorCodes
|
||||
extern const int FAILED_TO_GETPWUID;
|
||||
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int CORRUPTED_DATA;
|
||||
}
|
||||
|
||||
|
||||
@ -436,7 +439,44 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
std::string executable_path = getExecutablePath();
|
||||
if (executable_path.empty())
|
||||
|
||||
if (!executable_path.empty())
|
||||
{
|
||||
/// Integrity check based on checksum of the executable code.
|
||||
/// Note: it is not intended to protect from malicious party,
|
||||
/// because the reference checksum can be easily modified as well.
|
||||
/// And we don't involve asymmetric encryption with PKI yet.
|
||||
/// It's only intended to protect from faulty hardware.
|
||||
/// Note: it is only based on machine code.
|
||||
/// But there are other sections of the binary (e.g. exception handling tables)
|
||||
/// that are interpreted (not executed) but can alter the behaviour of the program as well.
|
||||
|
||||
String calculated_binary_hash = getHashOfLoadedBinaryHex();
|
||||
|
||||
if (stored_binary_hash.empty())
|
||||
{
|
||||
LOG_WARNING(log, "Calculated checksum of the binary: {}."
|
||||
" There is no information about the reference checksum.", calculated_binary_hash);
|
||||
}
|
||||
else if (calculated_binary_hash == stored_binary_hash)
|
||||
{
|
||||
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::CORRUPTED_DATA,
|
||||
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
|
||||
" to the reference checksum stored in the binary ({1})."
|
||||
" It may indicate one of the following:"
|
||||
" - the file {2} was changed just after startup;"
|
||||
" - the file {2} is damaged on disk due to faulty hardware;"
|
||||
" - the loaded executable is damaged in memory due to faulty hardware;"
|
||||
" - the file {2} was intentionally modified;"
|
||||
" - logical error in code."
|
||||
, calculated_binary_hash, stored_binary_hash, executable_path);
|
||||
}
|
||||
}
|
||||
else
|
||||
executable_path = "/usr/bin/clickhouse"; /// It is used for information messages.
|
||||
|
||||
/// After full config loaded
|
||||
|
@ -676,7 +676,7 @@
|
||||
<database>system</database>
|
||||
<table>query_log</table>
|
||||
<!--
|
||||
PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
|
||||
PARTITION BY expr: https://clickhouse.yandex/docs/en/table_engines/mergetree-family/custom_partitioning_key/
|
||||
Example:
|
||||
event_date
|
||||
toMonday(event_date)
|
||||
|
@ -287,7 +287,7 @@
|
||||
</div>
|
||||
<div id="run_div">
|
||||
<button class="shadow" id="run">Run</button>
|
||||
<span class="hint"> (Ctrl+Enter)</span>
|
||||
<span class="hint"> (Ctrl/Cmd+Enter)</span>
|
||||
<span id="hourglass">⧗</span>
|
||||
<span id="check-mark">✔</span>
|
||||
<span id="stats"></span>
|
||||
@ -424,10 +424,10 @@
|
||||
post();
|
||||
}
|
||||
|
||||
document.onkeypress = function(event)
|
||||
document.onkeydown = function(event)
|
||||
{
|
||||
/// Firefox has code 13 for Enter and Chromium has code 10.
|
||||
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
|
||||
if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) {
|
||||
post();
|
||||
}
|
||||
}
|
||||
|
@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
|
||||
{
|
||||
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
|
||||
static constexpr bool limit_num_elems = Trait::has_limit;
|
||||
DataTypePtr & data_type;
|
||||
UInt64 max_elems;
|
||||
UInt64 seed;
|
||||
|
||||
@ -121,7 +120,6 @@ public:
|
||||
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
|
||||
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
|
||||
{data_type_}, {})
|
||||
, data_type(this->argument_types[0])
|
||||
, max_elems(max_elems_)
|
||||
, seed(seed_)
|
||||
{
|
||||
@ -129,7 +127,7 @@ public:
|
||||
|
||||
String getName() const override { return getNameByTrait<Trait>(); }
|
||||
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }
|
||||
|
||||
void insert(Data & a, const T & v, Arena * arena) const
|
||||
{
|
||||
|
@ -168,7 +168,7 @@ public:
|
||||
{
|
||||
for (const auto & x : small)
|
||||
{
|
||||
if (rb->contains(static_cast<Value>(x.getValue())))
|
||||
if (r1.rb->contains(static_cast<Value>(x.getValue())))
|
||||
buffer.push_back(x.getValue());
|
||||
}
|
||||
|
||||
@ -264,7 +264,7 @@ public:
|
||||
{
|
||||
for (const auto & x : small)
|
||||
{
|
||||
if (rb->contains(static_cast<Value>(x.getValue())))
|
||||
if (r1.rb->contains(static_cast<Value>(x.getValue())))
|
||||
++ret;
|
||||
}
|
||||
}
|
||||
@ -419,7 +419,7 @@ public:
|
||||
if (isSmall())
|
||||
return small.find(x) != small.end();
|
||||
else
|
||||
return rb->contains(x);
|
||||
return rb->contains(static_cast<Value>(x));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -613,7 +613,7 @@ public:
|
||||
/**
|
||||
* Replace value
|
||||
*/
|
||||
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
|
||||
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
|
||||
{
|
||||
if (isSmall())
|
||||
toLarge();
|
||||
@ -622,9 +622,9 @@ public:
|
||||
{
|
||||
if (from_vals[i] == to_vals[i])
|
||||
continue;
|
||||
bool changed = rb->removeChecked(from_vals[i]);
|
||||
bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
|
||||
if (changed)
|
||||
rb->add(to_vals[i]);
|
||||
rb->add(static_cast<Value>(to_vals[i]));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
|
||||
return std::make_shared<DataTypeArray>(this->argument_types[0]);
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
|
@ -19,12 +19,12 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
struct ComparePairFirst final
|
||||
struct ComparePair final
|
||||
{
|
||||
template <typename T1, typename T2>
|
||||
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
|
||||
{
|
||||
return lhs.first < rhs.first;
|
||||
return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first;
|
||||
}
|
||||
};
|
||||
|
||||
@ -33,8 +33,8 @@ template <typename T>
|
||||
struct AggregateFunctionWindowFunnelData
|
||||
{
|
||||
using TimestampEvent = std::pair<T, UInt8>;
|
||||
using TimestampEvents = PODArray<TimestampEvent, 64>;
|
||||
using Comparator = ComparePairFirst;
|
||||
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
|
||||
using Comparator = ComparePair;
|
||||
|
||||
bool sorted = true;
|
||||
TimestampEvents events_list;
|
||||
@ -47,8 +47,13 @@ struct AggregateFunctionWindowFunnelData
|
||||
void add(T timestamp, UInt8 event)
|
||||
{
|
||||
// Since most events should have already been sorted by timestamp.
|
||||
if (sorted && events_list.size() > 0 && events_list.back().first > timestamp)
|
||||
sorted = false;
|
||||
if (sorted && events_list.size() > 0)
|
||||
{
|
||||
if (events_list.back().first == timestamp)
|
||||
sorted = events_list.back().second <= event;
|
||||
else
|
||||
sorted = events_list.back().first <= timestamp;
|
||||
}
|
||||
events_list.emplace_back(timestamp, event);
|
||||
}
|
||||
|
||||
|
@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
|
||||
{
|
||||
}
|
||||
|
||||
MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
|
||||
{
|
||||
if (size == 0)
|
||||
return cloneEmpty();
|
||||
|
||||
size_t from_size = data.size();
|
||||
|
||||
if (size <= from_size)
|
||||
{
|
||||
auto res = createView();
|
||||
auto & res_data = res->data;
|
||||
res_data.assign(data.begin(), data.begin() + size);
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Create a new column to return.
|
||||
MutableColumnPtr cloned_col = cloneEmpty();
|
||||
auto * res = typeid_cast<ColumnAggregateFunction *>(cloned_col.get());
|
||||
|
||||
res->insertRangeFrom(*this, 0, from_size);
|
||||
for (size_t i = from_size; i < size; ++i)
|
||||
res->insertDefault();
|
||||
|
||||
return cloned_col;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -215,7 +215,7 @@ public:
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
|
||||
bool structureEquals(const IColumn &) const override;
|
||||
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
41
src/Common/DirectorySyncGuard.cpp
Normal file
41
src/Common/DirectorySyncGuard.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include <Common/DirectorySyncGuard.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <fcntl.h> // O_RDWR
|
||||
|
||||
/// OSX does not have O_DIRECTORY
|
||||
#ifndef O_DIRECTORY
|
||||
#define O_DIRECTORY O_RDWR
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_FSYNC;
|
||||
}
|
||||
|
||||
DirectorySyncGuard::DirectorySyncGuard(const DiskPtr & disk_, const String & path)
|
||||
: disk(disk_)
|
||||
, fd(disk_->open(path, O_DIRECTORY))
|
||||
{}
|
||||
|
||||
DirectorySyncGuard::~DirectorySyncGuard()
|
||||
{
|
||||
try
|
||||
{
|
||||
#if defined(OS_DARWIN)
|
||||
if (fcntl(fd, F_FULLFSYNC, 0))
|
||||
throwFromErrno("Cannot fcntl(F_FULLFSYNC)", ErrorCodes::CANNOT_FSYNC);
|
||||
#endif
|
||||
disk->sync(fd);
|
||||
disk->close(fd);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,36 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Disks/IDisk.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IDisk;
|
||||
using DiskPtr = std::shared_ptr<IDisk>;
|
||||
|
||||
/// Helper class, that receives file descriptor and does fsync for it in destructor.
|
||||
/// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end.
|
||||
/// Guarantees of sequence 'close-reopen-fsync' may depend on kernel version.
|
||||
/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
|
||||
class FileSyncGuard
|
||||
class DirectorySyncGuard
|
||||
{
|
||||
public:
|
||||
/// NOTE: If you have already opened descriptor, it's preferred to use
|
||||
/// this constructor instead of constructor with path.
|
||||
FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
|
||||
|
||||
FileSyncGuard(const DiskPtr & disk_, const String & path)
|
||||
: disk(disk_), fd(disk_->open(path, O_RDWR)) {}
|
||||
|
||||
~FileSyncGuard()
|
||||
{
|
||||
try
|
||||
{
|
||||
disk->sync(fd);
|
||||
disk->close(fd);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
DirectorySyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
|
||||
DirectorySyncGuard(const DiskPtr & disk_, const std::string & path);
|
||||
~DirectorySyncGuard();
|
||||
|
||||
private:
|
||||
DiskPtr disk;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user