Merge branch 'master' into fasttest

This commit is contained in:
alesapin 2020-07-14 15:19:05 +03:00
commit 0740b88d6b
480 changed files with 9992 additions and 4049 deletions

2
.gitmodules vendored
View File

@ -49,7 +49,7 @@
url = https://github.com/ClickHouse-Extras/boost.git url = https://github.com/ClickHouse-Extras/boost.git
[submodule "contrib/base64"] [submodule "contrib/base64"]
path = contrib/base64 path = contrib/base64
url = https://github.com/powturbo/Turbo-Base64.git url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
[submodule "contrib/arrow"] [submodule "contrib/arrow"]
path = contrib/arrow path = contrib/arrow
url = https://github.com/apache/arrow url = https://github.com/apache/arrow

View File

@ -12,6 +12,20 @@ foreach(policy
endif() endif()
endforeach() endforeach()
# set default policy
foreach(default_policy_var_name
# make option() honor normal variables for BUILD_SHARED_LIBS:
# - re2
# - snappy
CMAKE_POLICY_DEFAULT_CMP0077
# Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
# set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
# INTERFACE_LINK_LIBRARIES.
CMAKE_POLICY_DEFAULT_CMP0022
)
set(${default_policy_var_name} NEW)
endforeach()
project(ClickHouse) project(ClickHouse)
include (cmake/arch.cmake) include (cmake/arch.cmake)
@ -378,10 +392,6 @@ include (cmake/find/mysqlclient.cmake)
# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. # When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
if (OS_LINUX AND NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
endif ()
if (USE_OPENCL) if (USE_OPENCL)
if (OS_DARWIN) if (OS_DARWIN)
set(OPENCL_LINKER_FLAGS "-framework OpenCL") set(OPENCL_LINKER_FLAGS "-framework OpenCL")
@ -397,6 +407,10 @@ endif ()
add_subdirectory (contrib EXCLUDE_FROM_ALL) add_subdirectory (contrib EXCLUDE_FROM_ALL)
if (NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
endif ()
macro (add_executable target) macro (add_executable target)
# invoke built-in add_executable # invoke built-in add_executable
# explicitly acquire and interpose malloc symbols by clickhouse_malloc # explicitly acquire and interpose malloc symbols by clickhouse_malloc

View File

@ -18,3 +18,4 @@ ClickHouse is an open-source column-oriented database management system that all
* [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020. * [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020.
* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-july-virtual-meetup-tickets-111199787558) on July 15, 2020. * [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-july-virtual-meetup-tickets-111199787558) on July 15, 2020.
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on July 17, 2020.

View File

@ -18,5 +18,4 @@ currently being supported with security updates:
## Reporting a Vulnerability ## Reporting a Vulnerability
To report a potential vulnerability in ClickHouse please use the security advisory feature of GitHub: To report a potential vulnerability in ClickHouse please send the details about it to [clickhouse-feedback@yandex-team.com](mailto:clickhouse-feedback@yandex-team.com).
https://github.com/ClickHouse/ClickHouse/security/advisories

View File

@ -163,7 +163,8 @@ public:
enum Signals : int enum Signals : int
{ {
StdTerminate = -1, StdTerminate = -1,
StopThread = -2 StopThread = -2,
SanitizerTrap = -3,
}; };
explicit SignalListener(BaseDaemon & daemon_) explicit SignalListener(BaseDaemon & daemon_)
@ -223,8 +224,12 @@ public:
std::string query_id; std::string query_id;
DB::ThreadStatus * thread_ptr{}; DB::ThreadStatus * thread_ptr{};
DB::readPODBinary(info, in); if (sig != SanitizerTrap)
DB::readPODBinary(context, in); {
DB::readPODBinary(info, in);
DB::readPODBinary(context, in);
}
DB::readPODBinary(stack_trace, in); DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in); DB::readBinary(thread_num, in);
DB::readBinary(query_id, in); DB::readBinary(query_id, in);
@ -279,7 +284,14 @@ private:
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig); VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig);
} }
LOG_FATAL(log, signalToErrorMessage(sig, info, context)); String error_message;
if (sig != SanitizerTrap)
error_message = signalToErrorMessage(sig, info, context);
else
error_message = "Sanitizer trap.";
LOG_FATAL(log, error_message);
if (stack_trace.getSize()) if (stack_trace.getSize())
{ {
@ -305,12 +317,12 @@ private:
String build_id_hex{}; String build_id_hex{};
#endif #endif
SentryWriter::onFault(sig, info, context, stack_trace, build_id_hex); if (sig != SanitizerTrap)
SentryWriter::onFault(sig, error_message, stack_trace, build_id_hex);
/// When everything is done, we will try to send these error messages to client. /// When everything is done, we will try to send these error messages to client.
if (thread_ptr) if (thread_ptr)
thread_ptr->onFatalError(); thread_ptr->onFatalError();
} }
}; };
@ -320,35 +332,27 @@ extern "C" void __sanitizer_set_death_callback(void (*)());
static void sanitizerDeathCallback() static void sanitizerDeathCallback()
{ {
Poco::Logger * log = &Poco::Logger::get("BaseDaemon"); /// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.
StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe. char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
if (query_id.size == 0) const StackTrace stack_trace;
{
LOG_FATAL(log, "(version {}{}) (from thread {}) (no query) Sanitizer trap.",
VERSION_STRING, VERSION_OFFICIAL, getThreadId());
}
else
{
LOG_FATAL(log, "(version {}{}) (from thread {}) (query_id: {}) Sanitizer trap.",
VERSION_STRING, VERSION_OFFICIAL, getThreadId(), query_id);
}
/// Just in case print our own stack trace. In case when llvm-symbolizer does not work. StringRef query_id = DB::CurrentThread::getQueryId();
StackTrace stack_trace; query_id.size = std::min(query_id.size, max_query_id_size);
if (stack_trace.getSize())
{
std::stringstream bare_stacktrace;
bare_stacktrace << "Stack trace:";
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
LOG_FATAL(log, bare_stacktrace.str()); int sig = SignalListener::SanitizerTrap;
} DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
/// Write symbolized stack trace line by line for better grep-ability. out.next();
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(10);
} }
#endif #endif

View File

@ -2,23 +2,26 @@
#include <Poco/File.h> #include <Poco/File.h>
#include <Poco/Util/Application.h> #include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <common/defines.h> #include <common/defines.h>
#include <common/getFQDNOrHostName.h> #include <common/getFQDNOrHostName.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <Common/StackTrace.h>
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include "Common/config_version.h" # include "Common/config_version.h"
# include <Common/config.h> # include <Common/config.h>
#endif #endif
#if USE_SENTRY #if USE_SENTRY
# include <sentry.h> // Y_IGNORE # include <sentry.h> // Y_IGNORE
# include <stdio.h> # include <stdio.h>
# include <filesystem> # include <filesystem>
#endif
#if USE_SENTRY
namespace namespace
{ {
@ -76,12 +79,12 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args)
} }
} }
} }
} }
#endif
void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
{ {
#if USE_SENTRY
bool enabled = false; bool enabled = false;
bool debug = config.getBool("send_crash_reports.debug", false); bool debug = config.getBool("send_crash_reports.debug", false);
auto * logger = &Poco::Logger::get("SentryWriter"); auto * logger = &Poco::Logger::get("SentryWriter");
@ -146,28 +149,19 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
{ {
LOG_INFO(logger, "Sending crash reports is disabled"); LOG_INFO(logger, "Sending crash reports is disabled");
} }
#else
UNUSED(config);
#endif
} }
void SentryWriter::shutdown() void SentryWriter::shutdown()
{ {
#if USE_SENTRY
if (initialized) if (initialized)
{
sentry_shutdown(); sentry_shutdown();
}
#endif
} }
void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace, const String & build_id_hex) void SentryWriter::onFault(int sig, const std::string & error_message, const StackTrace & stack_trace, const std::string & build_id_hex)
{ {
#if USE_SENTRY
auto * logger = &Poco::Logger::get("SentryWriter"); auto * logger = &Poco::Logger::get("SentryWriter");
if (initialized) if (initialized)
{ {
const std::string & error_message = signalToErrorMessage(sig, info, context);
sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str());
sentry_set_tag("signal", strsignal(sig)); sentry_set_tag("signal", strsignal(sig));
sentry_set_extra("signal_number", sentry_value_new_int32(sig)); sentry_set_extra("signal_number", sentry_value_new_int32(sig));
@ -240,11 +234,12 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
{ {
LOG_INFO(logger, "Not sending crash report"); LOG_INFO(logger, "Not sending crash report");
} }
#else
UNUSED(sig);
UNUSED(info);
UNUSED(context);
UNUSED(stack_trace);
UNUSED(build_id_hex);
#endif
} }
#else
void SentryWriter::initialize(Poco::Util::LayeredConfiguration &) {}
void SentryWriter::shutdown() {}
void SentryWriter::onFault(int, const std::string &, const StackTrace &, const std::string &) {}
#endif

View File

@ -1,12 +1,12 @@
#pragma once #pragma once
#include <common/types.h>
#include <Common/StackTrace.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <string> #include <string>
namespace Poco { namespace Util { class LayeredConfiguration; }}
class StackTrace;
/// \brief Sends crash reports to ClickHouse core developer team via https://sentry.io /// \brief Sends crash reports to ClickHouse core developer team via https://sentry.io
/// ///
/// This feature can enabled with "send_crash_reports.enabled" server setting, /// This feature can enabled with "send_crash_reports.enabled" server setting,
@ -14,20 +14,16 @@
/// ///
/// It is possible to send those reports to your own sentry account or account of consulting company you hired /// It is possible to send those reports to your own sentry account or account of consulting company you hired
/// by overriding "send_crash_reports.endpoint" setting. "send_crash_reports.debug" setting will allow to do that for /// by overriding "send_crash_reports.endpoint" setting. "send_crash_reports.debug" setting will allow to do that for
class SentryWriter namespace SentryWriter
{ {
public: void initialize(Poco::Util::LayeredConfiguration & config);
SentryWriter() = delete; void shutdown();
static void initialize(Poco::Util::LayeredConfiguration & config);
static void shutdown();
/// Not signal safe and can't be called from a signal handler /// Not signal safe and can't be called from a signal handler
static void onFault( void onFault(
int sig, int sig,
const siginfo_t & info, const std::string & error_message,
const ucontext_t & context,
const StackTrace & stack_trace, const StackTrace & stack_trace,
const String & build_id_hex const std::string & build_id_hex
); );
}; };

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh: # This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54436) SET(VERSION_REVISION 54437)
SET(VERSION_MAJOR 20) SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 6) SET(VERSION_MINOR 7)
SET(VERSION_PATCH 1) SET(VERSION_PATCH 1)
SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e) SET(VERSION_GITHASH d64e51d1a78c1b53c33915ca0f75c97b2333844f)
SET(VERSION_DESCRIBE v20.6.1.1-prestable) SET(VERSION_DESCRIBE v20.7.1.1-prestable)
SET(VERSION_STRING 20.6.1.1) SET(VERSION_STRING 20.7.1.1)
# end of autochange # end of autochange

View File

@ -44,13 +44,8 @@ endif ()
if (USE_INTERNAL_RE2_LIBRARY) if (USE_INTERNAL_RE2_LIBRARY)
set(RE2_BUILD_TESTING 0 CACHE INTERNAL "") set(RE2_BUILD_TESTING 0 CACHE INTERNAL "")
function(re2_support) add_subdirectory (re2)
# make option() honor normal variables for BUILD_SHARED_LIBS add_subdirectory (re2_st)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
add_subdirectory (re2)
add_subdirectory (re2_st)
endfunction()
re2_support()
endif () endif ()
if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY) if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
@ -227,19 +222,11 @@ if (USE_INTERNAL_AVRO_LIBRARY)
endif() endif()
if(USE_INTERNAL_GTEST_LIBRARY) if(USE_INTERNAL_GTEST_LIBRARY)
# Wrap into function because of CMAKE_POLICY_DEFAULT_CMP0022 set(GOOGLETEST_VERSION 1.10.0) # master
function(googletest_support) # Google Test from sources
set(GOOGLETEST_VERSION 1.10.0) # master add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should # avoid problems with <regexp.h>
# set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
# INTERFACE_LINK_LIBRARIES.
set(CMAKE_POLICY_DEFAULT_CMP0022 NEW)
# Google Test from sources
add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# avoid problems with <regexp.h>
target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
endfunction()
googletest_support()
elseif(GTEST_SRC_DIR) elseif(GTEST_SRC_DIR)
add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest) add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0) target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0)

View File

@ -24,7 +24,7 @@ set (SRCS
add_library(amqp-cpp ${SRCS}) add_library(amqp-cpp ${SRCS})
target_compile_options (amqp-cpp target_compile_options (amqp-cpp
PUBLIC PRIVATE
-Wno-old-style-cast -Wno-old-style-cast
-Wno-inconsistent-missing-destructor-override -Wno-inconsistent-missing-destructor-override
-Wno-deprecated -Wno-deprecated
@ -38,7 +38,7 @@ target_compile_options (amqp-cpp
-w -w
) )
target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include) target_include_directories (amqp-cpp SYSTEM PUBLIC ${LIBRARY_DIR}/include)
target_link_libraries (amqp-cpp PUBLIC ssl) target_link_libraries (amqp-cpp PUBLIC ssl)

View File

@ -20,5 +20,7 @@
#define ARROW_VERSION_PATCH #define ARROW_VERSION_PATCH
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH #define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */ #define ARROW_SO_VERSION ""
#define ARROW_FULL_SO_VERSION ""
/* #undef GRPCPP_PP_INCLUDE */ /* #undef GRPCPP_PP_INCLUDE */

2
contrib/base64 vendored

@ -1 +1 @@
Subproject commit 95ba56a9b041f9933f5cd2bbb2ee4e083468c20a Subproject commit af9b331f2b4f30b41c70f3a571ff904a8251c1d3

2
contrib/fmtlib vendored

@ -1 +1 @@
Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9 Subproject commit c108ee1d590089ccf642fc85652b845924067af2

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit e2131aa752d7e95441e08f9a18304c1445f2576a Subproject commit 1b666578c85094306b061352078022f6350bfab8

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.6.1.1) unstable; urgency=low clickhouse (20.7.1.1) unstable; urgency=low
* Modified source code * Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 22 Jun 2020 20:40:23 +0300 -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 13 Jul 2020 18:25:58 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.* ARG version=20.7.1.*
RUN apt-get update \ RUN apt-get update \
&& apt-get install --yes --no-install-recommends \ && apt-get install --yes --no-install-recommends \

View File

@ -31,6 +31,10 @@
"name": "yandex/clickhouse-integration-test", "name": "yandex/clickhouse-integration-test",
"dependent": [] "dependent": []
}, },
"docker/test/fuzzer": {
"name": "yandex/clickhouse-fuzzer",
"dependent": []
},
"docker/test/performance-comparison": { "docker/test/performance-comparison": {
"name": "yandex/clickhouse-performance-comparison", "name": "yandex/clickhouse-performance-comparison",
"dependent": [] "dependent": []
@ -87,6 +91,14 @@
"docker/test/fasttest": { "docker/test/fasttest": {
"name": "yandex/clickhouse-fasttest", "name": "yandex/clickhouse-fasttest",
"dependent": [] "dependent": []
},
"docker/test/integration/s3_proxy": {
"name": "yandex/clickhouse-s3-proxy",
"dependent": []
},
"docker/test/integration/resolver": {
"name": "yandex/clickhouse-python-bottle",
"dependent": []
} }
} }

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04 FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.* ARG version=20.7.1.*
ARG gosu_ver=1.10 ARG gosu_ver=1.10
RUN apt-get update \ RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.* ARG version=20.7.1.*
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \ apt-get install -y apt-transport-https dirmngr && \

View File

@ -0,0 +1,36 @@
# docker build -t yandex/clickhouse-fuzzer .
FROM ubuntu:18.04
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
ca-certificates \
curl \
gdb \
git \
libc6-dbg \
moreutils \
ncdu \
p7zip-full \
parallel \
psmisc \
rsync \
tree \
tzdata \
vim \
wget \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
COPY * /
CMD cd /workspace \
&& /run-fuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
# docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-fuzzer

View File

@ -0,0 +1,7 @@
<yandex>
<profiles>
<default>
<max_execution_time>10</max_execution_time>
</default>
</profiles>
</yandex>

124
docker/test/fuzzer/run-fuzzer.sh Executable file
View File

@ -0,0 +1,124 @@
#!/bin/bash
set -eux
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
function clone
{
(
rm -rf ch ||:
mkdir ch
cd ch
git init
git remote add origin https://github.com/ClickHouse/ClickHouse
git fetch --depth=1 origin "$SHA_TO_TEST"
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git fetch --depth=1 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
git checkout "$SHA_TO_TEST"
)
}
function download
{
# wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
# | tar --strip-components=1 -zxv
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-10_debug_none_bundled_unsplitted_disable_False_binary/clickhouse"
chmod +x clickhouse
}
function configure
{
rm -rf db ||:
mkdir db ||:
cp -av "$repo_dir"/programs/server/config* db
cp -av "$repo_dir"/programs/server/user* db
cp -av "$repo_dir"/tests/config db/config.d
cp -av "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}
function watchdog
{
sleep 3600
echo "Fuzzing run has timed out"
./clickhouse client --query "select elapsed, query from system.processes" ||:
killall -9 clickhouse clickhouse-server clickhouse-client ||:
}
function fuzz
{
./clickhouse server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
./clickhouse client --query "select 1"
kill -0 $server_pid
echo Server started
fuzzer_exit_code=0
./clickhouse client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
./clickhouse client --query "select elapsed, query from system.processes" ||:
kill -9 $server_pid ||:
return $fuzzer_exit_code
}
case "$stage" in
"")
;&
"clone")
time clone
if [ -v FUZZ_LOCAL_SCRIPT ]
then
# just fall through
echo Using the testing script from docker container
:
else
# Run the testing script from the repository
echo Using the testing script from the repository
export stage=download
# Keep the error code
time ch/docker/test/fuzzer/run-fuzzer.sh || exit $?
fi
;&
"download")
time download
;&
"configure")
time configure
;&
"fuzz")
watchdog &
watchdog_pid=$!
fuzzer_exit_code=0
time fuzz || fuzzer_exit_code=$?
kill $watchdog_pid ||:
# Debug
date
sleep 10
jobs
pstree -aspgT
exit $fuzzer_exit_code
;&
esac

View File

@ -1,4 +1,5 @@
# docker build -t yandex/clickhouse-python-bottle .
# Helper docker container to run python bottle apps # Helper docker container to run python bottle apps
FROM python:3 FROM python:3
RUN python -m pip install bottle RUN python -m pip install bottle

View File

@ -76,4 +76,3 @@ VOLUME /var/lib/docker
EXPOSE 2375 EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"] ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "pytest $PYTEST_OPTS"] CMD ["sh", "-c", "pytest $PYTEST_OPTS"]

View File

@ -5,50 +5,37 @@ services:
image: minio/minio image: minio/minio
volumes: volumes:
- data1-1:/data1 - data1-1:/data1
- ${MINIO_CERTS_DIR:-}:/certs
ports: ports:
- "9001:9001" - "9001:9001"
environment: environment:
MINIO_ACCESS_KEY: minio MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123 MINIO_SECRET_KEY: minio123
command: server --address :9001 /data1-1 command: server --address :9001 --certs-dir /certs /data1-1
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9001/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
depends_on: depends_on:
- redirect - proxy1
- resolver - proxy2
# Redirects all requests to origin Minio. # HTTP proxies for Minio.
redirect:
image: schmunk42/nginx-redirect
volumes:
- /nginx:/nginx
environment:
- SERVER_REDIRECT=minio1:9001
- SERVER_REDIRECT_CODE=307
- SERVER_ACCESS_LOG=/nginx/access.log
# HTTP proxies for Minio.
proxy1: proxy1:
image: vimagick/tinyproxy image: yandex/clickhouse-s3-proxy
ports: ports:
- "4081:8888" - "8080" # Redirect proxy port
- "80" # Reverse proxy port
- "443" # Reverse proxy port (secure)
proxy2: proxy2:
image: vimagick/tinyproxy image: yandex/clickhouse-s3-proxy
ports: ports:
- "4082:8888" - "8080"
- "80"
- "443"
# Empty container to run proxy resolver. # Empty container to run proxy resolver.
resolver: resolver:
build: image: yandex/clickhouse-python-bottle
context: ../../../docker/test/integration/
dockerfile: resolver/Dockerfile
network: host
ports: ports:
- "4083:8080" - "8080"
tty: true tty: true
depends_on: depends_on:
- proxy1 - proxy1

View File

@ -0,0 +1,11 @@
# docker build -t yandex/clickhouse-s3-proxy .
FROM nginx:alpine
COPY run.sh /run.sh
COPY server.crt /etc/ssl/certs/server.crt
COPY server.key /etc/ssl/certs/server.key
COPY nginx.conf /etc/nginx/nginx.conf
RUN chmod +x /run.sh
CMD ["/run.sh"]

View File

@ -0,0 +1,59 @@
events {
use epoll;
worker_connections 128;
}
http {
# Docker DNS resolver
resolver 127.0.0.11;
map $http_x_forwarded_proto $redirect_scheme {
default $scheme;
https https;
}
# Redirect proxy
server {
listen 8080;
server_name proxy1 proxy2;
# To allow special characters in headers
ignore_invalid_headers off;
return 307 $redirect_scheme://${S3_HOST}:${S3_PORT}$request_uri;
}
# Reverse proxy
server {
listen 80;
listen 443 ssl;
server_name proxy1 proxy2;
ssl_certificate /etc/ssl/certs/server.crt;
ssl_certificate_key /etc/ssl/certs/server.key;
# To allow special characters in headers
ignore_invalid_headers off;
# Allow any size file to be uploaded.
# Set to a value such as 1000m; to restrict file size to a specific value
client_max_body_size 0;
# To disable buffering
proxy_buffering off;
location / {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_connect_timeout 300;
# Default is HTTP/1, keepalive is only enabled in HTTP/1.1
proxy_http_version 1.1;
proxy_set_header Connection "";
chunked_transfer_encoding off;
proxy_pass $scheme://${S3_HOST}:${S3_PORT};
proxy_ssl_verify off;
}
}
}

View File

@ -0,0 +1,15 @@
#!/usr/bin/env sh
if [ -z "$S3_HOST" ] ; then
S3_HOST='minio1'
fi
if [ -z "$S3_PORT" ] ; then
S3_PORT='9001'
fi
# Replace config placeholders with environment variables
sed -i "s|\${S3_HOST}|${S3_HOST}|" /etc/nginx/nginx.conf
sed -i "s|\${S3_PORT}|${S3_PORT}|" /etc/nginx/nginx.conf
exec nginx -g 'daemon off;'

View File

@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDBTCCAe2gAwIBAgIRANb2pr4HgR8YFwKNJMUSWiIwDQYJKoZIhvcNAQELBQAw
EjEQMA4GA1UEChMHQWNtZSBDbzAeFw0yMDA3MDkxODE1MDBaFw0yMTA3MDkxODE1
MDBaMBIxEDAOBgNVBAoTB0FjbWUgQ28wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
ggEKAoIBAQC9ORgaBCx42ejp9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFH
mGCIJJVjVV0rafv3vw+9f9u4wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnf
F9Fa8wnk89b0xGWyM7jie7/cTIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9r
cFLLjNHuv1uAc32jdfQEPluvmBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBt
kRTUPNq2+fGv/eLJ9Lxm3153Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSC
zGw84teTpmDwSyiSR70TYJdtBGQqZvLZAgMBAAGjVjBUMA4GA1UdDwEB/wQEAwIC
pDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/MBwGA1UdEQQV
MBOCBm1pbmlvMYIJbG9jYWxob3N0MA0GCSqGSIb3DQEBCwUAA4IBAQAKU2LhvFFz
RFfUibt/WTj3rtUfKEBrQuUOYt2A8MTbC8pyEu+UJASTzunluUFze5zchEm1s3pZ
YRLcNwbJqLE6CzUxQ9b2iUhaeWuKrx4ZoPkY0uGiaXM/iKfVKTuNmhF2Sf/P4xUE
Pt19yQjpIhcicWQc37BBQFvnvy+n5wgHa/pgl1+QUvAa/fwYhF9S28xRLESzZepm
NMYysopV+YMaxcFa9SH44toXtXnvRWwVdEorlq1W3/AiJg8hDPzSa9UXLMjA968J
ONtn3qvwac9Ot53+QsXJdsMmDZLWGCi6I1w0ZQetpr/0ubaA1F3GdK9eB/S0thqU
l2VUgn3c/kKS
-----END CERTIFICATE-----

View File

@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC9ORgaBCx42ejp
9PSjc0uvwH/hTB6yZvZB4S+wxbzzfeKomX/JBcFHmGCIJJVjVV0rafv3vw+9f9u4
wrZpN4HZKnVyz3mBXEA1WDvLTLV8n8zVyso1qbnfF9Fa8wnk89b0xGWyM7jie7/c
TIGMrgm7hIPaM2zDzFwIfIAqZ1AexC4vADIffF9rcFLLjNHuv1uAc32jdfQEPluv
mBMzGkz254+MabxZWIZjkYn70kNSZDoyFmMGafBtkRTUPNq2+fGv/eLJ9Lxm3153
Ja0sCyzLlEo9+/z4ERqM5zwWre4vcwfO63c5pcSCzGw84teTpmDwSyiSR70TYJdt
BGQqZvLZAgMBAAECggEANe8oJ4I5CtlRwh3H/S7Hy/iaeqUvuroORwjghwpVqTGg
gV3/RlUVmkqceTG0QvP58n3rC9qxqdnfzvHw/FyN7lBj2a25fF3HD21u3aunrzX9
NJLwwAr4p9YqHjpX/6JhCrNQKVMEx8luDmTgKDETJRfIXVF7FvQQ53pVLcD03U+g
MgN61HBzfT5L0TLHoiKNQbVi+Wm1gw3zvb/a9Z1rULRZfIuKGM0bNNqRZt4rUUAV
QicklDR0Qv59jhr5Y/zjinKkqF8qudvUkaNT2JH1DLfXiAhuC0OQugMjYzNntQB4
hMhkqARnjuk/WPMvnXivnqx9o69BL5wyXIj3vD4fgQKBgQDVKaXAZJ5bo3VfcpLm
cyjtUuOzAxLU1bVGI0Hm1ARqeGVxSTypZLSX8xFi2n5Bvbgh/Y60aEac/1uKoXA9
gej1MT4hKpXyagrARx97E8zk5nf88kVxkiKUrifMjP2lDzHIYhdKk9R3SiV6gWvA
FoJtjBwFhJ6uWUPyry4nqFSENQKBgQDjP9k6CTZF0EnDqbADiQr7VKpebqhtLWRD
U0bQh/l57VrWqGksVOlivIJChP49q1H+hQ1YgfKIEDag8JJnf/inUSpVsw1ljAjv
knqNzn0Gdd9lTsiNGgqlCjhmWedkh4eO8uau479TwQc6gB4PQdLAFynQtt8Kk45P
GxdpRx4AlQKBgQCgxUGbYwhBC37aF1sObqrenBbajCXm2qxXEv6Ab0ZJWzb/g4I6
LJc8x3pEeZCiWsoG8Otxy/f+L2bGn049Rb8DNzmp4Cmp5SrorHvk4yE1P1IeOEgC
CXsFcnjYATrJBDXC8aCpgefMdOLhi71N6mxC3VrBGq5nxzHFVzTTelUMRQKBgQDa
yekhiCb5liy+tcuhy7qH+Z7BpjaATrh+XVoLgS5+5jeT/basmN/OUQH0e0iwJRaf
Poh30zynJT0DPDsobLwAkxN4SRg30Vf1GAjoKIqUwr2fMvfBafYfqbRdTmeKkTXB
OjlA3kKhp3GHMDxAojX+/Q4kRTx+WUwk+0dR88d99QKBgEiYrkSLjKXUFllDmVyp
HtlYKZiq5c33DA06SA2uVOprCdTbnbvP4WrgUsLGvqBcaPEd06fGGbvJWwUdnkXM
HNAkqSeUe5ueovidtoPdF+aPyxdGg3Z8551xOoHZFYrvgdZ4YMPcJrwQQsvWCcYP
GDnSoD8Xjd2LmekTpDBt5ZVz
-----END PRIVATE KEY-----

View File

@ -282,6 +282,7 @@ do
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv" sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv" sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv" sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
sed -n "s/^short\t/$test_name\t/p" < "$test_file" >> "analyze/marked-short-queries.tsv"
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv" sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
done done
unset IFS unset IFS
@ -291,6 +292,9 @@ clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV, create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float'); 'test text, query_index int, query_id text, version UInt8, time float');
-- Separately process 'partial' queries which we could only run on the new server
-- because they use new functions. We can't make normal stats for them, but still
-- have to show some stats so that the PR author can tweak them.
create view partial_queries as select test, query_index create view partial_queries as select test, query_index
from file('analyze/partial-queries.tsv', TSV, from file('analyze/partial-queries.tsv', TSV,
'test text, query_index int, servers Array(int)'); 'test text, query_index int, servers Array(int)');
@ -303,6 +307,7 @@ create table partial_query_times engine File(TSVWithNamesAndTypes,
group by test, query_index group by test, query_index
; ;
-- Process queries that were run normally, on both servers.
create view left_query_log as select * create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes, from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")'); '$(cat "left-query-log.tsv.columns")');
@ -317,7 +322,10 @@ create view query_logs as
select *, 1 version from right_query_log select *, 1 version from right_query_log
; ;
create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-full.tsv') -- This is a single source of truth on all metrics we have for query runs. The
-- metrics include ProfileEvents from system.query_log, and query run times
-- reported by the perf.py test runner.
create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv')
as as
with ( with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because -- sumMapState with the list of all keys with '-0.' values. Negative zero is because
@ -349,18 +357,29 @@ create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-
where (test, query_index) not in partial_queries where (test, query_index) not in partial_queries
; ;
create table query_run_metrics engine File( -- This is just for convenience -- human-readable + easy to make plots.
create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metrics-denorm.tsv')
as select test, query_index, metric_names, version, query_id, metric_values
from query_run_metric_arrays
array join metric_names, metric_values
order by test, query_index, metric_names, version, query_id
;
-- This is for statistical processing with eqmed.sql
create table query_run_metrics_for_stats engine File(
TSV, -- do not add header -- will parse with grep TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv') 'analyze/query-run-metrics-for-stats.tsv')
as select test, query_index, 0 run, version, metric_values as select test, query_index, 0 run, version, metric_values
from query_run_metrics_full from query_run_metric_arrays
order by test, query_index, run, version order by test, query_index, run, version
; ;
-- This is the list of metric names, so that we can join them back after
-- statistical processing.
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv') create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metrics_full limit 1 as select metric_names from query_run_metric_arrays limit 1
; ;
" " 2> >(tee -a analyze/errors.log 1>&2)
# This is a lateral join in bash... please forgive me. # This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with # We don't have arrayPermute(), so I have to make random permutations with
@ -370,16 +389,16 @@ create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-n
# for each file. I do this in parallel using GNU parallel. # for each file. I do this in parallel using GNU parallel.
( set +x # do not bloat the log ( set +x # do not bloat the log
IFS=$'\n' IFS=$'\n'
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) for prefix in $(cut -f1,2 "analyze/query-run-metrics-for-stats.tsv" | sort | uniq)
do do
file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv" file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv"
grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" & grep "^$prefix " "analyze/query-run-metrics-for-stats.tsv" > "$file" &
printf "%s\0\n" \ printf "%s\0\n" \
"clickhouse-local \ "clickhouse-local \
--file \"$file\" \ --file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \ --structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \ --query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \ >> \"analyze/query-metric-stats.tsv\"" \
2>> analyze/errors.log \ 2>> analyze/errors.log \
>> analyze/commands.txt >> analyze/commands.txt
done done
@ -388,6 +407,33 @@ unset IFS
) )
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select test, query_index, metric_name, left, right, diff, stat_threshold
from file('analyze/query-metric-stats.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
order by test, query_index, metric_name
;
create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
'analyze/query-metric-stats-denorm.tsv')
as select test, query_index, metric_name, left, right, diff, stat_threshold
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)
} }
# Analyze results # Analyze results
@ -403,58 +449,46 @@ build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||: cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||:
short_query_threshold="0.02"
clickhouse-local --query " clickhouse-local --query "
create view query_display_names as select * from create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV, file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text') 'test text, query_index int, query_display_name text')
; ;
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float')
;
-- Report for partial queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
as select floor(time_median, 3) m, floor(time_stddev / time_median, 3) v, as select floor(time_median, 3) time,
floor(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name test, query_index, query_display_name
from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes, from partial_query_times
'test text, query_index int, time_stddev float, time_median float') t
join query_display_names using (test, query_index) join query_display_names using (test, query_index)
order by test, query_index order by test, query_index
; ;
-- WITH, ARRAY JOIN and CROSS JOIN do not like each other: create view query_metric_stats as
-- https://github.com/ClickHouse/ClickHouse/issues/11868 select * from file('analyze/query-metric-stats-denorm.tsv',
-- https://github.com/ClickHouse/ClickHouse/issues/11757 TSVWithNamesAndTypes,
-- Because of this, we make a view with arrays first, and then apply all the 'test text, query_index int, metric_name text, left float, right float,
-- array joins. diff float, stat_threshold float')
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv')
as
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
; ;
-- Main statistics for queries -- query time as reported in query log. -- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select as select
-- FIXME Comparison mode doesn't make sense for queries that complete -- Comparison mode doesn't make sense for queries that complete
-- immediately (on the same order of time as noise). We compute average -- immediately (on the same order of time as noise). If query duration is
-- run time between old and new version, and if it is below a threshold, -- less that some threshold, we just skip it. If there is a significant
-- we just skip the query. If there is a significant regression, the -- regression in such query, the time will exceed the threshold, and we
-- average will be above threshold, we'll process it normally and will -- well process it normally and detect the regression.
-- detect the regression. right < $short_query_threshold as short,
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail, not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show, not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
@ -469,63 +503,22 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left join file('analyze/report-thresholds.tsv', TSV, left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds 'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test on query_metric_stats.test = thresholds.test
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
where metric_name = 'server_time' where metric_name = 'server_time'
order by test, query_index, metric_name order by test, query_index, metric_name
; ;
-- keep the table in old format so that we can analyze new and old data together create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_index, query_display_name query,
left, right, diff, stat_threshold, report_threshold,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query_index
) runs
left join query_display_names
on runs.test = query_display_names.test
and runs.query_index = query_display_names.query_index
left join file('analyze/report-thresholds.tsv',
TSV, 'test text, report_threshold float') thresholds
on runs.test = thresholds.test
left join query_metric_stats
on runs.test = query_metric_stats.test
and runs.query_index = query_metric_stats.query_index
where
query_metric_stats.metric_name = 'server_time'
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc; from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc; from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, create table test_time_changes engine File(TSV, 'report/test-time-changes.tsv') as
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from ( select test, queries, average_time_change from (
select test, count(*) queries, select test, count(*) queries,
sum(left) as left, sum(right) as right, sum(left) as left, sum(right) as right,
@ -536,22 +529,22 @@ create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.ts
) )
; ;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as create table unstable_tests engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries from queries
group by test group by test
order by total_unstable + total_changed desc order by total_unstable + total_changed desc
; ;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
select test, select test,
queries, queries,
coalesce(total_unstable, 0) total_unstable, coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed, coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad, total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv from test_time_changes
full join unstable_tests_tsv full join unstable_tests
using test using test
where (abs(average_time_change) > 0.05 and queries > 5) where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0) or (total_bad > 0)
@ -559,28 +552,28 @@ create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.ts
settings join_use_nulls = 1 settings join_use_nulls = 1
; ;
create table query_time engine Memory as select * create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV, from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float'); 'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select * create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') as
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, test, query_display_name select client, server, floor(client/server, 3) p, test, query_display_name
from query_time left join query_display_names using (test, query_index) from total_client_time_per_query left join query_display_names using (test, query_index)
where p > 1.02 order by p desc; where p > 1.02 order by p desc;
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table test_time engine Memory as create table test_time engine Memory as
select test, sum(client) total_client_time, select test, sum(client) total_client_time,
maxIf(client, not short) query_max, maxIf(client, not short) query_max,
minIf(client, not short) query_min, minIf(client, not short) query_min,
count(*) queries, sum(short) short_queries count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index) from total_client_time_per_query full join queries using (test, query_index)
group by test; group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select wall_clock.test, real, select wall_clock_time_per_test.test, real,
floor(total_client_time, 3), floor(total_client_time, 3),
queries, queries,
short_queries, short_queries,
@ -590,23 +583,62 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
from test_time from test_time
-- wall clock times are also measured for skipped tests, so don't -- wall clock times are also measured for skipped tests, so don't
-- do full join -- do full join
left join wall_clock using test left join wall_clock_time_per_test using test
order by avg_real_per_query desc; order by avg_real_per_query desc;
-- report for all queries page, only main metric -- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail, select changed_fail, unstable_fail,
left, right, diff, left, right, diff,
floor(left > right ? left / right : right / left, 3), floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name stat_threshold, test, query_index, query_display_name
from queries order by test, query_index; from queries order by test, query_index;
-- queries for which we will build flamegraphs (see below)
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
-- List of queries that have 'short' duration, but are not marked as 'short' by
-- the test author (we report them).
create table unmarked_short_queries_report
engine File(TSV, 'report/unmarked-short-queries.tsv')
as select time, test, query_index, query_display_name
from (
select right time, test, query_index from queries where short
union all
select time_median, test, query_index from partial_query_times
where time_median < $short_query_threshold
) times
left join query_display_names
on times.test = query_display_names.test
and times.query_index = query_display_names.query_index
where (test, query_index) not in
(select * from file('analyze/marked-short-queries.tsv', TSV,
'test text, query_index int'))
order by test, query_index
;
--------------------------------------------------------------------------------
-- various compatibility data formats follow, not related to the main report
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- new report for all queries with all metrics (no page yet) -- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff, select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3), floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name stat_threshold, test, query_index, query_display_name
from query_metric_stats from query_metric_stats
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
order by test, query_index; order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2) " 2> >(tee -a report/errors.log 1>&2)

View File

@ -37,21 +37,44 @@ available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems: for e in subst_elems:
available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')] available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
# Take care to keep the order of queries -- sometimes we have DROP IF EXISTS # Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters. # followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates): def substitute_parameters(query_templates, other_templates = []):
result = [] query_results = []
for q in query_templates: other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n) keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n)
values = [available_parameters[k] for k in keys] values = [available_parameters[k] for k in keys]
result.extend([ combos = itertools.product(*values)
q.format(**dict(zip(keys, values_combo))) for c in combos:
for values_combo in itertools.product(*values)]) with_keys = dict(zip(keys, c))
return result query_results.append(q.format(**with_keys))
for j, t in enumerate(other_templates):
other_results[j].append(t[i].format(**with_keys))
if len(other_templates):
return query_results, other_results
else:
return query_results
# Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short.
test_queries = []
for e in root.findall('query'):
new_queries = []
if 'short' in e.attrib:
new_queries, [is_short] = substitute_parameters([e.text], [[e.attrib['short']]])
for i, s in enumerate(is_short):
# Don't print this if we only need to print the queries.
if eval(s) and not args.print_queries:
print(f'short\t{i + len(test_queries)}')
else:
new_queries = substitute_parameters([e.text])
test_queries += new_queries
# Build a list of test queries, processing all substitutions
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates)
# If we're only asked to print the queries, do that and exit # If we're only asked to print the queries, do that and exit
if args.print_queries: if args.print_queries:
@ -166,7 +189,7 @@ for conn_index, c in enumerate(connections):
c.execute(q) c.execute(q)
print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
# Run test queries # Run test queries.
for query_index, q in enumerate(test_queries): for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}' query_prefix = f'{test_name}.query{query_index}'

View File

@ -196,6 +196,12 @@ if args.report == 'main':
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'], ['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows) slow_on_client_rows)
unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
error_tests += len(unmarked_short_rows)
printSimpleTable('Short queries not marked as short',
['New client time, s', 'Test', '#', 'Query'],
unmarked_short_rows)
def print_partial(): def print_partial():
rows = tsvRows('report/partial-queries-report.tsv') rows = tsvRows('report/partial-queries-report.tsv')
if not rows: if not rows:

View File

@ -4,15 +4,14 @@ toc_priority: 70
toc_title: Introduction toc_title: Introduction
--- ---
# ClickHouse Commercial Services # ClickHouse Commercial Services {#clickhouse-commercial-services}
This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies not necessarily affiliated with Yandex. This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies not necessarily affiliated with Yandex.
Service categories: Service categories:
- [Cloud](cloud.md) - [Cloud](../commercial/cloud.md)
- [Support](support.md) - [Support](../commercial/support.md)
!!! note "For service providers" !!! note "For service providers"
If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn't fit into existing categories). The easiest way to open a pull-request for documentation page is by using a “pencil” edit button in the top-right corner. If your service available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description). If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesnt fit into existing categories). The easiest way to open a pull-request for documentation page is by using a “pencil” edit button in the top-right corner. If your service available in some local market, make sure to mention it in a localized documentation page as well (or at least point it out in a pull-request description).

View File

@ -5,7 +5,7 @@ toc_title: Architecture Overview
# Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture} # Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture}
ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called "vectorized query execution" and it helps lower the cost of actual data processing. ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution” and it helps lower the cost of actual data processing.
> This idea is nothing new. It dates back to the `APL` (A programming language, 1957) and its descendants: `A +` (APL dialect), `J` (1990), `K` (1993), and `Q` (programming language from Kx Systems, 2003). Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `VectorWise` system (also known as Actian Vector Analytic Database by Actian Corporation). > This idea is nothing new. It dates back to the `APL` (A programming language, 1957) and its descendants: `A +` (APL dialect), `J` (1990), `K` (1993), and `Q` (programming language from Kx Systems, 2003). Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `VectorWise` system (also known as Actian Vector Analytic Database by Actian Corporation).
@ -21,11 +21,11 @@ Various `IColumn` implementations (`ColumnUInt8`, `ColumnString`, and so on) are
Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator []` method to get the n-th value as a `Field`, and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on. Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator []` method to get the n-th value as a `Field`, and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on.
`Field` doesn't have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`. `Field` doesnt have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`.
## Leaky Abstractions {#leaky-abstractions} ## Leaky Abstractions {#leaky-abstractions}
`IColumn` has methods for common relational transformations of data, but they dont meet all needs. For example, `ColumnUInt64` doesn't have a method to calculate the sum of two columns, and `ColumnString` doesn't have a method to run a substring search. These countless routines are implemented outside of `IColumn`. `IColumn` has methods for common relational transformations of data, but they dont meet all needs. For example, `ColumnUInt64` doesnt have a method to calculate the sum of two columns, and `ColumnString` doesnt have a method to run a substring search. These countless routines are implemented outside of `IColumn`.
Various functions on columns can be implemented in a generic, non-efficient way using `IColumn` methods to extract `Field` values, or in a specialized way using knowledge of inner memory layout of data in a specific `IColumn` implementation. It is implemented by casting functions to a specific `IColumn` type and deal with internal representation directly. For example, `ColumnUInt64` has the `getData` method that returns a reference to an internal array, then a separate routine reads or fills that array directly. We have “leaky abstractions” to allow efficient specializations of various routines. Various functions on columns can be implemented in a generic, non-efficient way using `IColumn` methods to extract `Field` values, or in a specialized way using knowledge of inner memory layout of data in a specific `IColumn` implementation. It is implemented by casting functions to a specific `IColumn` type and deal with internal representation directly. For example, `ColumnUInt64` has the `getData` method that returns a reference to an internal array, then a separate routine reads or fills that array directly. We have “leaky abstractions” to allow efficient specializations of various routines.
@ -35,7 +35,7 @@ Various functions on columns can be implemented in a generic, non-efficient way
`IDataType` and `IColumn` are only loosely related to each other. Different data types can be represented in memory by the same `IColumn` implementations. For example, `DataTypeUInt32` and `DataTypeDateTime` are both represented by `ColumnUInt32` or `ColumnConstUInt32`. In addition, the same data type can be represented by different `IColumn` implementations. For example, `DataTypeUInt8` can be represented by `ColumnUInt8` or `ColumnConstUInt8`. `IDataType` and `IColumn` are only loosely related to each other. Different data types can be represented in memory by the same `IColumn` implementations. For example, `DataTypeUInt32` and `DataTypeDateTime` are both represented by `ColumnUInt32` or `ColumnConstUInt32`. In addition, the same data type can be represented by different `IColumn` implementations. For example, `DataTypeUInt8` can be represented by `ColumnUInt8` or `ColumnConstUInt8`.
`IDataType` only stores metadata. For instance, `DataTypeUInt8` doesn't store anything at all (except virtual pointer `vptr`) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings). `IDataType` only stores metadata. For instance, `DataTypeUInt8` doesnt store anything at all (except virtual pointer `vptr`) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings).
`IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of the XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface. `IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of the XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface.
@ -120,9 +120,9 @@ There are ordinary functions and aggregate functions. For aggregate functions, s
Ordinary functions dont change the number of rows they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution. Ordinary functions dont change the number of rows they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution.
There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulatexploit block processing and violate the independence of rows. There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and \[runningAccumulate\](../sql-reference/functions/other-functions.md\#runningaccumulatexploit block processing and violate the independence of rows.
ClickHouse has strong typing, so theres no implicit type conversion. If a function doesn't support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function. ClickHouse has strong typing, so theres no implicit type conversion. If a function doesnt support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.
Implementing a function may be slightly inconvenient because a function explicitly dispatches supported data types and supported `IColumns`. For example, the `plus` function has code generated by instantiation of a C++ template for each combination of numeric types, and constant or non-constant left and right arguments. Implementing a function may be slightly inconvenient because a function explicitly dispatches supported data types and supported `IColumns`. For example, the `plus` function has code generated by instantiation of a C++ template for each combination of numeric types, and constant or non-constant left and right arguments.
@ -169,13 +169,13 @@ There is no global query plan for distributed query execution. Each node has its
`MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in the primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate by many columns, you get values for the corresponding rows. `MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in the primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate by many columns, you get values for the corresponding rows.
The primary key itself is “sparse”. It doesn't address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached. The primary key itself is “sparse”. It doesnt address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table. When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table.
When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. Thats why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts. When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. Thats why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.
`MergeTree` is not an LSM tree because it doesn't contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently about once per second is ok, but a thousand times a second is not. We did it this way for simplicitys sake, and because we are already inserting data in batches in our applications. `MergeTree` is not an LSM tree because it doesnt contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently about once per second is ok, but a thousand times a second is not. We did it this way for simplicitys sake, and because we are already inserting data in batches in our applications.
> MergeTree tables can only have one (primary) index: there arent any secondary indices. It would be nice to allow multiple physical representations under one logical table, for example, to store data in more than one physical order or even to allow representations with pre-aggregated data along with original data. > MergeTree tables can only have one (primary) index: there arent any secondary indices. It would be nice to allow multiple physical representations under one logical table, for example, to store data in more than one physical order or even to allow representations with pre-aggregated data along with original data.
@ -187,7 +187,7 @@ Replication in ClickHouse can be configured on a per-table basis. You could have
Replication is implemented in the `ReplicatedMergeTree` storage engine. The path in `ZooKeeper` is specified as a parameter for the storage engine. All tables with the same path in `ZooKeeper` become replicas of each other: they synchronize their data and maintain consistency. Replicas can be added and removed dynamically simply by creating or dropping a table. Replication is implemented in the `ReplicatedMergeTree` storage engine. The path in `ZooKeeper` is specified as a parameter for the storage engine. All tables with the same path in `ZooKeeper` become replicas of each other: they synchronize their data and maintain consistency. Replicas can be added and removed dynamically simply by creating or dropping a table.
Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn't support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails. Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesnt support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails.
Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. It is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log. Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. It is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log.

View File

@ -5,11 +5,11 @@ toc_priority: 25
toc_title: hidden toc_title: hidden
--- ---
# ClickHouse Engines # ClickHouse Engines {#clickhouse-engines}
There are two key engine kinds in ClickHouse: There are two key engine kinds in ClickHouse:
- [Table engines](table-engines/index.md) - [Table engines](../engines/table-engines/index.md)
- [Database engines](database-engines/index.md) - [Database engines](../engines/database-engines/index.md)
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##} {## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -3,14 +3,14 @@ toc_folder_title: Integrations
toc_priority: 30 toc_priority: 30
--- ---
# Table Engines for Integrations # Table Engines for Integrations {#table-engines-for-integrations}
ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require to use custom query methods on each use. ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like external dictionaries or table functions, which require to use custom query methods on each use.
List of supported integrations: List of supported integrations:
- [ODBC](odbc.md) - [ODBC](../../../engines/table-engines/integrations/odbc.md)
- [JDBC](jdbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md)
- [MySQL](mysql.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md)
- [HDFS](hdfs.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md)
- [Kafka](kafka.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md)

View File

@ -18,7 +18,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); ) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
``` ```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query. See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original MySQL table structure: The table structure can differ from the original MySQL table structure:

View File

@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
ENGINE = ODBC(connection_settings, external_database, external_table) ENGINE = ODBC(connection_settings, external_database, external_table)
``` ```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query. See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the source table structure: The table structure can differ from the source table structure:

View File

@ -0,0 +1,119 @@
---
toc_priority: 6
toc_title: RabbitMQ
---
# RabbitMQ Engine
This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.com).
RabbitMQ lets you:
- Publish or subscribe to data flows.
- Process streams as they become available.
## Creating a Table {#table_engine-rabbitmq-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = RabbitMQ SETTINGS
rabbitmq_host_port = 'host:port',
rabbitmq_exchange_name = 'exchange_name',
rabbitmq_format = 'data_format'[,]
[rabbitmq_exchange_type = 'exchange_type',]
[rabbitmq_routing_key_list = 'key1,key2,...',]
[rabbitmq_row_delimiter = 'delimiter_symbol',]
[rabbitmq_num_consumers = N,]
[rabbitmq_num_queues = N,]
[rabbitmq_transactional_channel = 0]
```
Required parameters:
- `rabbitmq_host_port` host:port (for example, `localhost:5672`).
- `rabbitmq_exchange_name` RabbitMQ exchange name.
- `rabbitmq_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
- `rabbitmq_exchange_type` The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent-hash`. Default: `fanout`.
- `rabbitmq_routing_key_list` A comma-separated list of routing keys.
- `rabbitmq_row_delimiter` Delimiter character, which ends the message.
- `rabbitmq_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
- `rabbitmq_num_queues` The number of queues per consumer. Default: `1`. Specify more queues if the capacity of one queue per consumer is insufficient. Single queue can contain up to 50K messages at the same time.
- `rabbitmq_transactional_channel` Wrap insert queries in transactions. Default: `0`.
Required configuration:
The RabbitMQ server configuration should be added using the ClickHouse config file.
``` xml
<rabbitmq>
<username>root</username>
<password>clickhouse</password>
</rabbitmq>
```
Example:
``` sql
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
rabbitmq_exchange_name = 'exchange1',
rabbitmq_format = 'JSONEachRow',
rabbitmq_num_consumers = 5;
```
## Description {#description}
`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. To do this:
1. Use the engine to create a RabbitMQ consumer and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from RabbitMQ and convert them to the required format using `SELECT`.
One RabbitMQ table can have as many materialized views as you like.
Data can be channeled based on `rabbitmq_exchange_type` and the specified `rabbitmq_routing_key_list`.
There can be no more than one exchange per table. One exchange can be shared between multiple tables - it enables routing into multiple tables at the same time.
Exchange type options:
- `direct` - Routing is based on exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, message key can eqaul any of them.
- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys.
- `topic` - Routing is based on patterns with dot-separated keys. Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`.
- `consistent-hash` - Data is evenly distributed between all bound tables (where exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
If exchange type is not specified, then default is `fanout` and routing keys for data publishing must be randomized in range `[1, num_consumers]` for every message/batch (or in range `[1, num_consumers * num_queues]` if `rabbitmq_num_queues` is set). This table configuration works quicker then any other, especially when `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` parameters are set.
If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` parameters are specified along with `rabbitmq_exchange_type`, then:
- `rabbitmq-consistent-hash-exchange` plugin must be enabled.
- `message_id` property of the published messages must be specified (unique for each message/batch).
Example:
``` sql
CREATE TABLE queue (
key UInt64,
value UInt64
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
rabbitmq_exchange_name = 'exchange1',
rabbitmq_exchange_type = 'headers',
rabbitmq_routing_key_list = 'format=logs,type=report,year=2020',
rabbitmq_format = 'JSONEachRow',
rabbitmq_num_consumers = 5;
CREATE TABLE daily (key UInt64, value UInt64)
ENGINE = MergeTree();
CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT key, value FROM queue;
SELECT key, value FROM daily ORDER BY key;
```

View File

@ -1,6 +1,45 @@
--- ---
toc_folder_title: Log Family toc_folder_title: Log Family
toc_priority: 29 toc_priority: 29
toc_title: Introduction
--- ---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutation](../../../sql-reference/statements/alter.md#alter-mutations) operations.
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesnt support parallel data reading by several threads in a single query. It reads data slower than other engines in the family that support parallel reading from a single query and it uses almost as many file descriptors as the `Log` engine because it stores each column in a separate file. Use it only in simple scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer file descriptors, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->

View File

@ -1,44 +0,0 @@
---
toc_priority: 31
toc_title: Introduction
---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutation](../../../sql-reference/statements/alter.md#alter-mutations) operations.
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesnt support parallel data reading by several threads. It reads data slower than other engines in the family that support parallel reading and it uses almost as many descriptors as the `Log` engine because it stores each column in a separate file. Use it in simple low-load scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer descriptors in the operating system, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Log
# Log {#log} # Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article. Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# Stripelog {#stripelog} # Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article. This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/index.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).
@ -20,7 +20,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = StripeLog ) ENGINE = StripeLog
``` ```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query. See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
## Writing the Data {#table_engines-stripelog-writing-the-data} ## Writing the Data {#table_engines-stripelog-writing-the-data}

View File

@ -5,7 +5,7 @@ toc_title: TinyLog
# TinyLog {#tinylog} # TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences. The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/index.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient. This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.

View File

@ -32,7 +32,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md). For a description of request parameters, see [request description](../../../sql-reference/statements/create/table.md).
**Query clauses** **Query clauses**

View File

@ -26,7 +26,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of query parameters, see [query description](../../../sql-reference/statements/create.md). For a description of query parameters, see [query description](../../../sql-reference/statements/create/table.md).
**CollapsingMergeTree Parameters** **CollapsingMergeTree Parameters**

View File

@ -77,7 +77,7 @@ Lets break down the name of the first part: `201901_1_3_1`:
The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive. The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive.
As you can see in the example, there are several separated parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after inserting. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. Example: As you can see in the example, there are several separated parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after inserting. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query. Example:
``` sql ``` sql
OPTIMIZE TABLE visits PARTITION 201902; OPTIMIZE TABLE visits PARTITION 201902;

View File

@ -28,7 +28,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query. See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
A table for the Graphite data should have the following columns for the following data: A table for the Graphite data should have the following columns for the following data:

View File

@ -1,6 +1,17 @@
--- ---
toc_folder_title: MergeTree Family toc_folder_title: MergeTree Family
toc_priority: 28 toc_priority: 28
toc_title: Introduction
--- ---
# MergeTree Engine Family {#mergetree-engine-family}
Table engines from the MergeTree family are the core of ClickHouse data storage capabilities. They provide most features for resilience and high-performance data retrieval: columnar storage, custom partitioning, sparse primary index, secondary data-skipping indexes, etc.
Base [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) table engine can be considered the default table engine for single-node ClickHouse instances because it is versatile and practical for a wide range of use cases.
For production usage [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) is the way to go, because it adds high-availability to all features of regular MergeTree engine. A bonus is automatic data deduplication on data ingestion, so the software can safely retry if there was some network issue during insert.
All other engines of MergeTree family add extra functionality for some specific use cases. Usually, its implemented as additional data manipulation in background.
The main downside of MergeTree engines is that they are rather heavy-weight. So the typical pattern is to have not so many of them. If you need many small tables, for example for temporary data, consider [Log engine family](../../../engines/table-engines/log-family/index.md).

View File

@ -49,7 +49,7 @@ ORDER BY expr
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md). For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create/table.md).
### Query Clauses {#mergetree-query-clauses} ### Query Clauses {#mergetree-query-clauses}
@ -482,7 +482,7 @@ Data with an expired TTL is removed when ClickHouse merges data parts.
When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources. When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`. If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query before `SELECT`.
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

View File

@ -27,7 +27,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md). For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md).
**ReplacingMergeTree Parameters** **ReplacingMergeTree Parameters**

View File

@ -24,7 +24,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md). For a description of request parameters, see [request description](../../../sql-reference/statements/create/table.md).
**Parameters of SummingMergeTree** **Parameters of SummingMergeTree**

View File

@ -29,7 +29,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...] [SETTINGS name=value, ...]
``` ```
For a description of query parameters, see the [query description](../../../sql-reference/statements/create.md). For a description of query parameters, see the [query description](../../../sql-reference/statements/create/table.md).
**Engine Parameters** **Engine Parameters**

View File

@ -28,7 +28,7 @@ ClickHouse does not allow to specify filesystem path for`File`. It will use fold
When creating table using `File(Format)` it creates empty subdirectory in that folder. When data is written to that table, its put into `data.Format` file in that subdirectory. When creating table using `File(Format)` it creates empty subdirectory in that folder. When data is written to that table, its put into `data.Format` file in that subdirectory.
You may manually create this subfolder and file in server filesystem and then [ATTACH](../../../sql-reference/statements/misc.md) it to table information with matching name, so you can query data from that file. You may manually create this subfolder and file in server filesystem and then [ATTACH](../../../sql-reference/statements/attach.md) it to table information with matching name, so you can query data from that file.
!!! warning "Warning" !!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.

View File

@ -3,7 +3,7 @@ toc_folder_title: Special
toc_priority: 31 toc_priority: 31
--- ---
# Special Table Engines # Special Table Engines {#special-table-engines}
There are three main categories of table engines: There are three main categories of table engines:

View File

@ -20,7 +20,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = Join(join_strictness, join_type, k1[, k2, ...]) ) ENGINE = Join(join_strictness, join_type, k1[, k2, ...])
``` ```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query. See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
**Engine Parameters** **Engine Parameters**

View File

@ -5,6 +5,6 @@ toc_title: MaterializedView
# MaterializedView Table Engine {#materializedview} # MaterializedView Table Engine {#materializedview}
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create/table.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->

View File

@ -17,7 +17,7 @@ Indexes
: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns. : ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns.
Data compression Data compression
: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs) that can make data even more compact. : Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs) that can make data even more compact.
Vectorized query execution Vectorized query execution
: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage. : ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage.

View File

@ -8,8 +8,8 @@ toc_priority: 101
ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouses [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there ([example](https://medium.com/@AltinityDB/clickhouse-for-time-series-scalability-benchmarks-e181132a895b)), so were not going to conduct one here. Instead, lets focus on ClickHouse features that are important to use if thats your use case. ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouses [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there ([example](https://medium.com/@AltinityDB/clickhouse-for-time-series-scalability-benchmarks-e181132a895b)), so were not going to conduct one here. Instead, lets focus on ClickHouse features that are important to use if thats your use case.
First of all, there are **[specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs)** which make typical time-series. Either common algorithms like `DoubleDelta` and `Gorilla` or specific to ClickHouse like `T64`. First of all, there are **[specialized codecs](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)** which make typical time-series. Either common algorithms like `DoubleDelta` and `Gorilla` or specific to ClickHouse like `T64`.
Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast nVME/SSD drives and high-capacity HDD drives. ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature allows to configure keeping fresh hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it. Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast nVME/SSD drives and high-capacity HDD drives. ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature allows to configure keeping fresh hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it.
Even though its against ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create.md#create-view) to fit into even tighter latency or costs requirements. Even though its against ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create/view.md) to fit into even tighter latency or costs requirements.

View File

@ -98,9 +98,9 @@ To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.doc
For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` - [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` - [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub. After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub.

View File

@ -99,7 +99,7 @@ As in most databases management systems, ClickHouse logically groups tables into
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
``` ```
Syntax for creating tables is way more complicated compared to databases (see [reference](../sql-reference/statements/create.md). In general `CREATE TABLE` statement has to specify three key things: Syntax for creating tables is way more complicated compared to databases (see [reference](../sql-reference/statements/create/table.md). In general `CREATE TABLE` statement has to specify three key things:
1. Name of table to create. 1. Name of table to create.
2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md).
@ -479,7 +479,7 @@ FORMAT TSV
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
``` ```
Optionally you can [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later:
``` bash ``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"

View File

@ -13,7 +13,7 @@ The supported formats are:
| Format | Input | Output | | Format | Input | Output |
|-----------------------------------------------------------------|-------|--------| |-----------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ | | [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | | ✔ | | [TabSeparatedRaw](#tabseparatedraw) | | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ | | [Template](#format-template) | ✔ | ✔ |
@ -143,7 +143,7 @@ SELECT * FROM nestedt FORMAT TSV
## TabSeparatedRaw {#tabseparatedraw} ## TabSeparatedRaw {#tabseparatedraw}
Differs from `TabSeparated` format in that the rows are written without escaping. Differs from `TabSeparated` format in that the rows are written without escaping.
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). When parsing with this format, tabs or linefeeds are not allowed in each field.
This format is also available under the name `TSVRaw`. This format is also available under the name `TSVRaw`.
@ -979,7 +979,7 @@ message MessageType {
} }
``` ```
are not applied; the [table defaults](../sql-reference/statements/create.md#create-default-values) are used instead of them. are not applied; the [table defaults](../sql-reference/statements/create/table.md#create-default-values) are used instead of them.
ClickHouse inputs and outputs protobuf messages in the `length-delimited` format. ClickHouse inputs and outputs protobuf messages in the `length-delimited` format.
It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).

View File

@ -367,34 +367,34 @@ $ curl -v 'http://localhost:8123/predefined_query'
As you can see from the example if `http_handlers` is configured in the config.xml file and `http_handlers` can contain many `rules`. ClickHouse will match the HTTP requests received to the predefined type in `rule` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful. As you can see from the example if `http_handlers` is configured in the config.xml file and `http_handlers` can contain many `rules`. ClickHouse will match the HTTP requests received to the predefined type in `rule` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful.
Now `rule` can configure `method`, `headers`, `url`, `handler`: Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request. - `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request. - `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request. - `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined\_query\_handler](#predefined_query_handler), [dynamic\_query\_handler](#dynamic_query_handler), [static](#static).
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
- `query` — use with `predefined_query_handler` type, executes query when the handler is called. - `query` — use with `predefined_query_handler` type, executes query when the handler is called.
- `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params. - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params.
- `status` — use with `static` type, response status code. - `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). - `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — use with `static` type, response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration sends to client. - `response_content` — use with `static` type, response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration sends to client.
Next are the configuration methods for different `type`. Next are the configuration methods for different `type`.
### predefined_query_handler {#predefined_query_handler} ### predefined\_query\_handler {#predefined_query_handler}
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the type of `predefined_query_handler`. `predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the type of `predefined_query_handler`.
`query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully. The following example defines the values of [max\_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example: Example:
@ -425,13 +425,13 @@ max_alter_threads 2
!!! note "caution" !!! note "caution"
In one `predefined_query_handler` only supports one `query` of an insert type. In one `predefined_query_handler` only supports one `query` of an insert type.
### dynamic_query_handler {#dynamic_query_handler} ### dynamic\_query\_handler {#dynamic_query_handler}
In `dynamic_query_handler`, the query is written in the form of param of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`. In `dynamic_query_handler`, the query is written in the form of param of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in. ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully. To experiment with this functionality, the example defines the values of [max\_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully.
Example: Example:
@ -456,7 +456,7 @@ max_alter_threads 2
### static {#static} ### static {#static}
`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content. `static` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
Example: Example:

View File

@ -3,15 +3,14 @@ toc_folder_title: Third-Party
toc_priority: 24 toc_priority: 24
--- ---
# Third-Party Interfaces # Third-Party Interfaces {#third-party-interfaces}
This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API: This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API:
- [Client libraries](client-libraries.md) - [Client libraries](../../interfaces/third-party/client-libraries.md)
- [Integrations](integrations.md) - [Integrations](../../interfaces/third-party/integrations.md)
- [GUI](gui.md) - [GUI](../../interfaces/third-party/gui.md)
- [Proxies](proxy.md) - [Proxies](../../interfaces/third-party/proxy.md)
!!! note "Note" !!! note "Note"
Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them.

View File

@ -104,6 +104,5 @@ toc_title: Integrations
- [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord)
- [GraphQL](https://github.com/graphql) - [GraphQL](https://github.com/graphql)
- [activecube-graphql](https://github.com/bitquery/activecube-graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql)
[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide-->

View File

@ -50,7 +50,7 @@ toc_title: Adopters
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | | <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | | <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |

View File

@ -17,11 +17,11 @@ Its also worth noting that ClickHouse is a database management system, not a
Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance. Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones. In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create/table.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones.
## Disk Storage of Data {#disk-storage-of-data} ## Disk Storage of Data {#disk-storage-of-data}
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis.
ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
@ -57,11 +57,11 @@ Having a data physically sorted by primary key makes it possible to extract data
## Secondary Indexes {#secondary-indexes} ## Secondary Indexes {#secondary-indexes}
Unlike other database management systems, secondary indexes in ClickHouse does not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts wouldn't match the query filtering conditions and do not read them at all, thus they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). Unlike other database management systems, secondary indexes in ClickHouse does not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts wouldnt match the query filtering conditions and do not read them at all, thus they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
## Suitable for Online Queries {#suitable-for-online-queries} ## Suitable for Online Queries {#suitable-for-online-queries}
Most OLAP database management systems don't aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even more which forces to prepare reports offline (in advance or by responding with "come back later"). Most OLAP database management systems dont aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even more which forces to prepare reports offline (in advance or by responding with “come back later”).
In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
@ -73,9 +73,9 @@ ClickHouse provides various ways to trade accuracy for performance:
2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. 2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk.
3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. 3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources.
## Adaptive Join Algorithm ## Adaptive Join Algorithm {#adaptive-join-algorithm}
ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if there's more than one large table. ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if theres more than one large table.
## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} ## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support}
@ -83,9 +83,9 @@ ClickHouse uses asynchronous multi-master replication. After being written to an
For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md). For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md).
## Role-Based Access Control ## Role-Based Access Control {#role-based-access-control}
ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems. ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems.
## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} ## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages}

View File

@ -58,9 +58,9 @@ Privileges can be granted to a user account by the [GRANT](../sql-reference/stat
Management queries: Management queries:
- [CREATE USER](../sql-reference/statements/create.md#create-user-statement) - [CREATE USER](../sql-reference/statements/create/user.md)
- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement) - [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) - [DROP USER](../sql-reference/statements/drop.md)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) - [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
### Settings Applying {#access-control-settings-applying} ### Settings Applying {#access-control-settings-applying}
@ -84,11 +84,11 @@ Role contains:
Management queries: Management queries:
- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement) - [CREATE ROLE](../sql-reference/statements/create/role.md)
- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement) - [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) - [DROP ROLE](../sql-reference/statements/drop.md)
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) - [SET ROLE](../sql-reference/statements/set-role.md)
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement) - [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) - [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query.
@ -99,9 +99,9 @@ Row policy is a filter that defines which of the rows are available to a user or
Management queries: Management queries:
- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement) - [CREATE ROW POLICY](../sql-reference/statements/create/row-policy.md)
- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement) - [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement) - [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) - [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
## Settings Profile {#settings-profiles-management} ## Settings Profile {#settings-profiles-management}
@ -110,9 +110,9 @@ Settings profile is a collection of [settings](../operations/settings/index.md).
Management queries: Management queries:
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement) - [CREATE SETTINGS PROFILE](../sql-reference/statements/create/settings-profile.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement) - [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement) - [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) - [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
## Quota {#quotas-management} ## Quota {#quotas-management}
@ -123,9 +123,9 @@ Quota contains a set of limits for some durations, as well as a list of roles an
Management queries: Management queries:
- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement) - [CREATE QUOTA](../sql-reference/statements/create/quota.md)
- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement) - [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement) - [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) - [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
## Enabling SQL-driven Access Control and Account Management {#enabling-access-control} ## Enabling SQL-driven Access Control and Account Management {#enabling-access-control}

View File

@ -1,9 +1,9 @@
--- ---
toc_folder_title: Optimizing Performance toc_folder_title: Optimizing Performance
toc_priority: 52
toc_hidden: true toc_hidden: true
toc_priority: 52
--- ---
# Optimizing Performance # Optimizing Performance {#optimizing-performance}
- [Sampling query profiler](sampling-query-profiler.md) - [Sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md)

View File

@ -348,7 +348,7 @@ Keys:
Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.` Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.`
- format Message format. Possible values: `bsd` and `syslog.` - format Message format. Possible values: `bsd` and `syslog.`
## send_crash_reports {#server_configuration_parameters-logger} ## send\_crash\_reports {#server_configuration_parameters-logger}
Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
Enabling it, especially in pre-production environments, is greatly appreciated. Enabling it, especially in pre-production environments, is greatly appreciated.
@ -398,8 +398,7 @@ The cache is shared for the server and memory is allocated as needed. The cache
<mark_cache_size>5368709120</mark_cache_size> <mark_cache_size>5368709120</mark_cache_size>
``` ```
## max\_server\_memory\_usage {#max_server_memory_usage}
## max_server_memory_usage {#max_server_memory_usage}
Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile. Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile.
@ -416,8 +415,7 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
**See also** **See also**
- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage) - [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
## max\_concurrent\_queries {#max-concurrent-queries} ## max\_concurrent\_queries {#max-concurrent-queries}

View File

@ -36,7 +36,7 @@ Memory usage is not monitored for the states of certain aggregate functions.
Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments. Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments.
Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max_server_memory_usage](../server-configuration-parameters/settings.md#max_server_memory_usage). Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max\_server\_memory\_usage](../../operations/server-configuration-parameters/settings.md#max_server_memory_usage).
## max\_memory\_usage\_for\_user {#max-memory-usage-for-user} ## max\_memory\_usage\_for\_user {#max-memory-usage-for-user}
@ -46,7 +46,6 @@ Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHo
See also the description of [max\_memory\_usage](#settings_max_memory_usage). See also the description of [max\_memory\_usage](#settings_max_memory_usage).
## max\_rows\_to\_read {#max-rows-to-read} ## max\_rows\_to\_read {#max-rows-to-read}
The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little. The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little.

View File

@ -129,7 +129,7 @@ Default value: 0.
## max\_http\_get\_redirects {#setting-max_http_get_redirects} ## max\_http\_get\_redirects {#setting-max_http_get_redirects}
Limits the maximum number of HTTP GET redirect hops for [URL](../../engines/table-engines/special/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../sql-reference/statements/create.md#create-table-query) query and by the [url](../../sql-reference/table-functions/url.md) table function. Limits the maximum number of HTTP GET redirect hops for [URL](../../engines/table-engines/special/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../sql-reference/statements/create/table.md) query and by the [url](../../sql-reference/table-functions/url.md) table function.
Possible values: Possible values:
@ -733,8 +733,8 @@ Limits maximum recursion depth in the recursive descent parser. Allows to contro
Possible values: Possible values:
- Positive integer. - Positive integer.
- 0 — Recursion depth is unlimited. - 0 — Recursion depth is unlimited.
Default value: 1000. Default value: 1000.
@ -1427,20 +1427,20 @@ Possible values:
Default value: 16. Default value: 16.
## always_fetch_merged_part {#always_fetch_merged_part} ## always\_fetch\_merged\_part {#always_fetch_merged_part}
Prohibits data parts merging in [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables. Prohibits data parts merging in [Replicated\*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables.
When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage. When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage.
Possible values: Possible values:
- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica. - 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica.
- 1 — `Replicated*MergeTree`-engine tables don't merge data parts at the replica. The tables download merged data parts from other replicas. - 1 — `Replicated*MergeTree`-engine tables dont merge data parts at the replica. The tables download merged data parts from other replicas.
Default value: 0. Default value: 0.
**See Also** **See Also**
- [Data Replication](../../engines/table-engines/mergetree-family/replication.md) - [Data Replication](../../engines/table-engines/mergetree-family/replication.md)
@ -1454,11 +1454,11 @@ Possible values:
Default value: 16. Default value: 16.
## transform_null_in {#transform_null_in} ## transform\_null\_in {#transform_null_in}
Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator. Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator.
By default, `NULL` values can't be compared because `NULL` means undefined value. Thus, comparison `expr = NULL` must always return `false`. With this setting `NULL = NULL` returns `true` for `IN` operator. By default, `NULL` values cant be compared because `NULL` means undefined value. Thus, comparison `expr = NULL` must always return `false`. With this setting `NULL = NULL` returns `true` for `IN` operator.
Possible values: Possible values:
@ -1467,11 +1467,11 @@ Possible values:
Default value: 0. Default value: 0.
**Example** **Example**
Consider the `null_in` table: Consider the `null_in` table:
```text ``` text
┌──idx─┬─────i─┐ ┌──idx─┬─────i─┐
│ 1 │ 1 │ │ 1 │ 1 │
│ 2 │ NULL │ │ 2 │ NULL │
@ -1481,13 +1481,13 @@ Consider the `null_in` table:
Query: Query:
```sql ``` sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0; SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0;
``` ```
Result: Result:
```text ``` text
┌──idx─┬────i─┐ ┌──idx─┬────i─┐
│ 1 │ 1 │ │ 1 │ 1 │
└──────┴──────┘ └──────┴──────┘
@ -1495,24 +1495,23 @@ Result:
Query: Query:
```sql ``` sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
``` ```
Result: Result:
```text ``` text
┌──idx─┬─────i─┐ ┌──idx─┬─────i─┐
│ 1 │ 1 │ │ 1 │ 1 │
│ 2 │ NULL │ │ 2 │ NULL │
└──────┴───────┘ └──────┴───────┘
``` ```
**See Also** **See Also**
- [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing) - [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing)
## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size} ## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that cant be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method. Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that cant be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method.
@ -1570,9 +1569,9 @@ Possible values:
Default value: 0. Default value: 0.
## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views} ## min\_insert\_block\_size\_rows\_for\_materialized\_views {#min-insert-block-size-rows-for-materialized-views}
Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
Possible values: Possible values:
@ -1583,11 +1582,11 @@ Default value: 1048576.
**See Also** **See Also**
- [min_insert_block_size_rows](#min-insert-block-size-rows) - [min\_insert\_block\_size\_rows](#min-insert-block-size-rows)
## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views} ## min\_insert\_block\_size\_bytes\_for\_materialized\_views {#min-insert-block-size-bytes-for-materialized-views}
Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
Possible values: Possible values:
@ -1598,6 +1597,27 @@ Default value: 268435456.
**See also** **See also**
- [min_insert_block_size_bytes](#min-insert-block-size-bytes) - [min\_insert\_block\_size\_bytes](#min-insert-block-size-bytes)
## output_format_pretty_grid_charset {#output-format-pretty-grid-charset}
Allows to change a charset which is used for printing grids borders. Available charsets are following: UTF-8, ASCII.
**Example**
```text
SET output_format_pretty_grid_charset = 'UTF-8';
SELECT * FROM a;
┌─a─┐
│ 1 │
└───┘
SET output_format_pretty_grid_charset = 'ASCII';
SELECT * FROM a;
+-a-+
| 1 |
+---+
```
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide --> [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -1,3 +1,3 @@
## system.asynchronous\_metric\_log {#system-tables-async-log} ## system.asynchronous\_metric\_log {#system-tables-async-log}
Contains the historical values for `system.asynchronous_log` (see [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics)) Contains the historical values for `system.asynchronous_log` (see [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))

View File

@ -31,6 +31,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
**See Also** **See Also**
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that have occurred. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.

View File

@ -4,6 +4,6 @@ Contains information about detached parts of [MergeTree](../../engines/table-eng
For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) command. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) command.
For the description of other columns, see [system.parts](parts.md#system_tables-parts). For the description of other columns, see [system.parts](../../operations/system-tables/parts.md#system_tables-parts).
If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached).

View File

@ -26,7 +26,7 @@ SELECT * FROM system.events LIMIT 5
**See Also** **See Also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. - [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -20,7 +20,7 @@ System tables:
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
Unlike other system tables, the system tables [metric\_log](metric_log.md#system_tables-metric_log), [query\_log](query_log.md#system_tables-query_log), [query\_thread\_log](query_thread_log.md#system_tables-query_thread_log), [trace\_log](trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. Unlike other system tables, the system tables [metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query\_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables. By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.
@ -45,5 +45,4 @@ If procfs is supported and enabled on the system, ClickHouse server collects the
- `OSReadBytes` - `OSReadBytes`
- `OSWriteBytes` - `OSWriteBytes`
[Original article](https://clickhouse.tech/docs/en/operations/system-tables/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/system-tables/) <!--hide-->

View File

@ -49,7 +49,7 @@ CurrentMetric_ReplicatedChecks: 0
**See also** **See also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. - [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that occurred. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -33,7 +33,7 @@ SELECT * FROM system.metrics LIMIT 10
**See Also** **See Also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. - [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that occurred. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -1,5 +1,5 @@
# system.numbers\_mt {#system-numbers-mt} # system.numbers\_mt {#system-numbers-mt}
The same as [system.numbers](numbers.md) but reads are parallelized. The numbers can be returned in any order. The same as [system.numbers](../../operations/system-tables/numbers.md) but reads are parallelized. The numbers can be returned in any order.
Used for tests. Used for tests.

View File

@ -11,7 +11,7 @@ You can disable queries logging by setting [log\_queries = 0](../../operations/s
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesnt delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details. ClickHouse doesnt delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
The `system.query_log` table registers two kinds of queries: The `system.query_log` table registers two kinds of queries:
@ -75,7 +75,7 @@ Columns:
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). - `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution. - `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](events.md#system_tables-events) - `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column. - `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. - `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column. - `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
@ -135,4 +135,4 @@ Settings.Values: ['0','random','1','10000000000']
**See Also** **See Also**
- [system.query\_thread\_log](query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. - [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.

View File

@ -9,7 +9,7 @@ To start logging:
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesnt delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details. ClickHouse doesnt delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
Columns: Columns:
@ -110,4 +110,4 @@ ProfileEvents.Values: [1,97,81,5,81]
**See Also** **See Also**
- [system.query\_log](query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. - [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.

View File

@ -0,0 +1,16 @@
# system.quota_limits {#system_tables-quota_limits}
Contains information about maximums for all intervals of all quotas. Any number of rows or zero can correspond to one quota.
Columns:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Quota name.
- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Length of the time interval for calculating resource consumption, in seconds.
- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether the interval is randomized. Interval always starts at the same time if it is not randomized. For example, an interval of 1 minute always starts at an integer number of minutes (i.e. it can start at 11:20:00, but it never starts at 11:20:01), an interval of one day always starts at midnight UTC. If interval is randomized, the very first interval starts at random time, and subsequent intervals starts one by one. Values:
- `0` — Interval is not randomized.
- `1` — Interval is randomized.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of RAM volume in bytes used to store a queries result.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds.

View File

@ -0,0 +1,24 @@
# system.quota_usage {#system_tables-quota_usage}
Quota usage by the current user: how much is used and how much is left.
Columns:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Quota name.
- `quota_key`([String](../../sql-reference/data-types/string.md)) — Key value. For example, if keys = [`ip address`], `quota_key` may have a value '192.168.1.1'.
- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Start time for calculating resource consumption.
- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — RAM volume in bytes used to store a queries result.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum RAM volume used to store a queries result, in bytes.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of source rows read from tables for running the query on all remote servers.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.

View File

@ -0,0 +1,20 @@
# system.quotas {#system_tables-quotas}
Contains information about [quotas](quotas.md).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Quota name.
- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID.
- `storage`([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible value: "users.xml" if a quota configured in the users.xml file, "disk" if a quota configured by an SQL-query.
- `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values:
- `[]` — All users share the same quota.
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the qouta is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
- `0` — The quota applies to users specify in the `apply_to_list`.
- `1` — The quota applies to all users except those listed in `apply_to_except`.
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.

View File

@ -0,0 +1,25 @@
# system.quotas_usage {#system_tables-quotas_usage}
Quota usage by all users.
Columns:
- `quota_name` ([String](../../sql-reference/data-types/string.md)) — Quota name.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — Key value.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Quota usage for current user.
- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — Start time for calculating resource consumption.
- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — End time for calculating resource consumption.
- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds.
- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval.
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of source rows read from tables.
- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — RAM volume in bytes used to store a queries result.
- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum RAM volume used to store a queries result, in bytes.
- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md)))) — The total number of source rows read from tables for running the query on all remote servers.
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.

View File

@ -46,4 +46,8 @@ This table contains the following columns (the column type is shown in brackets)
- If the table stores data on disk, returns used space on disk (i.e. compressed). - If the table stores data on disk, returns used space on disk (i.e. compressed).
- If the table stores data in memory, returns approximated number of used bytes in memory. - If the table stores data in memory, returns approximated number of used bytes in memory.
- `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
The `system.tables` table is used in `SHOW TABLES` query implementation. The `system.tables` table is used in `SHOW TABLES` query implementation.

View File

@ -1,6 +1,7 @@
--- ---
toc_priority: 103 toc_priority: 103
--- ---
# anyHeavy {#anyheavyx} # anyHeavy {#anyheavyx}
Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the querys execution threads, this value is returned. Normally, the result is nondeterministic. Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the querys execution threads, this value is returned. Normally, the result is nondeterministic.

View File

@ -5,4 +5,4 @@ toc_priority: 104
## anyLast {#anylastx} ## anyLast {#anylastx}
Selects the last value encountered. Selects the last value encountered.
The result is just as indeterminate as for the [any](any.md) function. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.

View File

@ -26,7 +26,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/
**Details** **Details**
ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count\_distinct\_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](uniqexact.md#agg_function-uniqexact) function. ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count\_distinct\_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function.
The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.

View File

@ -21,7 +21,7 @@ If in one query several values are inserted into the same position, the function
- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). - `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md).
- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). - `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges).
- `default_x`— Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create.md#create-default-values) are used. - `default_x`— Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create/table.md#create-default-values) are used.
- `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). - `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges).
**Returned value** **Returned value**

View File

@ -2,11 +2,11 @@
toc_priority: 111 toc_priority: 111
--- ---
# groupUniqArray # groupUniqArray {#groupuniqarray}
Syntax: `groupUniqArray(x)` or `groupUniqArray(max_size)(x)` Syntax: `groupUniqArray(x)` or `groupUniqArray(max_size)(x)`
Creates an array from different argument values. Memory consumption is the same as for the [uniqExact](uniqexact.md) function. Creates an array from different argument values. Memory consumption is the same as for the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) function.
The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements.
For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`.

View File

@ -1,7 +1,7 @@
--- ---
toc_folder_title: Reference
toc_priority: 36 toc_priority: 36
toc_title: Reference toc_title: Reference
toc_folder_title: Reference
--- ---
# Aggregate Function Reference {#aggregate-functions-reference} # Aggregate Function Reference {#aggregate-functions-reference}
@ -24,50 +24,95 @@ Standard aggregate functions:
ClickHouse-specific aggregate functions: ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) - [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) - [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) - [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) - [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) - [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](../../../sql-reference/aggregate-functions/reference/topkweighted.md) - [topK](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) - [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) - [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) - [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) - [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) - [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) - [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) - [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) - [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) - [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md)
- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) - [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) - [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) - [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) - [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) - [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) - [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md) - [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md)
- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md) - [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) - [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) - [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md)
- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) - [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) - [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) - [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) - [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) - [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) - [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) - [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) - [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) - [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) - [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) - [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) - [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)

View File

@ -6,9 +6,9 @@ toc_priority: 200
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and a random number generator for sampling. The result is non-deterministic. To get an exact quantile, use the [quantileExact](quantileexact.md#quantileexact) function. This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and a random number generator for sampling. The result is non-deterministic. To get an exact quantile, use the [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact) function.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -62,5 +62,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -6,9 +6,9 @@ toc_priority: 206
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and deterministic algorithm of sampling. The result is deterministic. To get an exact quantile, use the [quantileExact](quantileexact.md#quantileexact) function. This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and deterministic algorithm of sampling. The result is deterministic. To get an exact quantile, use the [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact) function.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -63,5 +63,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -8,7 +8,7 @@ Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a num
To get exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. To get exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -51,5 +51,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -2,14 +2,13 @@
toc_priority: 203 toc_priority: 203
--- ---
# quantileExactWeighted {#quantileexactweighted} # quantileExactWeighted {#quantileexactweighted}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element.
To get exact value, all the passed values are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values are frequently repeated, the function consumes less RAM than [quantileExact](quantileexact.md#quantileexact). You can use this function instead of `quantileExact` and specify the weight 1. To get exact value, all the passed values are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values are frequently repeated, the function consumes less RAM than [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). You can use this function instead of `quantileExact` and specify the weight 1.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -64,5 +63,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -8,9 +8,9 @@ Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a
The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic.
The performance of the function is lower than performance of [quantile](quantile.md#quantile) or [quantileTiming](quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -53,5 +53,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -6,11 +6,11 @@ toc_priority: 208
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values.
The performance of the function is lower than performance of [quantile](quantile.md#quantile) or [quantileTiming](quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. The performance of the function is lower than performance of [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile) or [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.
The result depends on the order of running the query, and is nondeterministic. The result depends on the order of running the query, and is nondeterministic.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax** **Syntax**
@ -54,5 +54,5 @@ Result:
**See Also** **See Also**
- [median](median.md#median) - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](quantiles.md#quantiles) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

Some files were not shown because too many files have changed in this diff Show More